Diffstat (limited to 'usr/src/lib/libc')
54 files changed, 10626 insertions, 418 deletions
diff --git a/usr/src/lib/libc/Makefile.targ b/usr/src/lib/libc/Makefile.targ index e3bb69581a..26e8812bd7 100644 --- a/usr/src/lib/libc/Makefile.targ +++ b/usr/src/lib/libc/Makefile.targ @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -200,6 +199,21 @@ pics/%_c89.o: $(LIBCBASE)/../port/stdio/%.c $(COMPILE.c) -o $@ $< $(POST_PROCESS_O) +# aio rules +pics/%.o: $(LIBCBASE)/../port/aio/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + +# rt rules +pics/%.o: $(LIBCBASE)/../port/rt/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + +# tpool rules +pics/%.o: $(LIBCBASE)/../port/tpool/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + # threads rules pics/%.o: $(LIBCBASE)/../port/threads/%.c $(COMPILE.c) -o $@ $< diff --git a/usr/src/lib/libc/README b/usr/src/lib/libc/README index f3c5ab146d..289f766aef 100644 --- a/usr/src/lib/libc/README +++ b/usr/src/lib/libc/README @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -64,9 +63,12 @@ fork-safe) and in which the calling thread has all signals deferred However, certain rules apply to the code within these critical regions: - - The code must be of guaranteed short duration; no - calls to interfaces that might block indefinitely are - allowed. This means no calls into stdio or syslog(). + - The code must be of guaranteed short duration; no calls + to interfaces that might block indefinitely are allowed. + This means no calls into stdio or syslog() and no calls + to cond_wait() unless there is a guarantee of an almost- + immediate call to cond_signal() or cond_broadcast() + from elsewhere. - The code cannot call any non-l* synchronization primitives (mutex_lock(), _private_mutex_lock(), @@ -197,3 +199,40 @@ conditions such as the setting of CFLAGS and CPPFLAGS for the libc_i18n stuff need to be compatible with the ones for the libc stuff. Whenever changes that affect the compilation conditions of libc occur, the changes should be propagated to libc_i18n. 
+ +----- + +The putback of the project: + 6416832 libaio and librt can and should be folded into libc +introduced several libc-private locking interfaces: + void sig_mutex_lock(mutex_t *); + void sig_mutex_unlock(mutex_t *); + int sig_mutex_trylock(mutex_t *); + int sig_cond_wait(cond_t *, mutex_t *); + int sig_cond_reltimedwait(cond_t *, mutex_t *, const timespec_t *); +which are declared in both "thr_uberdata.h" and "mtlib.h". + +They are used in specialized code in libc, like the asynchronous I/O code. +Unlike the lmutex_lock() and lmutex_unlock() interfaces described above, +these interfaces do not define critical regions, but signals are +deferred while locks acquired by these functions are held, making +their use async-signal-safe. Calls to malloc(), calloc(), realloc(), +and free() are permissible while holding such locks. + +These interfaces were brought over from code in the former libaio +and librt and are necessary because, where they are used, the code +must execute potentially long-term waits and must be cancelable. +sig_cond_wait() and sig_cond_reltimedwait() are cancellation points. + +These interfaces are available for other uses inside libc, as +the need arises. (There is no need if the code does not perform +long-term waits.) Just follow a few rules to be self-consistent: + - Don't mix calls to mutex_[un]lock(), lmutex_[un]lock() and + sig_mutex_[un]lock() on the same mutex. + - Don't call cond_wait() with a mutex acquired by sig_mutex_lock(); + call sig_cond_wait() or sig_cond_reltimedwait(). + - Use pthread_cleanup_push() and pthread_cleanup_pop() to make + your code cancellation-safe. + - The sig_*() interfaces are not in themselves fork-safe. + You have to employ other logic to make your code fork-safe. + See the tail of postfork1_child() for examples.
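To illustrate these rules, here is a minimal sketch of a cancelable,
signal-deferring wait (hypothetical code, not part of the putback;
the names wait_lock, wait_cv, wait_done and wait_for_done are
invented for the example):

	#include <thread.h>
	#include <pthread.h>
	#include "mtlib.h"	/* declares the sig_*() interfaces */

	static mutex_t wait_lock = DEFAULTMUTEX;
	static cond_t wait_cv = DEFAULTCV;
	static int wait_done;

	static void
	wait_cleanup(void *arg)
	{
		sig_mutex_unlock((mutex_t *)arg);
	}

	void
	wait_for_done(void)
	{
		/* sig_mutex_lock(), never mutex_lock(), on this mutex */
		sig_mutex_lock(&wait_lock);
		pthread_cleanup_push(wait_cleanup, &wait_lock);
		while (!wait_done)	/* sig_cond_wait() is a cancellation point */
			(void) sig_cond_wait(&wait_cv, &wait_lock);
		pthread_cleanup_pop(1);	/* pops the handler and unlocks wait_lock */
	}

The cleanup handler is what makes the wait cancellation-safe: if the
thread is canceled while blocked in sig_cond_wait(), wait_lock is
still released during unwinding.  As noted above, fork-safety requires
additional logic beyond this.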
diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile index 0238a550ed..4db5f28fcb 100644 --- a/usr/src/lib/libc/amd64/Makefile +++ b/usr/src/lib/libc/amd64/Makefile @@ -711,6 +711,24 @@ PORTI18N_COND= \ wcstol_longlong.o \ wcstoul_longlong.o +AIOOBJS= \ + aio.o \ + aio_alloc.o \ + posix_aio.o \ + +RTOBJS= \ + clock_timer.o \ + fallocate.o \ + mqueue.o \ + pos4obj.o \ + sched.o \ + sem.o \ + shm.o \ + sigev_thread.o + +TPOOLOBJS= \ + thread_pool.o + THREADSOBJS= \ alloc.o \ assfail.o \ @@ -836,6 +854,9 @@ MOSTOBJS= \ $(PORTSTDIO_W) \ $(PORTSYS) \ $(PORTSYS64) \ + $(AIOOBJS) \ + $(RTOBJS) \ + $(TPOOLOBJS) \ $(THREADSOBJS) \ $(THREADSMACHOBJS) \ $(THREADSASMOBJS) \ @@ -939,6 +960,9 @@ SRCS= \ $(PORTREGEX:%.o=../port/regex/%.c) \ $(PORTSTDIO:%.o=../port/stdio/%.c) \ $(PORTSYS:%.o=../port/sys/%.c) \ + $(AIOOBJS:%.o=../port/aio/%.c) \ + $(RTOBJS:%.o=../port/rt/%.c) \ + $(TPOOLOBJS:%.o=../port/tpool/%.c) \ $(THREADSOBJS:%.o=../port/threads/%.c) \ $(THREADSMACHOBJS:%.o=threads/%.c) \ $(UNWINDMACHOBJS:%.o=unwind/%.c) \ @@ -966,6 +990,7 @@ $(MAPFILE): # Files which need the threads .il inline template TIL= \ + aio.o \ alloc.o \ assfail.o \ atexit.o \ @@ -974,7 +999,9 @@ TIL= \ door_calls.o \ errno.o \ lwp.o \ + ma.o \ machdep.o \ + posix_aio.o \ pthr_attr.o \ pthr_barrier.o \ pthr_cond.o \ @@ -987,12 +1014,14 @@ TIL= \ scalls.o \ sema.o \ sigaction.o \ + sigev_thread.o \ spawn.o \ stack.o \ synch.o \ tdb_agent.o \ thr.o \ thread_interface.o \ + thread_pool.o \ thrp_unwind.o \ tls.o \ tsd.o diff --git a/usr/src/lib/libc/amd64/gen/siglongjmp.c b/usr/src/lib/libc/amd64/gen/siglongjmp.c index fd9860aad8..4bc4c579a4 100644 --- a/usr/src/lib/libc/amd64/gen/siglongjmp.c +++ b/usr/src/lib/libc/amd64/gen/siglongjmp.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,13 +18,17 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" +#pragma weak siglongjmp = _siglongjmp + +#include "synonyms.h" #include <sys/types.h> #include <sys/ucontext.h> #include <setjmp.h> @@ -33,8 +36,6 @@ extern int _setcontext(const ucontext_t *); -#pragma weak siglongjmp = _siglongjmp - void _siglongjmp(sigjmp_buf env, int val) { diff --git a/usr/src/lib/libc/common/sys/__clock_timer.s b/usr/src/lib/libc/common/sys/__clock_timer.s index 4c4e917836..5188262570 100644 --- a/usr/src/lib/libc/common/sys/__clock_timer.s +++ b/usr/src/lib/libc/common/sys/__clock_timer.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
@@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -110,11 +110,11 @@ /* * int - * ___nanosleep(const timespec_t *rqtp, timespec_t *rmtp) + * __nanosleep(const timespec_t *rqtp, timespec_t *rmtp) */ - ENTRY(___nanosleep) + ENTRY(__nanosleep) SYSTRAP_RVAL1(nanosleep) SYSLWPERR RET - SET_SIZE(___nanosleep) + SET_SIZE(__nanosleep) diff --git a/usr/src/lib/libc/common/sys/__signotify.s b/usr/src/lib/libc/common/sys/__signotify.s index f49d5eb297..057a00ad45 100644 --- a/usr/src/lib/libc/common/sys/__signotify.s +++ b/usr/src/lib/libc/common/sys/__signotify.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -28,9 +28,8 @@ .file "%M%" -/* unpublished system call for librt -- __signotify */ -/* int _signotify (int cmd, siginfo_t *siginfo, */ -/* signotify_id_t *sn_id); */ +/* unpublished system call for POSIX message queues -- __signotify */ +/* int __signotify (int cmd, siginfo_t *siginfo, signotify_id_t *sn_id); */ #include "SYS.h" diff --git a/usr/src/lib/libc/common/sys/__sigrt.s b/usr/src/lib/libc/common/sys/__sigrt.s index df1154abd0..0ce63adb4e 100644 --- a/usr/src/lib/libc/common/sys/__sigrt.s +++ b/usr/src/lib/libc/common/sys/__sigrt.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -32,7 +32,7 @@ /* * int - * __sigqueue(pid_t pid, int signo, void *value, int si_code) + * __sigqueue(pid_t pid, int signo, void *value, int si_code, int block) */ SYSCALL2_RVAL1(__sigqueue,sigqueue) RETC @@ -40,9 +40,9 @@ /* * int - * ___sigtimedwait(const sigset_t *set, siginfo_t *info, + * __sigtimedwait(const sigset_t *set, siginfo_t *info, * const timespec_t *timeout) */ - SYSCALL2_RVAL1(___sigtimedwait,sigtimedwait) + SYSCALL2_RVAL1(__sigtimedwait,sigtimedwait) RET - SET_SIZE(___sigtimedwait) + SET_SIZE(__sigtimedwait) diff --git a/usr/src/lib/libc/common/sys/kaio.s b/usr/src/lib/libc/common/sys/kaio.s index cb75d3e2d5..1cd3810403 100644 --- a/usr/src/lib/libc/common/sys/kaio.s +++ b/usr/src/lib/libc/common/sys/kaio.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,12 +18,12 @@ * * CDDL HEADER END */ + /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ - /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,14 +32,12 @@ .file "%M%" /* C library -- kaio */ -/* intptr_t kaio (); */ - -#include <sys/asm_linkage.h> - - ANSI_PRAGMA_WEAK(kaio,function) +/* intptr_t _kaio (); */ #include "SYS.h" - SYSCALL_RVAL1(kaio) + ENTRY(_kaio) + SYSTRAP_RVAL1(kaio) + SYSCERROR RET - SET_SIZE(kaio) + SET_SIZE(_kaio) diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com index 4c40fc780e..8613ab3972 100644 --- a/usr/src/lib/libc/i386/Makefile.com +++ b/usr/src/lib/libc/i386/Makefile.com @@ -751,6 +751,24 @@ PORTI18N_COND= \ wcstol_longlong.o \ wcstoul_longlong.o +AIOOBJS= \ + aio.o \ + aio_alloc.o \ + posix_aio.o \ + +RTOBJS= \ + clock_timer.o \ + fallocate.o \ + mqueue.o \ + pos4obj.o \ + sched.o \ + sem.o \ + shm.o \ + sigev_thread.o + +TPOOLOBJS= \ + thread_pool.o + THREADSOBJS= \ alloc.o \ assfail.o \ @@ -871,6 +889,9 @@ MOSTOBJS= \ $(PORTSTDIO_W) \ $(PORTSYS) \ $(PORTSYS64) \ + $(AIOOBJS) \ + $(RTOBJS) \ + $(TPOOLOBJS) \ $(THREADSOBJS) \ $(THREADSMACHOBJS) \ $(THREADSASMOBJS) \ @@ -988,6 +1009,9 @@ SRCS= \ $(PORTREGEX:%.o=../port/regex/%.c) \ $(PORTSTDIO:%.o=../port/stdio/%.c) \ $(PORTSYS:%.o=../port/sys/%.c) \ + $(AIOOBJS:%.o=../port/aio/%.c) \ + $(RTOBJS:%.o=../port/rt/%.c) \ + $(TPOOLOBJS:%.o=../port/tpool/%.c) \ $(THREADSOBJS:%.o=../port/threads/%.c) \ $(THREADSMACHOBJS:%.o=../$(MACH)/threads/%.c) \ $(UNWINDMACHOBJS:%.o=../port/unwind/%.c) \ @@ -1016,6 +1040,7 @@ $(MAPFILE): # Files which need the threads .il inline template TIL= \ + aio.o \ alloc.o \ assfail.o \ atexit.o \ @@ -1024,7 +1049,9 @@ TIL= \ door_calls.o \ errno.o \ lwp.o \ + ma.o \ machdep.o \ + posix_aio.o \ pthr_attr.o \ pthr_barrier.o \ pthr_cond.o \ @@ -1037,12 +1064,14 @@ TIL= \ scalls.o \ sema.o \ sigaction.o \ + sigev_thread.o \ spawn.o \ stack.o \ synch.o \ tdb_agent.o \ thr.o \ thread_interface.o \ + thread_pool.o \ tls.o \ tsd.o \ unwind.o diff --git a/usr/src/lib/libc/i386/gen/siglongjmp.c b/usr/src/lib/libc/i386/gen/siglongjmp.c index ff40ea8f98..1b3296d93d 100644 --- a/usr/src/lib/libc/i386/gen/siglongjmp.c 
+++ b/usr/src/lib/libc/i386/gen/siglongjmp.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,6 +30,9 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#pragma weak siglongjmp = _siglongjmp + +#include "synonyms.h" #include <sys/types.h> #include <sys/ucontext.h> #include <setjmp.h> @@ -37,8 +40,6 @@ extern int _setcontext(const ucontext_t *); -#pragma weak siglongjmp = _siglongjmp - void _siglongjmp(sigjmp_buf env, int val) { diff --git a/usr/src/lib/libc/inc/asyncio.h b/usr/src/lib/libc/inc/asyncio.h new file mode 100644 index 0000000000..02d33cd700 --- /dev/null +++ b/usr/src/lib/libc/inc/asyncio.h @@ -0,0 +1,346 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ASYNCIO_H +#define _ASYNCIO_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <thread.h> +#include <pthread.h> +#include <setjmp.h> +#include <signal.h> +#include <siginfo.h> +#include <aio.h> +#include <limits.h> +#include <ucontext.h> +#include <sys/asynch.h> +#include <sys/mman.h> + +#if !defined(_LP64) +#define AIOSTKSIZE (64 * 1024) +#else +#define AIOSTKSIZE (128 * 1024) +#endif + +#define SIGAIOCANCEL SIGLWP /* special aio cancelation signal */ + +#define AIO_WAITN_MAXIOCBS 32768 /* max. iocbs per system call */ + +/* + * Declare structure types. The structures themselves are defined below. 
+ */ +typedef struct aio_args aio_args_t; +typedef struct aio_lio aio_lio_t; +typedef struct notif_param notif_param_t; +typedef struct aio_req aio_req_t; +typedef struct aio_worker aio_worker_t; +typedef struct aio_hash aio_hash_t; + +struct aio_args { + int fd; + caddr_t buf; + size_t bufsz; + offset_t offset; +}; + +/* + * list head for UFS list I/O + */ +struct aio_lio { + mutex_t lio_mutex; /* list mutex */ + cond_t lio_cond_cv; /* list notification for I/O done */ + aio_lio_t *lio_next; /* pointer to next on freelist */ + char lio_mode; /* LIO_WAIT/LIO_NOWAIT */ + char lio_canned; /* lio was canceled */ + char lio_largefile; /* largefile operation */ + char lio_waiting; /* waiting in __lio_listio() */ + int lio_nent; /* Number of list I/O's */ + int lio_refcnt; /* outstanding I/O's */ + int lio_event; /* Event number for notification */ + int lio_port; /* Port number for notification */ + int lio_signo; /* Signal number for notification */ + union sigval lio_sigval; /* Signal parameter */ + uintptr_t lio_object; /* for SIGEV_THREAD or SIGEV_PORT */ + struct sigevent *lio_sigevent; /* Notification function and attr. */ +}; + +/* + * Notification parameters + */ +struct notif_param { + int np_signo; /* SIGEV_SIGNAL */ + int np_port; /* SIGEV_THREAD or SIGEV_PORT */ + void *np_user; + int np_event; + uintptr_t np_object; + int np_lio_signo; /* listio: SIGEV_SIGNAL */ + int np_lio_port; /* listio: SIGEV_THREAD or SIGEV_PORT */ + void *np_lio_user; + int np_lio_event; + uintptr_t np_lio_object; +}; + +struct aio_req { + /* + * fields protected by _aio_mutex lock. + */ + aio_req_t *req_link; /* hash/freelist chain link */ + /* + * when req is on the doneq, then req_next is protected by + * the _aio_mutex lock. when the req is on a work q, then + * req_next is protected by a worker's work_qlock1 lock. + */ + aio_req_t *req_next; /* request/done queue link */ + aio_req_t *req_prev; /* double linked list */ + /* + * fields protected by a worker's work_qlock1 lock. + */ + char req_state; /* AIO_REQ_QUEUED, ... */ + /* + * fields require no locking. + */ + char req_type; /* AIO_POSIX_REQ or not */ + char req_largefile; /* largefile operation */ + char req_op; /* AIOREAD, etc. 
*/ + aio_worker_t *req_worker; /* associate request with worker */ + aio_result_t *req_resultp; /* address of result buffer */ + aio_args_t req_args; /* arglist */ + aio_lio_t *req_head; /* list head for LIO */ + struct sigevent req_sigevent; + void *req_aiocbp; /* ptr to aiocb or aiocb64 */ + notif_param_t req_notify; /* notification parameters */ +}; + +/* special lio type that destroys itself when lio refcnt becomes zero */ +#define LIO_FSYNC LIO_WAIT+1 +#define LIO_DESTROY LIO_FSYNC+1 + +/* lio flags */ +#define LIO_FSYNC_CANCELED 0x1 + +/* values for aio_state */ + +#define AIO_REQ_QUEUED 1 +#define AIO_REQ_INPROGRESS 2 +#define AIO_REQ_CANCELED 3 +#define AIO_REQ_DONE 4 +#define AIO_REQ_FREE 5 +#define AIO_REQ_DONEQ 6 + +/* use KAIO in _aio_rw() */ +#define AIO_NO_KAIO 0x0 +#define AIO_KAIO 0x1 +#define AIO_NO_DUPS 0x2 + +#define AIO_POSIX_REQ 0x1 + +#define CHECK 1 +#define NOCHECK 2 +#define CHECKED 3 +#define USERAIO 4 +#define USERAIO_DONE 5 + +/* values for _aio_flags */ + +/* if set, _aiodone() notifies aio_waitn about done requests */ +#define AIO_WAIT_INPROGRESS 0x1 +/* if set, _aiodone() wakes up functions waiting for completed I/Os */ +#define AIO_IO_WAITING 0x2 +#define AIO_LIB_WAITN 0x4 /* aio_waitn in progress */ +#define AIO_LIB_WAITN_PENDING 0x8 /* aio_waitn requests pending */ + +/* + * Before a kaio() system call, the fd will be checked + * to ensure that kernel async. I/O is supported for this file. + * The only way to find out is if a kaio() call returns ENOTSUP, + * so the default will always be to try the kaio() call. Only in + * the specific instance of a kaio() call returning ENOTSUP + * will we stop submitting kaio() calls for that fd. + * If the fd is outside the array bounds, we will allow the kaio() + * call. + * + * The only way that an fd entry can go from ENOTSUP to supported + * is if that fd is freed up by a close(), and close will clear + * the entry for that fd. + * + * Each fd gets a bit in the array _kaio_supported[]. + * + * uint32_t _kaio_supported[MAX_KAIO_FDARRAY_SIZE]; + * + * Array is MAX_KAIO_FDARRAY_SIZE 32-bit elements, for 8KB. + * If more than (MAX_KAIO_FDARRAY_SIZE * KAIO_FDARRAY_ELEM_SIZE) + * files are open, this can be expanded.
+ */ + +#define MAX_KAIO_FDARRAY_SIZE 2048 +#define KAIO_FDARRAY_ELEM_SIZE WORD_BIT /* uint32_t */ + +#define MAX_KAIO_FDS (MAX_KAIO_FDARRAY_SIZE * KAIO_FDARRAY_ELEM_SIZE) + +#define VALID_FD(fdes) ((fdes) >= 0 && (fdes) < MAX_KAIO_FDS) + +#define KAIO_SUPPORTED(fdes) \ + (!VALID_FD(fdes) || \ + ((_kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] & \ + (uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE))) == 0)) + +#define SET_KAIO_NOT_SUPPORTED(fdes) \ + if (VALID_FD(fdes)) \ + _kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] |= \ + (uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE)) + +#define CLEAR_KAIO_SUPPORTED(fdes) \ + if (VALID_FD(fdes)) \ + _kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] &= \ + ~(uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE)) + +struct aio_worker { + aio_worker_t *work_forw; /* forward link in list of workers */ + aio_worker_t *work_backw; /* backwards link in list of workers */ + mutex_t work_qlock1; /* lock for work queue 1 */ + cond_t work_idle_cv; /* place to sleep when idle */ + aio_req_t *work_head1; /* head of work request queue 1 */ + aio_req_t *work_tail1; /* tail of work request queue 1 */ + aio_req_t *work_next1; /* work queue one's next pointer */ + aio_req_t *work_prev1; /* last request done from queue 1 */ + aio_req_t *work_req; /* active work request */ + thread_t work_tid; /* worker's thread-id */ + int work_count1; /* length of work queue one */ + int work_done1; /* number of requests done */ + int work_minload1; /* min length of queue */ + int work_idleflg; /* when set, worker is idle */ + sigjmp_buf work_jmp_buf; /* cancellation point */ +}; + +struct aio_hash { /* resultp hash table */ + mutex_t hash_lock; + aio_req_t *hash_ptr; +#if !defined(_LP64) + void *hash_pad; /* ensure sizeof (aio_hash_t) == 32 */ +#endif +}; + +extern aio_hash_t *_aio_hash; + +#define HASHSZ 2048 /* power of 2 */ +#define AIOHASH(resultp) ((((uintptr_t)(resultp) >> 17) ^ \ + ((uintptr_t)(resultp) >> 2)) & (HASHSZ - 1)) +#define POSIX_AIO(x) ((x)->req_type == AIO_POSIX_REQ) + +extern int __uaio_init(void); +extern void _kaio_init(void); +extern intptr_t _kaio(int, ...); +extern int _aiorw(int, caddr_t, int, offset_t, int, aio_result_t *, int); +extern int _aio_rw(aiocb_t *, aio_lio_t *, aio_worker_t **, int, int); +#if !defined(_LP64) +extern int _aio_rw64(aiocb64_t *, aio_lio_t *, aio_worker_t **, int, int); +#endif +extern int _aio_create_worker(aio_req_t *, int); +extern int _aio_cancel_req(aio_worker_t *, aio_req_t *, int *, int *); +extern int aiocancel_all(int); +extern void aio_panic(const char *); +extern aio_req_t *_aio_hash_find(aio_result_t *); +extern aio_req_t *_aio_hash_del(aio_result_t *); +extern void _aio_req_mark_done(aio_req_t *); +extern void _aio_waitn_wakeup(void); +extern aio_worker_t *_aio_worker_alloc(void); +extern void _aio_worker_free(void *); +extern aio_req_t *_aio_req_alloc(void); +extern void _aio_req_free(aio_req_t *); +extern aio_lio_t *_aio_lio_alloc(void); +extern void _aio_lio_free(aio_lio_t *); +extern int _aio_idle(aio_worker_t *); +extern void *_aio_do_request(void *); +extern void *_aio_do_notify(void *); +extern void _lio_remove(aio_req_t *); +extern aio_req_t *_aio_req_remove(aio_req_t *); +extern int _aio_get_timedelta(timespec_t *, timespec_t *); +extern aio_result_t *_aio_req_done(void); +extern void _aio_set_result(aio_req_t *, ssize_t, int); +extern int _aio_sigev_thread_init(struct sigevent *); +extern int _aio_sigev_thread(aiocb_t *); +#if !defined(_LP64) +extern int _aio_sigev_thread64(aiocb64_t *); +#endif + +extern 
aio_worker_t *_kaiowp; /* points to kaio cleanup thread */ +extern aio_worker_t *__workers_rw; /* list of all rw workers */ +extern aio_worker_t *__nextworker_rw; /* worker chosen for next rw request */ +extern int __rw_workerscnt; /* number of rw workers */ +extern aio_worker_t *__workers_no; /* list of all notification workers */ +extern aio_worker_t *__nextworker_no; /* worker chosen, next notification */ +extern int __no_workerscnt; /* number of notification workers */ +extern mutex_t __aio_initlock; /* makes aio initialization atomic */ +extern cond_t __aio_initcv; +extern int __aio_initbusy; +extern mutex_t __aio_mutex; /* global aio lock */ +extern cond_t _aio_iowait_cv; /* wait for userland I/Os */ +extern cond_t _aio_waitn_cv; /* wait for end of aio_waitn */ +extern int _max_workers; /* max number of workers permitted */ +extern int _min_workers; /* min number of workers */ +extern sigset_t _worker_set; /* worker's signal mask */ +extern int _aio_worker_cnt; /* number of AIO workers */ +extern int _sigio_enabled; /* when set, send SIGIO signal */ +extern pid_t __pid; /* process's PID */ +extern int __uaio_ok; /* indicates if aio is initialized */ +extern int _kaio_ok; /* indicates if kaio is initialized */ +extern pthread_key_t _aio_key; /* for thread-specific data */ +extern aio_req_t *_aio_done_tail; /* list of done requests */ +extern aio_req_t *_aio_done_head; +extern aio_req_t *_aio_doneq; +extern int _aio_freelist_cnt; +extern int _aio_allocated_cnt; +extern int _aio_donecnt; +extern int _aio_doneq_cnt; +extern int _aio_waitncnt; /* # of requests for aio_waitn */ +extern int _aio_outstand_cnt; /* # of outstanding requests */ +extern int _kaio_outstand_cnt; /* # of outstanding kaio requests */ +extern int _aio_req_done_cnt; /* req. done but not in "done queue" */ +extern int _aio_kernel_suspend; /* active kernel kaio calls */ +extern int _aio_suscv_cnt; /* aio_suspend calls waiting on cv's */ +extern int _aiowait_flag; /* when set, aiowait() is inprogress */ +extern int _aio_flags; /* see defines, above */ +extern uint32_t *_kaio_supported; + +extern const sigset_t maskset; /* all maskable signals */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ASYNCIO_H */ diff --git a/usr/src/lib/libc/inc/mtlib.h b/usr/src/lib/libc/inc/mtlib.h index 89c2376949..d864e8e75a 100644 --- a/usr/src/lib/libc/inc/mtlib.h +++ b/usr/src/lib/libc/inc/mtlib.h @@ -57,6 +57,15 @@ extern int __rw_unlock(rwlock_t *); extern void lrw_rdlock(rwlock_t *); extern void lrw_wrlock(rwlock_t *); extern void lrw_unlock(rwlock_t *); +extern void sig_mutex_lock(mutex_t *); +extern void sig_mutex_unlock(mutex_t *); +extern int sig_mutex_trylock(mutex_t *); +extern int sig_cond_wait(cond_t *, mutex_t *); +extern int sig_cond_reltimedwait(cond_t *, mutex_t *, const timespec_t *); + +/* the private libc thread-safe allocator */ +extern void *lmalloc(size_t); +extern void lfree(void *, size_t); /* the rest are public functions */ extern int _mutex_init(mutex_t *, int, void *); @@ -91,6 +100,8 @@ extern thread_t _thr_self(void); extern void _thr_exit(void *); extern size_t _thr_min_stack(void); extern int _thr_kill(thread_t, int); +extern int _thr_create(void *, size_t, void *(*)(void *), void *, long, + thread_t *); extern int _thr_keycreate(thread_key_t *, void (*)(void *)); extern int _thr_setspecific(thread_key_t, void *); extern int _thr_getspecific(thread_key_t, void **); diff --git a/usr/src/lib/libc/inc/rtsched.h b/usr/src/lib/libc/inc/rtsched.h new file mode 100644 index 0000000000..90ae11c3b2 --- /dev/null +++ 
b/usr/src/lib/libc/inc/rtsched.h @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _RTSCHED_H +#define _RTSCHED_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/priocntl.h> + +/* + * This definition is private to libc but is used in more than one subsystem. + */ +struct pcclass { + short pcc_state; + pri_t pcc_primin; + pri_t pcc_primax; + pcinfo_t pcc_info; +}; + +#endif /* _RTSCHED_H */ diff --git a/usr/src/lib/libc/inc/synonyms.h b/usr/src/lib/libc/inc/synonyms.h index 179f25f627..4de926dc9f 100644 --- a/usr/src/lib/libc/inc/synonyms.h +++ b/usr/src/lib/libc/inc/synonyms.h @@ -223,6 +223,10 @@ extern "C" { #define chown _chown #define chroot _chroot #define _cladm __cladm +#define clock_getres _clock_getres +#define clock_gettime _clock_gettime +#define clock_nanosleep _clock_nanosleep +#define clock_settime _clock_settime #define close _close #define closedir _closedir #define closefrom _closefrom @@ -264,8 +268,8 @@ extern "C" { #define decimal_to_single _decimal_to_single #define dgettext _dgettext #define dirname _dirname -#define dladdr _dladdr #define dladdr1 _dladdr1 +#define dladdr _dladdr #define dlamd64getunwind _dlamd64getunwind #define dlclose _dlclose #define dldump _dldump @@ -495,7 +499,6 @@ extern "C" { #define iswupper _iswupper #define iswxdigit _iswxdigit #define jrand48 _jrand48 -#define kaio _kaio #define kill _kill #define l64a _l64a #define ladd _ladd @@ -590,12 +593,19 @@ extern "C" { #define munlockall _munlockall #define munlock _munlock #define munmap _munmap -#define mutex_destroy _mutex_destroy -#define mutex_held _mutex_held -#define mutex_init _mutex_init -#define mutex_lock _mutex_lock -#define mutex_trylock _mutex_trylock -#define mutex_unlock _mutex_unlock +#define _mutex_destroy __mutex_destroy +#define mutex_destroy __mutex_destroy +#define _mutex_held __mutex_held +#define mutex_held __mutex_held +#define _mutex_init __mutex_init +#define mutex_init __mutex_init +#define _mutex_lock __mutex_lock +#define mutex_lock __mutex_lock +#define _mutex_trylock __mutex_trylock +#define mutex_trylock __mutex_trylock +#define _mutex_unlock __mutex_unlock +#define mutex_unlock __mutex_unlock +#define nanosleep _nanosleep #define nfs_getfh _nfs_getfh #define nfssvc _nfssvc #define nftw64 _nftw64 @@ -627,7 +637,6 @@ extern "C" { #define port_alert _port_alert #define port_associate _port_associate #define port_create _port_create -#define port_dispatch _port_dispatch #define port_dissociate _port_dissociate #define port_getn _port_getn #define port_get _port_get @@ -865,12 +874,23 @@ extern "C" { #define sema_timedwait _sema_timedwait #define 
sema_trywait _sema_trywait #define sema_wait _sema_wait +#define sem_close _sem_close #define semctl64 _semctl64 #define semctl _semctl +#define sem_destroy _sem_destroy #define semget _semget +#define sem_getvalue _sem_getvalue #define semids _semids +#define sem_init _sem_init +#define sem_open _sem_open #define semop _semop +#define sem_post _sem_post +#define sem_reltimedwait_np _sem_reltimedwait_np #define semtimedop _semtimedop +#define sem_timedwait _sem_timedwait +#define sem_trywait _sem_trywait +#define sem_unlink _sem_unlink +#define sem_wait _sem_wait #define setcontext _setcontext #define setegid _setegid #define setenv _setenv @@ -927,12 +947,16 @@ extern "C" { #define sigpause _sigpause #define sigpending _sigpending #define sigprocmask _sigprocmask +#define sigqueue _sigqueue #define sigrelse _sigrelse #define sigsendset _sigsendset #define sigsend _sigsend #define sigsetjmp _sigsetjmp #define sigset _sigset +#define sigstack _sigstack #define sigsuspend _sigsuspend +#define sigtimedwait _sigtimedwait +#define sigwaitinfo _sigwaitinfo #define sigwait _sigwait #define single_to_decimal _single_to_decimal #define s_ioctl _s_ioctl @@ -1018,6 +1042,11 @@ extern "C" { #define thr_suspend _thr_suspend #define thr_wait_mutator _thr_wait_mutator #define thr_yield _thr_yield +#define timer_create _timer_create +#define timer_delete _timer_delete +#define timer_getoverrun _timer_getoverrun +#define timer_gettime _timer_gettime +#define timer_settime _timer_settime #define times _times #define time _time #define tmpnam_r _tmpnam_r diff --git a/usr/src/lib/libc/inc/thr_debug.h b/usr/src/lib/libc/inc/thr_debug.h new file mode 100644 index 0000000000..5e8de4ef0a --- /dev/null +++ b/usr/src/lib/libc/inc/thr_debug.h @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _THR_DEBUG_H +#define _THR_DEBUG_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(THREAD_DEBUG) + +extern void __assfail(const char *, const char *, int); +#pragma rarely_called(__assfail) +#define ASSERT(EX) (void)((EX) || (__assfail(#EX, __FILE__, __LINE__), 0)) + +#else + +#define ASSERT(EX) ((void)0) + +#endif + +#endif /* _THR_DEBUG_H */ diff --git a/usr/src/lib/libc/inc/thr_uberdata.h b/usr/src/lib/libc/inc/thr_uberdata.h index c7b6001926..2671ac0a69 100644 --- a/usr/src/lib/libc/inc/thr_uberdata.h +++ b/usr/src/lib/libc/inc/thr_uberdata.h @@ -53,12 +53,10 @@ #include <schedctl.h> #include <sys/priocntl.h> #include <thread_db.h> +#include <setjmp.h> #include "libc_int.h" #include "tdb_agent.h" - -/* belongs in <pthread.h> */ -#define PTHREAD_CREATE_DAEMON_NP 0x100 /* = THR_DAEMON */ -#define PTHREAD_CREATE_NONDAEMON_NP 0 +#include "thr_debug.h" /* * This is an implementation-specific include file for threading support. @@ -208,14 +206,6 @@ typedef union { #define PRIO_INHERIT 2 #define PRIO_DISINHERIT 3 -struct pcclass { - short pcc_state; - pri_t pcc_primin; - pri_t pcc_primax; - pcinfo_t pcc_info; -}; -extern struct pcclass ts_class, rt_class; - #define MUTEX_TRY 0 #define MUTEX_LOCK 1 @@ -608,7 +598,7 @@ typedef struct ulwp { #define MASKSET0 (FILLSET0 & ~CANTMASK0) #define MASKSET1 (FILLSET1 & ~CANTMASK1) -extern const sigset_t maskset; /* set of all maskable signals */ +extern const sigset_t maskset; /* set of all maskable signals */ extern int thread_adaptive_spin; extern uint_t thread_max_spinners; @@ -1048,7 +1038,7 @@ extern greg_t stkptr(void); /* * Implementation functions. Not visible outside of the library itself. */ -extern int ___nanosleep(const timespec_t *, timespec_t *); +extern int __nanosleep(const timespec_t *, timespec_t *); extern void getgregs(ulwp_t *, gregset_t); extern void setgregs(ulwp_t *, gregset_t); extern void thr_panic(const char *); @@ -1092,18 +1082,6 @@ extern void _flush_windows(void); #endif extern void set_curthread(void *); -#if defined(THREAD_DEBUG) - -extern void __assfail(const char *, const char *, int); -#pragma rarely_called(__assfail) -#define ASSERT(EX) (void)((EX) || (__assfail(#EX, __FILE__, __LINE__), 0)) - -#else /* THREAD_DEBUG */ - -#define ASSERT(EX) ((void)0) - -#endif /* THREAD_DEBUG */ - /* enter a critical section */ #define enter_critical(self) (self->ul_critical++) @@ -1174,21 +1152,35 @@ extern void *_thr_setup(ulwp_t *); extern void _fpinherit(ulwp_t *); extern void _lwp_start(void); extern void _lwp_terminate(void); -extern void lmutex_unlock(mutex_t *); extern void lmutex_lock(mutex_t *); +extern void lmutex_unlock(mutex_t *); +extern void sig_mutex_lock(mutex_t *); +extern void sig_mutex_unlock(mutex_t *); +extern int sig_mutex_trylock(mutex_t *); +extern int sig_cond_wait(cond_t *, mutex_t *); +extern int sig_cond_reltimedwait(cond_t *, mutex_t *, const timespec_t *); extern void _prefork_handler(void); extern void _postfork_parent_handler(void); extern void _postfork_child_handler(void); -extern void _postfork1_child(void); +extern void postfork1_child(void); +extern void postfork1_child_aio(void); +extern void postfork1_child_sigev_aio(void); +extern void postfork1_child_sigev_mq(void); +extern void postfork1_child_sigev_timer(void); +extern void postfork1_child_tpool(void); extern int fork_lock_enter(const char *); extern void fork_lock_exit(void); extern void suspend_fork(void); extern void continue_fork(int); extern void do_sigcancel(void); -extern void init_sigcancel(void); +extern void 
setup_cancelsig(int); +extern void init_sigev_thread(void); +extern void init_aio(void); extern void _cancelon(void); extern void _canceloff(void); extern void _canceloff_nocancel(void); +extern void _cancel_prologue(void); +extern void _cancel_epilogue(void); extern void no_preempt(ulwp_t *); extern void preempt(ulwp_t *); extern void _thrp_unwind(void *); @@ -1249,8 +1241,18 @@ extern int __lwp_sigmask(int, const sigset_t *, sigset_t *); extern void __sighndlr(int, siginfo_t *, ucontext_t *, void (*)()); extern caddr_t __sighndlrend; #pragma unknown_control_flow(__sighndlr) +extern void _siglongjmp(sigjmp_buf, int); +extern int _pthread_setspecific(pthread_key_t, const void *); +extern void *_pthread_getspecific(pthread_key_t); extern void _pthread_exit(void *); +extern void _private_testcancel(void); + +/* belongs in <pthread.h> */ +#define PTHREAD_CREATE_DAEMON_NP 0x100 /* = THR_DAEMON */ +#define PTHREAD_CREATE_NONDAEMON_NP 0 +extern int _pthread_attr_setdaemonstate_np(pthread_attr_t *, int); +extern int _pthread_attr_getdaemonstate_np(const pthread_attr_t *, int *); /* these are private to the library */ extern int _private_mutex_init(mutex_t *, int, void *); @@ -1293,8 +1295,10 @@ extern int rw_read_is_held(rwlock_t *); extern int rw_write_is_held(rwlock_t *); extern int _thr_continue(thread_t); -extern int _thrp_create(void *, size_t, void *(*func)(void *), void *, - long, thread_t *, pri_t, int, size_t); +extern int _thr_create(void *, size_t, void *(*)(void *), void *, long, + thread_t *); +extern int _thrp_create(void *, size_t, void *(*)(void *), void *, long, + thread_t *, pri_t, int, size_t); extern int _thr_getprio(thread_t, int *); extern int _thr_getspecific(thread_key_t, void **); extern int _thr_join(thread_t, thread_t *, void **); @@ -1320,7 +1324,8 @@ extern int _thread_setschedparam_main(pthread_t, int, const struct sched_param *, int); extern int _validate_rt_prio(int, int); extern int _thrp_setlwpprio(lwpid_t, int, int); -extern pri_t _map_rtpri_to_gp(pri_t); +extern pri_t map_rtpri_to_gp(pri_t); +extern int get_info_by_policy(int); /* * System call wrappers (direct interfaces to the kernel) diff --git a/usr/src/lib/libc/inc/thread_pool.h b/usr/src/lib/libc/inc/thread_pool.h new file mode 100644 index 0000000000..200323703c --- /dev/null +++ b/usr/src/lib/libc/inc/thread_pool.h @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _THREAD_POOL_H_ +#define _THREAD_POOL_H_ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <thread.h> +#include <pthread.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct tpool tpool_t; /* opaque thread pool descriptor */ + +#if defined(__STDC__) + +extern tpool_t *tpool_create(uint_t min_threads, uint_t max_threads, + uint_t linger, pthread_attr_t *attr); +extern int tpool_dispatch(tpool_t *tpool, + void (*func)(void *), void *arg); +extern void tpool_destroy(tpool_t *tpool); +extern void tpool_abandon(tpool_t *tpool); +extern void tpool_wait(tpool_t *tpool); +extern void tpool_suspend(tpool_t *tpool); +extern int tpool_suspended(tpool_t *tpool); +extern void tpool_resume(tpool_t *tpool); +extern int tpool_member(tpool_t *tpool); + +#else /* Non ANSI */ + +extern tpool_t *tpool_create(); +extern int tpool_dispatch(); +extern void tpool_destroy(); +extern void tpool_abandon(); +extern void tpool_wait(); +extern void tpool_suspend(); +extern int tpool_suspended(); +extern void tpool_resume(); +extern int tpool_member(); + +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +#endif /* _THREAD_POOL_H_ */ diff --git a/usr/src/lib/libc/port/aio/aio.c b/usr/src/lib/libc/port/aio/aio.c new file mode 100644 index 0000000000..28d425d702 --- /dev/null +++ b/usr/src/lib/libc/port/aio/aio.c @@ -0,0 +1,2202 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "synonyms.h" +#include "thr_uberdata.h" +#include "asyncio.h" +#include <atomic.h> +#include <sys/param.h> +#include <sys/file.h> +#include <sys/port.h> + +static int _aio_hash_insert(aio_result_t *, aio_req_t *); +static aio_req_t *_aio_req_get(aio_worker_t *); +static void _aio_req_add(aio_req_t *, aio_worker_t **, int); +static void _aio_req_del(aio_worker_t *, aio_req_t *, int); +static void _aio_work_done(aio_worker_t *); +static void _aio_enq_doneq(aio_req_t *); + +extern void _aio_lio_free(aio_lio_t *); + +extern int __fdsync(int, int); +extern int _port_dispatch(int, int, int, int, uintptr_t, void *); + +static int _aio_fsync_del(aio_worker_t *, aio_req_t *); +static void _aiodone(aio_req_t *, ssize_t, int); +static void _aio_cancel_work(aio_worker_t *, int, int *, int *); +static void _aio_finish_request(aio_worker_t *, ssize_t, int); + +/* + * switch for kernel async I/O + */ +int _kaio_ok = 0; /* 0 = disabled, 1 = on, -1 = error */ + +/* + * Key for thread-specific data + */ +pthread_key_t _aio_key; + +/* + * Array for determining whether or not a file supports kaio. + * Initialized in _kaio_init(). 
+ */ +uint32_t *_kaio_supported = NULL; + +/* + * workers for read/write requests + * (__aio_mutex lock protects circular linked list of workers) + */ +aio_worker_t *__workers_rw; /* circular list of AIO workers */ +aio_worker_t *__nextworker_rw; /* next worker in list of workers */ +int __rw_workerscnt; /* number of read/write workers */ + +/* + * worker for notification requests. + */ +aio_worker_t *__workers_no; /* circular list of AIO workers */ +aio_worker_t *__nextworker_no; /* next worker in list of workers */ +int __no_workerscnt; /* number of notification workers */ + +aio_req_t *_aio_done_tail; /* list of done requests */ +aio_req_t *_aio_done_head; + +mutex_t __aio_initlock = DEFAULTMUTEX; /* makes aio initialization atomic */ +cond_t __aio_initcv = DEFAULTCV; +int __aio_initbusy = 0; + +mutex_t __aio_mutex = DEFAULTMUTEX; /* protects counts, and linked lists */ +cond_t _aio_iowait_cv = DEFAULTCV; /* wait for userland I/Os */ + +pid_t __pid = (pid_t)-1; /* initialize as invalid pid */ +int _sigio_enabled = 0; /* when set, send SIGIO signal */ + +aio_hash_t *_aio_hash; + +aio_req_t *_aio_doneq; /* double linked done queue list */ + +int _aio_donecnt = 0; +int _aio_waitncnt = 0; /* # of requests for aio_waitn */ +int _aio_doneq_cnt = 0; +int _aio_outstand_cnt = 0; /* # of outstanding requests */ +int _kaio_outstand_cnt = 0; /* # of outstanding kaio requests */ +int _aio_req_done_cnt = 0; /* req. done but not in "done queue" */ +int _aio_kernel_suspend = 0; /* active kernel kaio calls */ +int _aio_suscv_cnt = 0; /* aio_suspend calls waiting on cv's */ + +int _max_workers = 256; /* max number of workers permitted */ +int _min_workers = 4; /* min number of workers */ +int _minworkload = 2; /* min number of requests in q */ +int _aio_worker_cnt = 0; /* number of workers to do requests */ +int __uaio_ok = 0; /* AIO has been enabled */ +sigset_t _worker_set; /* worker's signal mask */ + +int _aiowait_flag = 0; /* when set, aiowait() is inprogress */ +int _aio_flags = 0; /* see asyncio.h defines */ + +aio_worker_t *_kaiowp = NULL; /* points to kaio cleanup thread */ + +int hz; /* clock ticks per second */ + +static int +_kaio_supported_init(void) +{ + void *ptr; + size_t size; + + if (_kaio_supported != NULL) /* already initialized */ + return (0); + + size = MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t); + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, (off_t)0); + if (ptr == MAP_FAILED) + return (-1); + _kaio_supported = ptr; + return (0); +} + +/* + * The aio subsystem is initialized when an AIO request is made. + * Constants are initialized, such as the max number of workers that + * the subsystem can create, and the minimum number of workers + * permitted before imposing some restrictions. Also, some + * workers are created. + */ +int +__uaio_init(void) +{ + int ret = -1; + int i; + + lmutex_lock(&__aio_initlock); + while (__aio_initbusy) + (void) _cond_wait(&__aio_initcv, &__aio_initlock); + if (__uaio_ok) { /* already initialized */ + lmutex_unlock(&__aio_initlock); + return (0); + } + __aio_initbusy = 1; + lmutex_unlock(&__aio_initlock); + + hz = (int)sysconf(_SC_CLK_TCK); + __pid = getpid(); + + setup_cancelsig(SIGAIOCANCEL); + + if (_kaio_supported_init() != 0) + goto out; + + /* + * Allocate and initialize the hash table.
+ */ + /* LINTED pointer cast */ + _aio_hash = (aio_hash_t *)mmap(NULL, + HASHSZ * sizeof (aio_hash_t), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, (off_t)0); + if ((void *)_aio_hash == MAP_FAILED) { + _aio_hash = NULL; + goto out; + } + for (i = 0; i < HASHSZ; i++) + (void) mutex_init(&_aio_hash[i].hash_lock, USYNC_THREAD, NULL); + + /* + * Initialize worker's signal mask to only catch SIGAIOCANCEL. + */ + (void) sigfillset(&_worker_set); + (void) sigdelset(&_worker_set, SIGAIOCANCEL); + + /* + * Create the minimum number of read/write workers. + */ + for (i = 0; i < _min_workers; i++) + (void) _aio_create_worker(NULL, AIOREAD); + + /* + * Create one worker to send asynchronous notifications. + */ + (void) _aio_create_worker(NULL, AIONOTIFY); + + ret = 0; +out: + lmutex_lock(&__aio_initlock); + if (ret == 0) + __uaio_ok = 1; + __aio_initbusy = 0; + (void) cond_broadcast(&__aio_initcv); + lmutex_unlock(&__aio_initlock); + return (ret); +} + +/* + * Called from close() before actually performing the real _close(). + */ +void +_aio_close(int fd) +{ + if (fd < 0) /* avoid cancelling everything */ + return; + /* + * Cancel all outstanding aio requests for this file descriptor. + */ + if (__uaio_ok) + (void) aiocancel_all(fd); + /* + * If we have allocated the bit array, clear the bit for this file. + * The next open may re-use this file descriptor and the new file + * may have different kaio() behaviour. + */ + if (_kaio_supported != NULL) + CLEAR_KAIO_SUPPORTED(fd); +} + +/* + * special kaio cleanup thread sits in a loop in the + * kernel waiting for pending kaio requests to complete. + */ +void * +_kaio_cleanup_thread(void *arg) +{ + if (pthread_setspecific(_aio_key, arg) != 0) + aio_panic("_kaio_cleanup_thread, pthread_setspecific()"); + (void) _kaio(AIOSTART); + return (arg); +} + +/* + * initialize kaio. 
+ */ +void +_kaio_init() +{ + int error; + sigset_t oset; + + lmutex_lock(&__aio_initlock); + while (__aio_initbusy) + (void) _cond_wait(&__aio_initcv, &__aio_initlock); + if (_kaio_ok) { /* already initialized */ + lmutex_unlock(&__aio_initlock); + return; + } + __aio_initbusy = 1; + lmutex_unlock(&__aio_initlock); + + if (_kaio_supported_init() != 0) + error = ENOMEM; + else if ((_kaiowp = _aio_worker_alloc()) == NULL) + error = ENOMEM; + else if ((error = (int)_kaio(AIOINIT)) == 0) { + (void) pthread_sigmask(SIG_SETMASK, &maskset, &oset); + error = thr_create(NULL, AIOSTKSIZE, _kaio_cleanup_thread, + _kaiowp, THR_DAEMON, &_kaiowp->work_tid); + (void) pthread_sigmask(SIG_SETMASK, &oset, NULL); + } + if (error && _kaiowp != NULL) { + _aio_worker_free(_kaiowp); + _kaiowp = NULL; + } + + lmutex_lock(&__aio_initlock); + if (error) + _kaio_ok = -1; + else + _kaio_ok = 1; + __aio_initbusy = 0; + (void) cond_broadcast(&__aio_initcv); + lmutex_unlock(&__aio_initlock); +} + +int +aioread(int fd, caddr_t buf, int bufsz, off_t offset, int whence, + aio_result_t *resultp) +{ + return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOREAD)); +} + +int +aiowrite(int fd, caddr_t buf, int bufsz, off_t offset, int whence, + aio_result_t *resultp) +{ + return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOWRITE)); +} + +#if !defined(_LP64) +int +aioread64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence, + aio_result_t *resultp) +{ + return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOAREAD64)); +} + +int +aiowrite64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence, + aio_result_t *resultp) +{ + return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOAWRITE64)); +} +#endif /* !defined(_LP64) */ + +int +_aiorw(int fd, caddr_t buf, int bufsz, offset_t offset, int whence, + aio_result_t *resultp, int mode) +{ + aio_req_t *reqp; + aio_args_t *ap; + offset_t loffset; + struct stat stat; + int error = 0; + int kerr; + int umode; + + switch (whence) { + + case SEEK_SET: + loffset = offset; + break; + case SEEK_CUR: + if ((loffset = llseek(fd, 0, SEEK_CUR)) == -1) + error = -1; + else + loffset += offset; + break; + case SEEK_END: + if (fstat(fd, &stat) == -1) + error = -1; + else + loffset = offset + stat.st_size; + break; + default: + errno = EINVAL; + error = -1; + } + + if (error) + return (error); + + /* initialize kaio */ + if (!_kaio_ok) + _kaio_init(); + + /* + * _aio_do_request() needs the original request code (mode) to be able + * to choose the appropriate 32/64 bit function. All other functions + * only require the difference between READ and WRITE (umode). + */ + if (mode == AIOAREAD64 || mode == AIOAWRITE64) + umode = mode - AIOAREAD64; + else + umode = mode; + + /* + * Try kernel aio first. + * If errno is ENOTSUP/EBADFD, fall back to the thread implementation. + */ + if (_kaio_ok > 0 && KAIO_SUPPORTED(fd)) { + resultp->aio_errno = 0; + sig_mutex_lock(&__aio_mutex); + _kaio_outstand_cnt++; + kerr = (int)_kaio(((resultp->aio_return == AIO_INPROGRESS) ?
+ (umode | AIO_POLL_BIT) : umode), + fd, buf, bufsz, loffset, resultp); + if (kerr == 0) { + sig_mutex_unlock(&__aio_mutex); + return (0); + } + _kaio_outstand_cnt--; + sig_mutex_unlock(&__aio_mutex); + if (errno != ENOTSUP && errno != EBADFD) + return (-1); + if (errno == EBADFD) + SET_KAIO_NOT_SUPPORTED(fd); + } + + if (!__uaio_ok && __uaio_init() == -1) + return (-1); + + if ((reqp = _aio_req_alloc()) == NULL) { + errno = EAGAIN; + return (-1); + } + + /* + * _aio_do_request() checks reqp->req_op to differentiate + * between 32 and 64 bit access. + */ + reqp->req_op = mode; + reqp->req_resultp = resultp; + ap = &reqp->req_args; + ap->fd = fd; + ap->buf = buf; + ap->bufsz = bufsz; + ap->offset = loffset; + + if (_aio_hash_insert(resultp, reqp) != 0) { + _aio_req_free(reqp); + errno = EINVAL; + return (-1); + } + /* + * _aio_req_add() only needs the difference between READ and + * WRITE to choose the right worker queue. + */ + _aio_req_add(reqp, &__nextworker_rw, umode); + return (0); +} + +int +aiocancel(aio_result_t *resultp) +{ + aio_req_t *reqp; + aio_worker_t *aiowp; + int ret; + int done = 0; + int canceled = 0; + + if (!__uaio_ok) { + errno = EINVAL; + return (-1); + } + + sig_mutex_lock(&__aio_mutex); + reqp = _aio_hash_find(resultp); + if (reqp == NULL) { + if (_aio_outstand_cnt == _aio_req_done_cnt) + errno = EINVAL; + else + errno = EACCES; + ret = -1; + } else { + aiowp = reqp->req_worker; + sig_mutex_lock(&aiowp->work_qlock1); + (void) _aio_cancel_req(aiowp, reqp, &canceled, &done); + sig_mutex_unlock(&aiowp->work_qlock1); + + if (canceled) { + ret = 0; + } else { + if (_aio_outstand_cnt == 0 || + _aio_outstand_cnt == _aio_req_done_cnt) + errno = EINVAL; + else + errno = EACCES; + ret = -1; + } + } + sig_mutex_unlock(&__aio_mutex); + return (ret); +} + +/* + * This must be asynch safe + */ +aio_result_t * +aiowait(struct timeval *uwait) +{ + aio_result_t *uresultp; + aio_result_t *kresultp; + aio_result_t *resultp; + int dontblock; + int timedwait = 0; + int kaio_errno = 0; + struct timeval twait; + struct timeval *wait = NULL; + hrtime_t hrtend; + hrtime_t hres; + + if (uwait) { + /* + * Check for a valid specified wait time. + * If it is invalid, fail the call right away. 
+ */ + if (uwait->tv_sec < 0 || uwait->tv_usec < 0 || + uwait->tv_usec >= MICROSEC) { + errno = EINVAL; + return ((aio_result_t *)-1); + } + + if (uwait->tv_sec > 0 || uwait->tv_usec > 0) { + hrtend = gethrtime() + + (hrtime_t)uwait->tv_sec * NANOSEC + + (hrtime_t)uwait->tv_usec * (NANOSEC / MICROSEC); + twait = *uwait; + wait = &twait; + timedwait++; + } else { + /* polling */ + sig_mutex_lock(&__aio_mutex); + if (_kaio_outstand_cnt == 0) { + kresultp = (aio_result_t *)-1; + } else { + kresultp = (aio_result_t *)_kaio(AIOWAIT, + (struct timeval *)-1, 1); + if (kresultp != (aio_result_t *)-1 && + kresultp != NULL && + kresultp != (aio_result_t *)1) { + _kaio_outstand_cnt--; + sig_mutex_unlock(&__aio_mutex); + return (kresultp); + } + } + uresultp = _aio_req_done(); + sig_mutex_unlock(&__aio_mutex); + if (uresultp != NULL && + uresultp != (aio_result_t *)-1) { + return (uresultp); + } + if (uresultp == (aio_result_t *)-1 && + kresultp == (aio_result_t *)-1) { + errno = EINVAL; + return ((aio_result_t *)-1); + } else { + return (NULL); + } + } + } + + for (;;) { + sig_mutex_lock(&__aio_mutex); + uresultp = _aio_req_done(); + if (uresultp != NULL && uresultp != (aio_result_t *)-1) { + sig_mutex_unlock(&__aio_mutex); + resultp = uresultp; + break; + } + _aiowait_flag++; + dontblock = (uresultp == (aio_result_t *)-1); + if (dontblock && _kaio_outstand_cnt == 0) { + kresultp = (aio_result_t *)-1; + kaio_errno = EINVAL; + } else { + sig_mutex_unlock(&__aio_mutex); + kresultp = (aio_result_t *)_kaio(AIOWAIT, + wait, dontblock); + sig_mutex_lock(&__aio_mutex); + kaio_errno = errno; + } + _aiowait_flag--; + sig_mutex_unlock(&__aio_mutex); + if (kresultp == (aio_result_t *)1) { + /* aiowait() awakened by an aionotify() */ + continue; + } else if (kresultp != NULL && + kresultp != (aio_result_t *)-1) { + resultp = kresultp; + sig_mutex_lock(&__aio_mutex); + _kaio_outstand_cnt--; + sig_mutex_unlock(&__aio_mutex); + break; + } else if (kresultp == (aio_result_t *)-1 && + kaio_errno == EINVAL && + uresultp == (aio_result_t *)-1) { + errno = kaio_errno; + resultp = (aio_result_t *)-1; + break; + } else if (kresultp == (aio_result_t *)-1 && + kaio_errno == EINTR) { + errno = kaio_errno; + resultp = (aio_result_t *)-1; + break; + } else if (timedwait) { + hres = hrtend - gethrtime(); + if (hres <= 0) { + /* time is up; return */ + resultp = NULL; + break; + } else { + /* + * Some time left. Round up the remaining time + * in nanoseconds to microsec. Retry the call. + */ + hres += (NANOSEC / MICROSEC) - 1; + wait->tv_sec = hres / NANOSEC; + wait->tv_usec = + (hres % NANOSEC) / (NANOSEC / MICROSEC); + } + } else { + ASSERT(kresultp == NULL && uresultp == NULL); + resultp = NULL; + continue; + } + } + return (resultp); +} + +/* + * _aio_get_timedelta calculates the remaining time and stores the result + * into timespec_t *wait. 
+ */
+
+int
+_aio_get_timedelta(timespec_t *end, timespec_t *wait)
+{
+	int ret = 0;
+	struct timeval cur;
+	timespec_t curtime;
+
+	(void) gettimeofday(&cur, NULL);
+	curtime.tv_sec = cur.tv_sec;
+	curtime.tv_nsec = cur.tv_usec * 1000;	/* convert us to ns */
+
+	if (end->tv_sec >= curtime.tv_sec) {
+		wait->tv_sec = end->tv_sec - curtime.tv_sec;
+		if (end->tv_nsec >= curtime.tv_nsec) {
+			wait->tv_nsec = end->tv_nsec - curtime.tv_nsec;
+			if (wait->tv_sec == 0 && wait->tv_nsec == 0)
+				ret = -1;	/* timer expired */
+		} else {
+			if (end->tv_sec > curtime.tv_sec) {
+				wait->tv_sec -= 1;
+				wait->tv_nsec = NANOSEC -
+				    (curtime.tv_nsec - end->tv_nsec);
+			} else {
+				ret = -1;	/* timer expired */
+			}
+		}
+	} else {
+		ret = -1;
+	}
+	return (ret);
+}
+
+/*
+ * If closing by file descriptor: we simply cancel all of the outstanding
+ * aio requests and return. Each of those requests will have noticed the
+ * cancellation either before, during, or after initiating its I/O.
+ */
+int
+aiocancel_all(int fd)
+{
+	aio_req_t *reqp;
+	aio_req_t **reqpp;
+	aio_worker_t *first;
+	aio_worker_t *next;
+	int canceled = 0;
+	int done = 0;
+	int cancelall = 0;
+
+	sig_mutex_lock(&__aio_mutex);
+
+	if (_aio_outstand_cnt == 0) {
+		sig_mutex_unlock(&__aio_mutex);
+		return (AIO_ALLDONE);
+	}
+
+	/*
+	 * Cancel requests from the read/write workers' queues.
+	 */
+	first = __nextworker_rw;
+	next = first;
+	do {
+		_aio_cancel_work(next, fd, &canceled, &done);
+	} while ((next = next->work_forw) != first);
+
+	/*
+	 * Finally, check whether there are requests on the done queue
+	 * that should be canceled.
+	 */
+	if (fd < 0)
+		cancelall = 1;
+	reqpp = &_aio_done_tail;
+	while ((reqp = *reqpp) != NULL) {
+		if (cancelall || reqp->req_args.fd == fd) {
+			*reqpp = reqp->req_next;
+			_aio_donecnt--;
+			(void) _aio_hash_del(reqp->req_resultp);
+			_aio_req_free(reqp);
+		} else
+			reqpp = &reqp->req_next;
+	}
+	if (cancelall) {
+		ASSERT(_aio_donecnt == 0);
+		_aio_done_head = NULL;
+	}
+	sig_mutex_unlock(&__aio_mutex);
+
+	if (canceled && done == 0)
+		return (AIO_CANCELED);
+	else if (done && canceled == 0)
+		return (AIO_ALLDONE);
+	else if ((canceled + done == 0) && KAIO_SUPPORTED(fd))
+		return ((int)_kaio(AIOCANCEL, fd, NULL));
+	return (AIO_NOTCANCELED);
+}
+
+/*
+ * Cancel requests from a given work queue. If the file descriptor
+ * parameter, fd, is non-negative, then only cancel those requests
+ * in this queue that are to this file descriptor. If the fd
+ * parameter is -1, then cancel all requests.
+ */
+static void
+_aio_cancel_work(aio_worker_t *aiowp, int fd, int *canceled, int *done)
+{
+	aio_req_t *reqp;
+
+	sig_mutex_lock(&aiowp->work_qlock1);
+	/*
+	 * Cancel queued requests first.
+	 */
+	reqp = aiowp->work_tail1;
+	while (reqp != NULL) {
+		if (fd < 0 || reqp->req_args.fd == fd) {
+			if (_aio_cancel_req(aiowp, reqp, canceled, done)) {
+				/*
+				 * The caller's locks were dropped;
+				 * reqp is now invalid, so start traversing
+				 * the list from the beginning again.
+				 */
+				reqp = aiowp->work_tail1;
+				continue;
+			}
+		}
+		reqp = reqp->req_next;
+	}
+	/*
+	 * Since the queued requests have been canceled, there can
+	 * only be one in-progress request that should be canceled.
+	 */
+	if ((reqp = aiowp->work_req) != NULL &&
+	    (fd < 0 || reqp->req_args.fd == fd))
+		(void) _aio_cancel_req(aiowp, reqp, canceled, done);
+	sig_mutex_unlock(&aiowp->work_qlock1);
+}
+
+/*
+ * Cancel a request. Return 1 if the caller's locks were temporarily
+ * dropped, otherwise return 0.
+ */ +int +_aio_cancel_req(aio_worker_t *aiowp, aio_req_t *reqp, int *canceled, int *done) +{ + int ostate = reqp->req_state; + + ASSERT(MUTEX_HELD(&__aio_mutex)); + ASSERT(MUTEX_HELD(&aiowp->work_qlock1)); + if (ostate == AIO_REQ_CANCELED) + return (0); + if (ostate == AIO_REQ_DONE || ostate == AIO_REQ_DONEQ) { + (*done)++; + return (0); + } + if (reqp->req_op == AIOFSYNC && reqp != aiowp->work_req) { + ASSERT(POSIX_AIO(reqp)); + /* Cancel the queued aio_fsync() request */ + if (!reqp->req_head->lio_canned) { + reqp->req_head->lio_canned = 1; + _aio_outstand_cnt--; + (*canceled)++; + } + return (0); + } + reqp->req_state = AIO_REQ_CANCELED; + _aio_req_del(aiowp, reqp, ostate); + (void) _aio_hash_del(reqp->req_resultp); + (*canceled)++; + if (reqp == aiowp->work_req) { + ASSERT(ostate == AIO_REQ_INPROGRESS); + /* + * Set the result values now, before _aiodone() is called. + * We do this because the application can expect aio_return + * and aio_errno to be set to -1 and ECANCELED, respectively, + * immediately after a successful return from aiocancel() + * or aio_cancel(). + */ + _aio_set_result(reqp, -1, ECANCELED); + (void) thr_kill(aiowp->work_tid, SIGAIOCANCEL); + return (0); + } + if (!POSIX_AIO(reqp)) { + _aio_outstand_cnt--; + _aio_set_result(reqp, -1, ECANCELED); + return (0); + } + sig_mutex_unlock(&aiowp->work_qlock1); + sig_mutex_unlock(&__aio_mutex); + _aiodone(reqp, -1, ECANCELED); + sig_mutex_lock(&__aio_mutex); + sig_mutex_lock(&aiowp->work_qlock1); + return (1); +} + +int +_aio_create_worker(aio_req_t *reqp, int mode) +{ + aio_worker_t *aiowp, **workers, **nextworker; + int *aio_workerscnt; + void *(*func)(void *); + sigset_t oset; + int error; + + /* + * Put the new worker thread in the right queue. + */ + switch (mode) { + case AIOREAD: + case AIOWRITE: + case AIOAREAD: + case AIOAWRITE: +#if !defined(_LP64) + case AIOAREAD64: + case AIOAWRITE64: +#endif + workers = &__workers_rw; + nextworker = &__nextworker_rw; + aio_workerscnt = &__rw_workerscnt; + func = _aio_do_request; + break; + case AIONOTIFY: + workers = &__workers_no; + nextworker = &__nextworker_no; + func = _aio_do_notify; + aio_workerscnt = &__no_workerscnt; + break; + default: + aio_panic("_aio_create_worker: invalid mode"); + break; + } + + if ((aiowp = _aio_worker_alloc()) == NULL) + return (-1); + + if (reqp) { + reqp->req_state = AIO_REQ_QUEUED; + reqp->req_worker = aiowp; + aiowp->work_head1 = reqp; + aiowp->work_tail1 = reqp; + aiowp->work_next1 = reqp; + aiowp->work_count1 = 1; + aiowp->work_minload1 = 1; + } + + (void) pthread_sigmask(SIG_SETMASK, &maskset, &oset); + error = thr_create(NULL, AIOSTKSIZE, func, aiowp, + THR_DAEMON | THR_SUSPENDED, &aiowp->work_tid); + (void) pthread_sigmask(SIG_SETMASK, &oset, NULL); + if (error) { + if (reqp) { + reqp->req_state = 0; + reqp->req_worker = NULL; + } + _aio_worker_free(aiowp); + return (-1); + } + + lmutex_lock(&__aio_mutex); + (*aio_workerscnt)++; + if (*workers == NULL) { + aiowp->work_forw = aiowp; + aiowp->work_backw = aiowp; + *nextworker = aiowp; + *workers = aiowp; + } else { + aiowp->work_backw = (*workers)->work_backw; + aiowp->work_forw = (*workers); + (*workers)->work_backw->work_forw = aiowp; + (*workers)->work_backw = aiowp; + } + _aio_worker_cnt++; + lmutex_unlock(&__aio_mutex); + + (void) thr_continue(aiowp->work_tid); + + return (0); +} + +/* + * This is the worker's main routine. + * The task of this function is to execute all queued requests; + * once the last pending request is executed this function will block + * in _aio_idle(). 
A new incoming request must wake up this thread to
+ * restart the work.
+ * Every worker has its own work queue. The queue lock is required
+ * to synchronize the addition of new requests for this worker or
+ * the cancellation of pending/running requests.
+ *
+ * Cancellation scenarios:
+ * The cancellation of a request is done asynchronously using
+ * _aio_cancel_req() from another thread context.
+ * A queued request can be cancelled in different manners:
+ * a) request is queued but not "in progress" or "done" (AIO_REQ_QUEUED):
+ *	- lock the queue -> remove the request -> unlock the queue
+ *	- this function/thread does not detect this cancellation process
+ * b) request is in progress (AIO_REQ_INPROGRESS):
+ *	- this function first allows the cancellation of the running
+ *	  request with the flag "work_cancel_flg=1";
+ *	  see _aio_req_get() -> _aio_cancel_on().
+ *	  During this phase it is legal to interrupt the worker
+ *	  thread running the request (this thread) using the SIGAIOCANCEL
+ *	  signal.
+ *	  Once this thread returns from the kernel (because the request
+ *	  is done), it must disable any further cancellation and proceed
+ *	  to finish the request. To disable the cancellation, this
+ *	  thread calls _aio_cancel_off() to set "work_cancel_flg=0".
+ * c) request is already done (AIO_REQ_DONE || AIO_REQ_DONEQ):
+ *	  same procedure as in a)
+ *
+ * Regarding b):
+ *	This thread uses sigsetjmp() to mark the position in the code
+ *	where it wishes to resume working in the case that a SIGAIOCANCEL
+ *	signal is detected.
+ *	Normally this thread gets the cancellation signal during the
+ *	kernel phase (reading or writing). In that case the signal handler
+ *	aiosigcancelhndlr() runs in the worker thread's context and
+ *	in turn uses siglongjmp() to break the standard code flow and
+ *	jump to the "sigsetjmp" position, provided that "work_cancel_flg"
+ *	is set to "1".
+ *	Because "work_cancel_flg" is manipulated only by this worker
+ *	thread, and the thread can run on only one CPU at a time, it is
+ *	not necessary to protect that flag with the queue lock.
+ *	On returning from the kernel (read or write system call), we must
+ *	first disable the use of the SIGAIOCANCEL signal, and with it the
+ *	use of siglongjmp(), to prevent a possible deadlock:
+ *	- This worker thread can return from the kernel and block
+ *	  on "work_qlock1";
+ *	- a second thread then cancels the apparently "in progress"
+ *	  request and sends the SIGAIOCANCEL signal to the worker thread;
+ *	- the worker thread is granted "work_qlock1" and returns
+ *	  from the kernel;
+ *	- the kernel detects the pending signal and activates the signal
+ *	  handler instead;
+ *	- if "work_cancel_flg" were still set, the signal handler would
+ *	  call siglongjmp() to cancel the "in progress" request and
+ *	  would try to acquire the same work_qlock1 in _aio_req_get()
+ *	  a second time => deadlock.
+ *	To avoid that situation we disable the cancellation of the request
+ *	in progress BEFORE we try to acquire work_qlock1.
+ *	In that case the signal handler does not call siglongjmp() and the
+ *	worker thread continues along the standard code flow. This thread
+ *	must then check the AIO_REQ_CANCELED flag to emulate the
+ *	siglongjmp() that would otherwise have been required, freeing
+ *	work_qlock1 and avoiding a deadlock.
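+ *
+ * A condensed sketch of the resulting control flow (illustrative
+ * only; see the body of _aio_do_request() below for the real code):
+ *
+ *	(void) sigsetjmp(aiowp->work_jmp_buf, 0);  <- resume here on cancel
+ *	sigoff(self);                              <- SIGAIOCANCEL blocked
+ *	for (;;) {
+ *		reqp = _aio_req_get(aiowp);        <- sets AIO_REQ_INPROGRESS
+ *		sigon(self);                       <- cancellation window opens
+ *		retval = pread()/pwrite() ...      <- may be siglongjmp'd out
+ *		sigoff(self);                      <- close the window BEFORE
+ *		_aio_finish_request(...);             taking work_qlock1
+ *	}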
+ */ +void * +_aio_do_request(void *arglist) +{ + aio_worker_t *aiowp = (aio_worker_t *)arglist; + ulwp_t *self = curthread; + struct aio_args *arg; + aio_req_t *reqp; /* current AIO request */ + ssize_t retval; + int error; + + if (pthread_setspecific(_aio_key, aiowp) != 0) + aio_panic("_aio_do_request, pthread_setspecific()"); + (void) pthread_sigmask(SIG_SETMASK, &_worker_set, NULL); + ASSERT(aiowp->work_req == NULL); + + /* + * We resume here when an operation is cancelled. + * On first entry, aiowp->work_req == NULL, so all + * we do is block SIGAIOCANCEL. + */ + (void) sigsetjmp(aiowp->work_jmp_buf, 0); + ASSERT(self->ul_sigdefer == 0); + + sigoff(self); /* block SIGAIOCANCEL */ + if (aiowp->work_req != NULL) + _aio_finish_request(aiowp, -1, ECANCELED); + + for (;;) { + /* + * Put completed requests on aio_done_list. This has + * to be done as part of the main loop to ensure that + * we don't artificially starve any aiowait'ers. + */ + if (aiowp->work_done1) + _aio_work_done(aiowp); + +top: + /* consume any deferred SIGAIOCANCEL signal here */ + sigon(self); + sigoff(self); + + while ((reqp = _aio_req_get(aiowp)) == NULL) { + if (_aio_idle(aiowp) != 0) + goto top; + } + arg = &reqp->req_args; + ASSERT(reqp->req_state == AIO_REQ_INPROGRESS || + reqp->req_state == AIO_REQ_CANCELED); + error = 0; + + switch (reqp->req_op) { + case AIOREAD: + case AIOAREAD: + sigon(self); /* unblock SIGAIOCANCEL */ + retval = pread(arg->fd, arg->buf, + arg->bufsz, arg->offset); + if (retval == -1) { + if (errno == ESPIPE) { + retval = read(arg->fd, + arg->buf, arg->bufsz); + if (retval == -1) + error = errno; + } else { + error = errno; + } + } + sigoff(self); /* block SIGAIOCANCEL */ + break; + case AIOWRITE: + case AIOAWRITE: + sigon(self); /* unblock SIGAIOCANCEL */ + retval = pwrite(arg->fd, arg->buf, + arg->bufsz, arg->offset); + if (retval == -1) { + if (errno == ESPIPE) { + retval = write(arg->fd, + arg->buf, arg->bufsz); + if (retval == -1) + error = errno; + } else { + error = errno; + } + } + sigoff(self); /* block SIGAIOCANCEL */ + break; +#if !defined(_LP64) + case AIOAREAD64: + sigon(self); /* unblock SIGAIOCANCEL */ + retval = pread64(arg->fd, arg->buf, + arg->bufsz, arg->offset); + if (retval == -1) { + if (errno == ESPIPE) { + retval = read(arg->fd, + arg->buf, arg->bufsz); + if (retval == -1) + error = errno; + } else { + error = errno; + } + } + sigoff(self); /* block SIGAIOCANCEL */ + break; + case AIOAWRITE64: + sigon(self); /* unblock SIGAIOCANCEL */ + retval = pwrite64(arg->fd, arg->buf, + arg->bufsz, arg->offset); + if (retval == -1) { + if (errno == ESPIPE) { + retval = write(arg->fd, + arg->buf, arg->bufsz); + if (retval == -1) + error = errno; + } else { + error = errno; + } + } + sigoff(self); /* block SIGAIOCANCEL */ + break; +#endif /* !defined(_LP64) */ + case AIOFSYNC: + if (_aio_fsync_del(aiowp, reqp)) + goto top; + ASSERT(reqp->req_head == NULL); + /* + * All writes for this fsync request are now + * acknowledged. Now make these writes visible + * and put the final request into the hash table. 
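+		 * (As the code below shows, the aio_fsync() operation is
+		 * carried in the arg->offset field, which an fsync request
+		 * does not otherwise use: O_SYNC selects __fdsync(fd, FSYNC);
+		 * any other value, presumably O_DSYNC, selects
+		 * __fdsync(fd, FDSYNC).)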
+ */ + if (reqp->req_state == AIO_REQ_CANCELED) { + /* EMPTY */; + } else if (arg->offset == O_SYNC) { + if ((retval = __fdsync(arg->fd, FSYNC)) == -1) + error = errno; + } else { + if ((retval = __fdsync(arg->fd, FDSYNC)) == -1) + error = errno; + } + if (_aio_hash_insert(reqp->req_resultp, reqp) != 0) + aio_panic("_aio_do_request(): AIOFSYNC: " + "request already in hash table"); + break; + default: + aio_panic("_aio_do_request, bad op"); + } + + _aio_finish_request(aiowp, retval, error); + } + /* NOTREACHED */ + return (NULL); +} + +/* + * Perform the tail processing for _aio_do_request(). + * The in-progress request may or may not have been cancelled. + */ +static void +_aio_finish_request(aio_worker_t *aiowp, ssize_t retval, int error) +{ + aio_req_t *reqp; + + sig_mutex_lock(&aiowp->work_qlock1); + if ((reqp = aiowp->work_req) == NULL) + sig_mutex_unlock(&aiowp->work_qlock1); + else { + aiowp->work_req = NULL; + if (reqp->req_state == AIO_REQ_CANCELED) { + retval = -1; + error = ECANCELED; + } + if (!POSIX_AIO(reqp)) { + sig_mutex_unlock(&aiowp->work_qlock1); + sig_mutex_lock(&__aio_mutex); + if (reqp->req_state == AIO_REQ_INPROGRESS) + reqp->req_state = AIO_REQ_DONE; + _aio_req_done_cnt++; + _aio_set_result(reqp, retval, error); + if (error == ECANCELED) + _aio_outstand_cnt--; + sig_mutex_unlock(&__aio_mutex); + } else { + if (reqp->req_state == AIO_REQ_INPROGRESS) + reqp->req_state = AIO_REQ_DONE; + sig_mutex_unlock(&aiowp->work_qlock1); + _aiodone(reqp, retval, error); + } + } +} + +void +_aio_req_mark_done(aio_req_t *reqp) +{ +#if !defined(_LP64) + if (reqp->req_largefile) + ((aiocb64_t *)reqp->req_aiocbp)->aio_state = USERAIO_DONE; + else +#endif + ((aiocb_t *)reqp->req_aiocbp)->aio_state = USERAIO_DONE; +} + +/* + * Sleep for 'ticks' clock ticks to give somebody else a chance to run, + * hopefully to consume one of our queued signals. + */ +static void +_aio_delay(int ticks) +{ + (void) usleep(ticks * (MICROSEC / hz)); +} + +/* + * Actually send the notifications. + * We could block indefinitely here if the application + * is not listening for the signal or port notifications. + */ +static void +send_notification(notif_param_t *npp) +{ + extern int __sigqueue(pid_t pid, int signo, + /* const union sigval */ void *value, int si_code, int block); + + if (npp->np_signo) + (void) __sigqueue(__pid, npp->np_signo, npp->np_user, + SI_ASYNCIO, 1); + else if (npp->np_port >= 0) + (void) _port_dispatch(npp->np_port, 0, PORT_SOURCE_AIO, + npp->np_event, npp->np_object, npp->np_user); + + if (npp->np_lio_signo) + (void) __sigqueue(__pid, npp->np_lio_signo, npp->np_lio_user, + SI_ASYNCIO, 1); + else if (npp->np_lio_port >= 0) + (void) _port_dispatch(npp->np_lio_port, 0, PORT_SOURCE_AIO, + npp->np_lio_event, npp->np_lio_object, npp->np_lio_user); +} + +/* + * Asynchronous notification worker. + */ +void * +_aio_do_notify(void *arg) +{ + aio_worker_t *aiowp = (aio_worker_t *)arg; + aio_req_t *reqp; + + /* + * This isn't really necessary. All signals are blocked. + */ + if (pthread_setspecific(_aio_key, aiowp) != 0) + aio_panic("_aio_do_notify, pthread_setspecific()"); + + /* + * Notifications are never cancelled. + * All signals remain blocked, forever. 
+ */ + for (;;) { + while ((reqp = _aio_req_get(aiowp)) == NULL) { + if (_aio_idle(aiowp) != 0) + aio_panic("_aio_do_notify: _aio_idle() failed"); + } + send_notification(&reqp->req_notify); + _aio_req_free(reqp); + } + + /* NOTREACHED */ + return (NULL); +} + +/* + * Do the completion semantics for a request that was either canceled + * by _aio_cancel_req() or was completed by _aio_do_request(). + */ +static void +_aiodone(aio_req_t *reqp, ssize_t retval, int error) +{ + aio_result_t *resultp = reqp->req_resultp; + int notify = 0; + aio_lio_t *head; + int sigev_none; + int sigev_signal; + int sigev_thread; + int sigev_port; + notif_param_t np; + + /* + * We call _aiodone() only for Posix I/O. + */ + ASSERT(POSIX_AIO(reqp)); + + sigev_none = 0; + sigev_signal = 0; + sigev_thread = 0; + sigev_port = 0; + np.np_signo = 0; + np.np_port = -1; + np.np_lio_signo = 0; + np.np_lio_port = -1; + + switch (reqp->req_sigevent.sigev_notify) { + case SIGEV_NONE: + sigev_none = 1; + break; + case SIGEV_SIGNAL: + sigev_signal = 1; + break; + case SIGEV_THREAD: + sigev_thread = 1; + break; + case SIGEV_PORT: + sigev_port = 1; + break; + default: + aio_panic("_aiodone: improper sigev_notify"); + break; + } + + /* + * Figure out the notification parameters while holding __aio_mutex. + * Actually perform the notifications after dropping __aio_mutex. + * This allows us to sleep for a long time (if the notifications + * incur delays) without impeding other async I/O operations. + */ + + sig_mutex_lock(&__aio_mutex); + + if (sigev_signal) { + if ((np.np_signo = reqp->req_sigevent.sigev_signo) != 0) + notify = 1; + np.np_user = reqp->req_sigevent.sigev_value.sival_ptr; + } else if (sigev_thread | sigev_port) { + if ((np.np_port = reqp->req_sigevent.sigev_signo) >= 0) + notify = 1; + np.np_event = reqp->req_op; + if (np.np_event == AIOFSYNC && reqp->req_largefile) + np.np_event = AIOFSYNC64; + np.np_object = (uintptr_t)reqp->req_aiocbp; + np.np_user = reqp->req_sigevent.sigev_value.sival_ptr; + } + + if (resultp->aio_errno == EINPROGRESS) + _aio_set_result(reqp, retval, error); + + _aio_outstand_cnt--; + + head = reqp->req_head; + reqp->req_head = NULL; + + if (sigev_none) { + _aio_enq_doneq(reqp); + reqp = NULL; + } else { + (void) _aio_hash_del(resultp); + _aio_req_mark_done(reqp); + } + + _aio_waitn_wakeup(); + + /* + * __aio_waitn() sets AIO_WAIT_INPROGRESS and + * __aio_suspend() increments "_aio_kernel_suspend" + * when they are waiting in the kernel for completed I/Os. + * + * _kaio(AIONOTIFY) awakes the corresponding function + * in the kernel; then the corresponding __aio_waitn() or + * __aio_suspend() function could reap the recently + * completed I/Os (_aiodone()). + */ + if ((_aio_flags & AIO_WAIT_INPROGRESS) || _aio_kernel_suspend > 0) + (void) _kaio(AIONOTIFY); + + sig_mutex_unlock(&__aio_mutex); + + if (head != NULL) { + /* + * If all the lio requests have completed, + * prepare to notify the waiting thread. 
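+		 * (head->lio_nent and head->lio_refcnt both start at nent,
+		 * set in lio_listio(); each completing request decrements
+		 * them, and the request that finds lio_refcnt == 1 performs
+		 * the list-wide notification below.)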
+		 */
+		sig_mutex_lock(&head->lio_mutex);
+		ASSERT(head->lio_refcnt == head->lio_nent);
+		if (head->lio_refcnt == 1) {
+			int waiting = 0;
+			if (head->lio_mode == LIO_WAIT) {
+				if ((waiting = head->lio_waiting) != 0)
+					(void) cond_signal(&head->lio_cond_cv);
+			} else if (head->lio_port < 0) { /* none or signal */
+				if ((np.np_lio_signo = head->lio_signo) != 0)
+					notify = 1;
+				np.np_lio_user = head->lio_sigval.sival_ptr;
+			} else {	/* thread or port */
+				notify = 1;
+				np.np_lio_port = head->lio_port;
+				np.np_lio_event = head->lio_event;
+				np.np_lio_object =
+				    (uintptr_t)head->lio_sigevent;
+				np.np_lio_user = head->lio_sigval.sival_ptr;
+			}
+			head->lio_nent = head->lio_refcnt = 0;
+			sig_mutex_unlock(&head->lio_mutex);
+			if (waiting == 0)
+				_aio_lio_free(head);
+		} else {
+			head->lio_nent--;
+			head->lio_refcnt--;
+			sig_mutex_unlock(&head->lio_mutex);
+		}
+	}
+
+	/*
+	 * The request is completed; now perform the notifications.
+	 */
+	if (notify) {
+		if (reqp != NULL) {
+			/*
+			 * We usually put the request on the notification
+			 * queue because we don't want to block and delay
+			 * other operations behind us in the work queue.
+			 * Also, we must never block on a cancel notification
+			 * because in that case we are being called from an
+			 * application thread, and that could lead to deadlock
+			 * if no other thread is receiving notifications.
+			 */
+			reqp->req_notify = np;
+			reqp->req_op = AIONOTIFY;
+			_aio_req_add(reqp, &__workers_no, AIONOTIFY);
+			reqp = NULL;
+		} else {
+			/*
+			 * We already put the request on the done queue,
+			 * so we can't queue it to the notification queue.
+			 * Just do the notification directly.
+			 */
+			send_notification(&np);
+		}
+	}
+
+	if (reqp != NULL)
+		_aio_req_free(reqp);
+}
+
+/*
+ * Delete fsync requests from list head until there is
+ * only one left. Return 0 when there is only one,
+ * otherwise return a non-zero value.
+ */
+static int
+_aio_fsync_del(aio_worker_t *aiowp, aio_req_t *reqp)
+{
+	aio_lio_t *head = reqp->req_head;
+	int rval = 0;
+
+	ASSERT(reqp == aiowp->work_req);
+	sig_mutex_lock(&aiowp->work_qlock1);
+	sig_mutex_lock(&head->lio_mutex);
+	if (head->lio_refcnt > 1) {
+		head->lio_refcnt--;
+		head->lio_nent--;
+		aiowp->work_req = NULL;
+		sig_mutex_unlock(&head->lio_mutex);
+		sig_mutex_unlock(&aiowp->work_qlock1);
+		sig_mutex_lock(&__aio_mutex);
+		_aio_outstand_cnt--;
+		_aio_waitn_wakeup();
+		sig_mutex_unlock(&__aio_mutex);
+		_aio_req_free(reqp);
+		return (1);
+	}
+	ASSERT(head->lio_nent == 1 && head->lio_refcnt == 1);
+	reqp->req_head = NULL;
+	if (head->lio_canned)
+		reqp->req_state = AIO_REQ_CANCELED;
+	if (head->lio_mode == LIO_DESTROY) {
+		aiowp->work_req = NULL;
+		rval = 1;
+	}
+	sig_mutex_unlock(&head->lio_mutex);
+	sig_mutex_unlock(&aiowp->work_qlock1);
+	head->lio_refcnt--;
+	head->lio_nent--;
+	_aio_lio_free(head);
+	if (rval != 0)
+		_aio_req_free(reqp);
+	return (rval);
+}
+
+/*
+ * A worker is set idle when its work queue is empty.
+ * The worker checks again that it has no more work
+ * and then goes to sleep waiting for more work.
+ */
+int
+_aio_idle(aio_worker_t *aiowp)
+{
+	int error = 0;
+
+	sig_mutex_lock(&aiowp->work_qlock1);
+	if (aiowp->work_count1 == 0) {
+		ASSERT(aiowp->work_minload1 == 0);
+		aiowp->work_idleflg = 1;
+		/*
+		 * A cancellation handler is not needed here.
+		 * aio worker threads are never cancelled via pthread_cancel().
+		 */
+		error = sig_cond_wait(&aiowp->work_idle_cv,
+		    &aiowp->work_qlock1);
+		/*
+		 * The idle flag is normally cleared before the worker is
+		 * awakened by _aio_req_add(). On error (EINTR), we clear
+		 * it ourselves.
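+		 *
+		 * The wakeup side of this handshake is in _aio_req_add():
+		 *
+		 *	if (aiowp->work_count1++ == 0 && aiowp->work_idleflg) {
+		 *		aiowp->work_idleflg = 0;
+		 *		(void) cond_signal(&aiowp->work_idle_cv);
+		 *	}
+		 *
+		 * Both sides run under work_qlock1, so a wakeup cannot be
+		 * missed between the check of work_count1 above and the wait.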
+ */ + if (error) + aiowp->work_idleflg = 0; + } + sig_mutex_unlock(&aiowp->work_qlock1); + return (error); +} + +/* + * A worker's completed AIO requests are placed onto a global + * done queue. The application is only sent a SIGIO signal if + * the process has a handler enabled and it is not waiting via + * aiowait(). + */ +static void +_aio_work_done(aio_worker_t *aiowp) +{ + aio_req_t *reqp; + + sig_mutex_lock(&aiowp->work_qlock1); + reqp = aiowp->work_prev1; + reqp->req_next = NULL; + aiowp->work_done1 = 0; + aiowp->work_tail1 = aiowp->work_next1; + if (aiowp->work_tail1 == NULL) + aiowp->work_head1 = NULL; + aiowp->work_prev1 = NULL; + sig_mutex_unlock(&aiowp->work_qlock1); + sig_mutex_lock(&__aio_mutex); + _aio_donecnt++; + _aio_outstand_cnt--; + _aio_req_done_cnt--; + ASSERT(_aio_donecnt > 0 && + _aio_outstand_cnt >= 0 && + _aio_req_done_cnt >= 0); + ASSERT(reqp != NULL); + + if (_aio_done_tail == NULL) { + _aio_done_head = _aio_done_tail = reqp; + } else { + _aio_done_head->req_next = reqp; + _aio_done_head = reqp; + } + + if (_aiowait_flag) { + sig_mutex_unlock(&__aio_mutex); + (void) _kaio(AIONOTIFY); + } else { + sig_mutex_unlock(&__aio_mutex); + if (_sigio_enabled) + (void) kill(__pid, SIGIO); + } +} + +/* + * The done queue consists of AIO requests that are in either the + * AIO_REQ_DONE or AIO_REQ_CANCELED state. Requests that were cancelled + * are discarded. If the done queue is empty then NULL is returned. + * Otherwise the address of a done aio_result_t is returned. + */ +aio_result_t * +_aio_req_done(void) +{ + aio_req_t *reqp; + aio_result_t *resultp; + + ASSERT(MUTEX_HELD(&__aio_mutex)); + + if ((reqp = _aio_done_tail) != NULL) { + if ((_aio_done_tail = reqp->req_next) == NULL) + _aio_done_head = NULL; + ASSERT(_aio_donecnt > 0); + _aio_donecnt--; + (void) _aio_hash_del(reqp->req_resultp); + resultp = reqp->req_resultp; + ASSERT(reqp->req_state == AIO_REQ_DONE); + _aio_req_free(reqp); + return (resultp); + } + /* is queue empty? */ + if (reqp == NULL && _aio_outstand_cnt == 0) { + return ((aio_result_t *)-1); + } + return (NULL); +} + +/* + * Set the return and errno values for the application's use. + * + * For the Posix interfaces, we must set the return value first followed + * by the errno value because the Posix interfaces allow for a change + * in the errno value from EINPROGRESS to something else to signal + * the completion of the asynchronous request. + * + * The opposite is true for the Solaris interfaces. These allow for + * a change in the return value from AIO_INPROGRESS to something else + * to signal the completion of the asynchronous request. + */ +void +_aio_set_result(aio_req_t *reqp, ssize_t retval, int error) +{ + aio_result_t *resultp = reqp->req_resultp; + + if (POSIX_AIO(reqp)) { + resultp->aio_return = retval; + membar_producer(); + resultp->aio_errno = error; + } else { + resultp->aio_errno = error; + membar_producer(); + resultp->aio_return = retval; + } +} + +/* + * Add an AIO request onto the next work queue. + * A circular list of workers is used to choose the next worker. + */ +void +_aio_req_add(aio_req_t *reqp, aio_worker_t **nextworker, int mode) +{ + ulwp_t *self = curthread; + aio_worker_t *aiowp; + aio_worker_t *first; + int load_bal_flg = 1; + int found; + + ASSERT(reqp->req_state != AIO_REQ_DONEQ); + reqp->req_next = NULL; + /* + * Try to acquire the next worker's work queue. 
If it is locked,
+	 * then search the list of workers until an unlocked queue is found,
+	 * or until the list has been completely traversed, at which point
+	 * another worker will be created.
+	 */
+	sigoff(self);		/* defer SIGIO */
+	sig_mutex_lock(&__aio_mutex);
+	first = aiowp = *nextworker;
+	if (mode != AIONOTIFY)
+		_aio_outstand_cnt++;
+	sig_mutex_unlock(&__aio_mutex);
+
+	switch (mode) {
+	case AIOREAD:
+	case AIOWRITE:
+	case AIOAREAD:
+	case AIOAWRITE:
+#if !defined(_LP64)
+	case AIOAREAD64:
+	case AIOAWRITE64:
+#endif
+		/* try to find an idle worker */
+		found = 0;
+		do {
+			if (sig_mutex_trylock(&aiowp->work_qlock1) == 0) {
+				if (aiowp->work_idleflg) {
+					found = 1;
+					break;
+				}
+				sig_mutex_unlock(&aiowp->work_qlock1);
+			}
+		} while ((aiowp = aiowp->work_forw) != first);
+
+		if (found) {
+			aiowp->work_minload1++;
+			break;
+		}
+
+		/* try to acquire some worker's queue lock */
+		do {
+			if (sig_mutex_trylock(&aiowp->work_qlock1) == 0) {
+				found = 1;
+				break;
+			}
+		} while ((aiowp = aiowp->work_forw) != first);
+
+		/*
+		 * Create more workers when the workers appear overloaded.
+		 * Either all the workers are busy draining their queues
+		 * or no worker's queue lock could be acquired.
+		 */
+		if (!found) {
+			if (_aio_worker_cnt < _max_workers) {
+				if (_aio_create_worker(reqp, mode))
+					aio_panic("_aio_req_add: add worker");
+				sigon(self);	/* reenable SIGIO */
+				return;
+			}
+
+			/*
+			 * No worker is available and we have already created
+			 * _max_workers; keep going through the list slowly
+			 * until we get a lock.
+			 */
+			while (sig_mutex_trylock(&aiowp->work_qlock1) != 0) {
+				/*
+				 * Give someone else a chance.
+				 */
+				_aio_delay(1);
+				aiowp = aiowp->work_forw;
+			}
+		}
+
+		ASSERT(MUTEX_HELD(&aiowp->work_qlock1));
+		if (_aio_worker_cnt < _max_workers &&
+		    aiowp->work_minload1 >= _minworkload) {
+			sig_mutex_unlock(&aiowp->work_qlock1);
+			sig_mutex_lock(&__aio_mutex);
+			*nextworker = aiowp->work_forw;
+			sig_mutex_unlock(&__aio_mutex);
+			if (_aio_create_worker(reqp, mode))
+				aio_panic("_aio_req_add: add worker");
+			sigon(self);	/* reenable SIGIO */
+			return;
+		}
+		aiowp->work_minload1++;
+		break;
+	case AIOFSYNC:
+	case AIONOTIFY:
+		load_bal_flg = 0;
+		sig_mutex_lock(&aiowp->work_qlock1);
+		break;
+	default:
+		aio_panic("_aio_req_add: invalid mode");
+		break;
+	}
+	/*
+	 * Put the request onto the worker's work queue.
+	 */
+	if (aiowp->work_tail1 == NULL) {
+		ASSERT(aiowp->work_count1 == 0);
+		aiowp->work_tail1 = reqp;
+		aiowp->work_next1 = reqp;
+	} else {
+		aiowp->work_head1->req_next = reqp;
+		if (aiowp->work_next1 == NULL)
+			aiowp->work_next1 = reqp;
+	}
+	reqp->req_state = AIO_REQ_QUEUED;
+	reqp->req_worker = aiowp;
+	aiowp->work_head1 = reqp;
+	/*
+	 * Awaken the worker if it is not currently active.
+	 */
+	if (aiowp->work_count1++ == 0 && aiowp->work_idleflg) {
+		aiowp->work_idleflg = 0;
+		(void) cond_signal(&aiowp->work_idle_cv);
+	}
+	sig_mutex_unlock(&aiowp->work_qlock1);
+
+	if (load_bal_flg) {
+		sig_mutex_lock(&__aio_mutex);
+		*nextworker = aiowp->work_forw;
+		sig_mutex_unlock(&__aio_mutex);
+	}
+	sigon(self);		/* reenable SIGIO */
+}
+
+/*
+ * Get an AIO request for a specified worker.
+ * If the work queue is empty, return NULL.
+ */
+aio_req_t *
+_aio_req_get(aio_worker_t *aiowp)
+{
+	aio_req_t *reqp;
+
+	sig_mutex_lock(&aiowp->work_qlock1);
+	if ((reqp = aiowp->work_next1) != NULL) {
+		/*
+		 * Remove a POSIX request from the queue; the
+		 * request queue is a singly linked list
+		 * with a previous pointer. The request is
+		 * removed by updating the previous pointer.
+ * + * Non-posix requests are left on the queue + * to eventually be placed on the done queue. + */ + + if (POSIX_AIO(reqp)) { + if (aiowp->work_prev1 == NULL) { + aiowp->work_tail1 = reqp->req_next; + if (aiowp->work_tail1 == NULL) + aiowp->work_head1 = NULL; + } else { + aiowp->work_prev1->req_next = reqp->req_next; + if (aiowp->work_head1 == reqp) + aiowp->work_head1 = reqp->req_next; + } + + } else { + aiowp->work_prev1 = reqp; + ASSERT(aiowp->work_done1 >= 0); + aiowp->work_done1++; + } + ASSERT(reqp != reqp->req_next); + aiowp->work_next1 = reqp->req_next; + ASSERT(aiowp->work_count1 >= 1); + aiowp->work_count1--; + switch (reqp->req_op) { + case AIOREAD: + case AIOWRITE: + case AIOAREAD: + case AIOAWRITE: +#if !defined(_LP64) + case AIOAREAD64: + case AIOAWRITE64: +#endif + ASSERT(aiowp->work_minload1 > 0); + aiowp->work_minload1--; + break; + } + reqp->req_state = AIO_REQ_INPROGRESS; + } + aiowp->work_req = reqp; + ASSERT(reqp != NULL || aiowp->work_count1 == 0); + sig_mutex_unlock(&aiowp->work_qlock1); + return (reqp); +} + +static void +_aio_req_del(aio_worker_t *aiowp, aio_req_t *reqp, int ostate) +{ + aio_req_t **last; + aio_req_t *lastrp; + aio_req_t *next; + + ASSERT(aiowp != NULL); + ASSERT(MUTEX_HELD(&aiowp->work_qlock1)); + if (POSIX_AIO(reqp)) { + if (ostate != AIO_REQ_QUEUED) + return; + } + last = &aiowp->work_tail1; + lastrp = aiowp->work_tail1; + ASSERT(ostate == AIO_REQ_QUEUED || ostate == AIO_REQ_INPROGRESS); + while ((next = *last) != NULL) { + if (next == reqp) { + *last = next->req_next; + if (aiowp->work_next1 == next) + aiowp->work_next1 = next->req_next; + + if ((next->req_next != NULL) || + (aiowp->work_done1 == 0)) { + if (aiowp->work_head1 == next) + aiowp->work_head1 = next->req_next; + if (aiowp->work_prev1 == next) + aiowp->work_prev1 = next->req_next; + } else { + if (aiowp->work_head1 == next) + aiowp->work_head1 = lastrp; + if (aiowp->work_prev1 == next) + aiowp->work_prev1 = lastrp; + } + + if (ostate == AIO_REQ_QUEUED) { + ASSERT(aiowp->work_count1 >= 1); + aiowp->work_count1--; + ASSERT(aiowp->work_minload1 >= 1); + aiowp->work_minload1--; + } else { + ASSERT(ostate == AIO_REQ_INPROGRESS && + !POSIX_AIO(reqp)); + aiowp->work_done1--; + } + return; + } + last = &next->req_next; + lastrp = next; + } + /* NOTREACHED */ +} + +static void +_aio_enq_doneq(aio_req_t *reqp) +{ + if (_aio_doneq == NULL) { + _aio_doneq = reqp; + reqp->req_next = reqp->req_prev = reqp; + } else { + reqp->req_next = _aio_doneq; + reqp->req_prev = _aio_doneq->req_prev; + _aio_doneq->req_prev->req_next = reqp; + _aio_doneq->req_prev = reqp; + } + reqp->req_state = AIO_REQ_DONEQ; + _aio_doneq_cnt++; +} + +/* + * caller owns the _aio_mutex + */ +aio_req_t * +_aio_req_remove(aio_req_t *reqp) +{ + if (reqp && reqp->req_state != AIO_REQ_DONEQ) + return (NULL); + + if (reqp) { + /* request in done queue */ + if (_aio_doneq == reqp) + _aio_doneq = reqp->req_next; + if (_aio_doneq == reqp) { + /* only one request on queue */ + _aio_doneq = NULL; + } else { + aio_req_t *tmp = reqp->req_next; + reqp->req_prev->req_next = tmp; + tmp->req_prev = reqp->req_prev; + } + } else if ((reqp = _aio_doneq) != NULL) { + if (reqp == reqp->req_next) { + /* only one request on queue */ + _aio_doneq = NULL; + } else { + reqp->req_prev->req_next = _aio_doneq = reqp->req_next; + _aio_doneq->req_prev = reqp->req_prev; + } + } + if (reqp) { + _aio_doneq_cnt--; + reqp->req_next = reqp->req_prev = reqp; + reqp->req_state = AIO_REQ_DONE; + } + return (reqp); +} + +/* + * An AIO request is identified by an 
aio_result_t pointer. The library
+ * maps this aio_result_t pointer to its internal representation using a
+ * hash table. This function adds an aio_result_t pointer to the hash table.
+ */
+static int
+_aio_hash_insert(aio_result_t *resultp, aio_req_t *reqp)
+{
+	aio_hash_t *hashp;
+	aio_req_t **prev;
+	aio_req_t *next;
+
+	hashp = _aio_hash + AIOHASH(resultp);
+	lmutex_lock(&hashp->hash_lock);
+	prev = &hashp->hash_ptr;
+	while ((next = *prev) != NULL) {
+		if (resultp == next->req_resultp) {
+			lmutex_unlock(&hashp->hash_lock);
+			return (-1);
+		}
+		prev = &next->req_link;
+	}
+	*prev = reqp;
+	ASSERT(reqp->req_link == NULL);
+	lmutex_unlock(&hashp->hash_lock);
+	return (0);
+}
+
+/*
+ * Remove an entry from the hash table.
+ */
+aio_req_t *
+_aio_hash_del(aio_result_t *resultp)
+{
+	aio_hash_t *hashp;
+	aio_req_t **prev;
+	aio_req_t *next = NULL;
+
+	if (_aio_hash != NULL) {
+		hashp = _aio_hash + AIOHASH(resultp);
+		lmutex_lock(&hashp->hash_lock);
+		prev = &hashp->hash_ptr;
+		while ((next = *prev) != NULL) {
+			if (resultp == next->req_resultp) {
+				*prev = next->req_link;
+				next->req_link = NULL;
+				break;
+			}
+			prev = &next->req_link;
+		}
+		lmutex_unlock(&hashp->hash_lock);
+	}
+	return (next);
+}
+
+/*
+ * Find an entry in the hash table.
+ */
+aio_req_t *
+_aio_hash_find(aio_result_t *resultp)
+{
+	aio_hash_t *hashp;
+	aio_req_t **prev;
+	aio_req_t *next = NULL;
+
+	if (_aio_hash != NULL) {
+		hashp = _aio_hash + AIOHASH(resultp);
+		lmutex_lock(&hashp->hash_lock);
+		prev = &hashp->hash_ptr;
+		while ((next = *prev) != NULL) {
+			if (resultp == next->req_resultp)
+				break;
+			prev = &next->req_link;
+		}
+		lmutex_unlock(&hashp->hash_lock);
+	}
+	return (next);
+}
+
+/*
+ * AIO interface for POSIX
+ */
+int
+_aio_rw(aiocb_t *aiocbp, aio_lio_t *lio_head, aio_worker_t **nextworker,
+    int mode, int flg)
+{
+	aio_req_t *reqp;
+	aio_args_t *ap;
+	int kerr;
+
+	if (aiocbp == NULL) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	/* initialize kaio */
+	if (!_kaio_ok)
+		_kaio_init();
+
+	aiocbp->aio_state = NOCHECK;
+
+	/*
+	 * If we have been called because a list I/O
+	 * kaio() failed, we don't want to repeat the
+	 * system call.
+	 */
+
+	if (flg & AIO_KAIO) {
+		/*
+		 * Try kernel aio first.
+		 * If errno is ENOTSUP/EBADFD,
+		 * fall back to the thread implementation.
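+		 * For example, a request against a device that supports
+		 * kernel aio returns here with kerr == 0; a request against
+		 * a file system that does not (e.g. UFS, see lio_listio()
+		 * below) fails with ENOTSUP and is resubmitted to a
+		 * user-level worker thread; EBADFD additionally marks the
+		 * descriptor via SET_KAIO_NOT_SUPPORTED() so that later
+		 * requests skip the kaio() attempt entirely.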
+		 */
+		if (_kaio_ok > 0 && KAIO_SUPPORTED(aiocbp->aio_fildes)) {
+			aiocbp->aio_resultp.aio_errno = EINPROGRESS;
+			aiocbp->aio_state = CHECK;
+			kerr = (int)_kaio(mode, aiocbp);
+			if (kerr == 0)
+				return (0);
+			if (errno != ENOTSUP && errno != EBADFD) {
+				aiocbp->aio_resultp.aio_errno = errno;
+				aiocbp->aio_resultp.aio_return = -1;
+				aiocbp->aio_state = NOCHECK;
+				return (-1);
+			}
+			if (errno == EBADFD)
+				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
+		}
+	}
+
+	aiocbp->aio_resultp.aio_errno = EINPROGRESS;
+	aiocbp->aio_state = USERAIO;
+
+	if (!__uaio_ok && __uaio_init() == -1)
+		return (-1);
+
+	if ((reqp = _aio_req_alloc()) == NULL) {
+		errno = EAGAIN;
+		return (-1);
+	}
+
+	/*
+	 * If this is an LIO request, add the list head to the aio request.
+	 */
+	reqp->req_head = lio_head;
+	reqp->req_type = AIO_POSIX_REQ;
+	reqp->req_op = mode;
+	reqp->req_largefile = 0;
+
+	if (aiocbp->aio_sigevent.sigev_notify == SIGEV_NONE) {
+		reqp->req_sigevent.sigev_notify = SIGEV_NONE;
+	} else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_SIGNAL) {
+		reqp->req_sigevent.sigev_notify = SIGEV_SIGNAL;
+		reqp->req_sigevent.sigev_signo =
+		    aiocbp->aio_sigevent.sigev_signo;
+		reqp->req_sigevent.sigev_value.sival_ptr =
+		    aiocbp->aio_sigevent.sigev_value.sival_ptr;
+	} else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_PORT) {
+		port_notify_t *pn = aiocbp->aio_sigevent.sigev_value.sival_ptr;
+		reqp->req_sigevent.sigev_notify = SIGEV_PORT;
+		/*
+		 * Reuse the sigevent structure to contain the port number
+		 * and the user value. Same for SIGEV_THREAD, below.
+		 */
+		reqp->req_sigevent.sigev_signo =
+		    pn->portnfy_port;
+		reqp->req_sigevent.sigev_value.sival_ptr =
+		    pn->portnfy_user;
+	} else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_THREAD) {
+		reqp->req_sigevent.sigev_notify = SIGEV_THREAD;
+		/*
+		 * The sigevent structure contains the port number
+		 * and the user value. Same for SIGEV_PORT, above.
+		 */
+		reqp->req_sigevent.sigev_signo =
+		    aiocbp->aio_sigevent.sigev_signo;
+		reqp->req_sigevent.sigev_value.sival_ptr =
+		    aiocbp->aio_sigevent.sigev_value.sival_ptr;
+	}
+
+	reqp->req_resultp = &aiocbp->aio_resultp;
+	reqp->req_aiocbp = aiocbp;
+	ap = &reqp->req_args;
+	ap->fd = aiocbp->aio_fildes;
+	ap->buf = (caddr_t)aiocbp->aio_buf;
+	ap->bufsz = aiocbp->aio_nbytes;
+	ap->offset = aiocbp->aio_offset;
+
+	if ((flg & AIO_NO_DUPS) &&
+	    _aio_hash_insert(&aiocbp->aio_resultp, reqp) != 0) {
+		aio_panic("_aio_rw(): request already in hash table");
+		_aio_req_free(reqp);
+		errno = EINVAL;
+		return (-1);
+	}
+	_aio_req_add(reqp, nextworker, mode);
+	return (0);
+}
+
+#if !defined(_LP64)
+/*
+ * 64-bit AIO interface for POSIX
+ */
+int
+_aio_rw64(aiocb64_t *aiocbp, aio_lio_t *lio_head, aio_worker_t **nextworker,
+    int mode, int flg)
+{
+	aio_req_t *reqp;
+	aio_args_t *ap;
+	int kerr;
+
+	if (aiocbp == NULL) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	/* initialize kaio */
+	if (!_kaio_ok)
+		_kaio_init();
+
+	aiocbp->aio_state = NOCHECK;
+
+	/*
+	 * If we have been called because a list I/O
+	 * kaio() failed, we don't want to repeat the
+	 * system call.
+	 */
+
+	if (flg & AIO_KAIO) {
+		/*
+		 * Try kernel aio first.
+		 * If errno is ENOTSUP/EBADFD,
+		 * fall back to the thread implementation.
+ */ + if (_kaio_ok > 0 && KAIO_SUPPORTED(aiocbp->aio_fildes)) { + aiocbp->aio_resultp.aio_errno = EINPROGRESS; + aiocbp->aio_state = CHECK; + kerr = (int)_kaio(mode, aiocbp); + if (kerr == 0) + return (0); + if (errno != ENOTSUP && errno != EBADFD) { + aiocbp->aio_resultp.aio_errno = errno; + aiocbp->aio_resultp.aio_return = -1; + aiocbp->aio_state = NOCHECK; + return (-1); + } + if (errno == EBADFD) + SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes); + } + } + + aiocbp->aio_resultp.aio_errno = EINPROGRESS; + aiocbp->aio_state = USERAIO; + + if (!__uaio_ok && __uaio_init() == -1) + return (-1); + + if ((reqp = _aio_req_alloc()) == NULL) { + errno = EAGAIN; + return (-1); + } + + /* + * If an LIO request, add the list head to the aio request + */ + reqp->req_head = lio_head; + reqp->req_type = AIO_POSIX_REQ; + reqp->req_op = mode; + reqp->req_largefile = 1; + + if (aiocbp->aio_sigevent.sigev_notify == SIGEV_NONE) { + reqp->req_sigevent.sigev_notify = SIGEV_NONE; + } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { + reqp->req_sigevent.sigev_notify = SIGEV_SIGNAL; + reqp->req_sigevent.sigev_signo = + aiocbp->aio_sigevent.sigev_signo; + reqp->req_sigevent.sigev_value.sival_ptr = + aiocbp->aio_sigevent.sigev_value.sival_ptr; + } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_PORT) { + port_notify_t *pn = aiocbp->aio_sigevent.sigev_value.sival_ptr; + reqp->req_sigevent.sigev_notify = SIGEV_PORT; + reqp->req_sigevent.sigev_signo = + pn->portnfy_port; + reqp->req_sigevent.sigev_value.sival_ptr = + pn->portnfy_user; + } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_THREAD) { + reqp->req_sigevent.sigev_notify = SIGEV_THREAD; + reqp->req_sigevent.sigev_signo = + aiocbp->aio_sigevent.sigev_signo; + reqp->req_sigevent.sigev_value.sival_ptr = + aiocbp->aio_sigevent.sigev_value.sival_ptr; + } + + reqp->req_resultp = &aiocbp->aio_resultp; + reqp->req_aiocbp = aiocbp; + ap = &reqp->req_args; + ap->fd = aiocbp->aio_fildes; + ap->buf = (caddr_t)aiocbp->aio_buf; + ap->bufsz = aiocbp->aio_nbytes; + ap->offset = aiocbp->aio_offset; + + if ((flg & AIO_NO_DUPS) && + _aio_hash_insert(&aiocbp->aio_resultp, reqp) != 0) { + aio_panic("_aio_rw64(): request already in hash table"); + _aio_req_free(reqp); + errno = EINVAL; + return (-1); + } + _aio_req_add(reqp, nextworker, mode); + return (0); +} +#endif /* !defined(_LP64) */ diff --git a/usr/src/lib/libc/port/aio/aio_alloc.c b/usr/src/lib/libc/port/aio/aio_alloc.c new file mode 100644 index 0000000000..db919872e4 --- /dev/null +++ b/usr/src/lib/libc/port/aio/aio_alloc.c @@ -0,0 +1,435 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "synonyms.h" +#include "thr_uberdata.h" +#include "asyncio.h" + +/* + * The aio subsystem memory allocation strategy: + * + * For each of the structure types we wish to allocate/free + * (aio_worker_t, aio_req_t, aio_lio_t), we use mmap() to allocate + * chunks of memory which are then subdivided into individual + * elements which are put into a free list from which allocations + * are made and to which frees are returned. + * + * Chunks start small (8 Kbytes) and get larger (size doubling) + * as more chunks are needed. This keeps memory usage small for + * light use and fragmentation small for heavy use. + * + * Chunks are never unmapped except as an aftermath of fork() + * in the child process, when they are all unmapped (because + * all of the worker threads disappear in the child). + */ + +#define INITIAL_CHUNKSIZE (8 * 1024) + +/* + * The header structure for each chunk. + * A pointer and a size_t ensures proper alignment for whatever follows. + */ +typedef struct chunk { + struct chunk *chunk_next; /* linked list */ + size_t chunk_size; /* size of this chunk */ +} chunk_t; + +chunk_t *chunk_list = NULL; /* list of all chunks */ +mutex_t chunk_lock = DEFAULTMUTEX; + +chunk_t * +chunk_alloc(size_t size) +{ + chunk_t *chp = NULL; + void *ptr; + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, (off_t)0); + if (ptr != MAP_FAILED) { + lmutex_lock(&chunk_lock); + chp = ptr; + chp->chunk_next = chunk_list; + chunk_list = chp; + chp->chunk_size = size; + lmutex_unlock(&chunk_lock); + } + + return (chp); +} + +aio_worker_t *worker_freelist = NULL; /* free list of worker structures */ +aio_worker_t *worker_freelast = NULL; +size_t worker_chunksize = 0; +mutex_t worker_lock = DEFAULTMUTEX; + +/* + * Allocate a worker control block. + */ +aio_worker_t * +_aio_worker_alloc(void) +{ + aio_worker_t *aiowp; + chunk_t *chp; + size_t chunksize; + int nelem; + int i; + + lmutex_lock(&worker_lock); + if ((aiowp = worker_freelist) == NULL) { + if ((chunksize = 2 * worker_chunksize) == 0) + chunksize = INITIAL_CHUNKSIZE; + if ((chp = chunk_alloc(chunksize)) == NULL) { + lmutex_unlock(&worker_lock); + return (NULL); + } + worker_chunksize = chunksize; + worker_freelist = (aio_worker_t *)(uintptr_t)(chp + 1); + nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_worker_t); + for (i = 0, aiowp = worker_freelist; i < nelem; i++, aiowp++) + aiowp->work_forw = aiowp + 1; + worker_freelast = aiowp - 1; + worker_freelast->work_forw = NULL; + aiowp = worker_freelist; + } + if ((worker_freelist = aiowp->work_forw) == NULL) + worker_freelast = NULL; + lmutex_unlock(&worker_lock); + + aiowp->work_forw = NULL; + (void) mutex_init(&aiowp->work_qlock1, USYNC_THREAD, NULL); + (void) cond_init(&aiowp->work_idle_cv, USYNC_THREAD, NULL); + + return (aiowp); +} + +/* + * Free a worker control block. + * Declared with void *arg so it can be a pthread_key_create() destructor. 
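+ * (init_aio() at the end of this file registers it exactly that way:
+ * pthread_key_create(&_aio_key, _aio_worker_free).)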
+ */ +void +_aio_worker_free(void *arg) +{ + aio_worker_t *aiowp = arg; + + (void) mutex_destroy(&aiowp->work_qlock1); + (void) cond_destroy(&aiowp->work_idle_cv); + (void) memset(aiowp, 0, sizeof (*aiowp)); + + lmutex_lock(&worker_lock); + if (worker_freelast == NULL) { + worker_freelist = worker_freelast = aiowp; + } else { + worker_freelast->work_forw = aiowp; + worker_freelast = aiowp; + } + lmutex_unlock(&worker_lock); +} + +aio_req_t *_aio_freelist = NULL; /* free list of request structures */ +aio_req_t *_aio_freelast = NULL; +size_t request_chunksize = 0; +int _aio_freelist_cnt = 0; +int _aio_allocated_cnt = 0; +mutex_t __aio_cache_lock = DEFAULTMUTEX; + +/* + * Allocate an aio request structure. + */ +aio_req_t * +_aio_req_alloc(void) +{ + aio_req_t *reqp; + chunk_t *chp; + size_t chunksize; + int nelem; + int i; + + lmutex_lock(&__aio_cache_lock); + if ((reqp = _aio_freelist) == NULL) { + if ((chunksize = 2 * request_chunksize) == 0) + chunksize = INITIAL_CHUNKSIZE; + if ((chp = chunk_alloc(chunksize)) == NULL) { + lmutex_unlock(&__aio_cache_lock); + return (NULL); + } + request_chunksize = chunksize; + _aio_freelist = (aio_req_t *)(uintptr_t)(chp + 1); + nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_req_t); + for (i = 0, reqp = _aio_freelist; i < nelem; i++, reqp++) { + reqp->req_state = AIO_REQ_FREE; + reqp->req_link = reqp + 1; + } + _aio_freelast = reqp - 1; + _aio_freelast->req_link = NULL; + _aio_freelist_cnt = nelem; + reqp = _aio_freelist; + } + if ((_aio_freelist = reqp->req_link) == NULL) + _aio_freelast = NULL; + _aio_freelist_cnt--; + _aio_allocated_cnt++; + lmutex_unlock(&__aio_cache_lock); + + ASSERT(reqp->req_state == AIO_REQ_FREE); + reqp->req_state = 0; + reqp->req_link = NULL; + reqp->req_sigevent.sigev_notify = SIGEV_NONE; + + return (reqp); +} + +/* + * Free an aio request structure. + */ +void +_aio_req_free(aio_req_t *reqp) +{ + ASSERT(reqp->req_state != AIO_REQ_FREE && + reqp->req_state != AIO_REQ_DONEQ); + (void) memset(reqp, 0, sizeof (*reqp)); + reqp->req_state = AIO_REQ_FREE; + + lmutex_lock(&__aio_cache_lock); + if (_aio_freelast == NULL) { + _aio_freelist = _aio_freelast = reqp; + } else { + _aio_freelast->req_link = reqp; + _aio_freelast = reqp; + } + _aio_freelist_cnt++; + _aio_allocated_cnt--; + lmutex_unlock(&__aio_cache_lock); +} + +aio_lio_t *_lio_head_freelist = NULL; /* free list of lio head structures */ +aio_lio_t *_lio_head_freelast = NULL; +size_t lio_head_chunksize = 0; +int _lio_alloc = 0; +int _lio_free = 0; +mutex_t __lio_mutex = DEFAULTMUTEX; + +/* + * Allocate a listio head structure. 
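+ * The allocation pattern is the same as for the worker and request
+ * structures above: the first chunk is INITIAL_CHUNKSIZE (8 Kbytes)
+ * and yields (chunksize - sizeof (chunk_t)) / sizeof (aio_lio_t)
+ * list heads; each subsequent chunk doubles in size.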
+ */ +aio_lio_t * +_aio_lio_alloc(void) +{ + aio_lio_t *head; + chunk_t *chp; + size_t chunksize; + int nelem; + int i; + + lmutex_lock(&__lio_mutex); + if ((head = _lio_head_freelist) == NULL) { + if ((chunksize = 2 * lio_head_chunksize) == 0) + chunksize = INITIAL_CHUNKSIZE; + if ((chp = chunk_alloc(chunksize)) == NULL) { + lmutex_unlock(&__lio_mutex); + return (NULL); + } + lio_head_chunksize = chunksize; + _lio_head_freelist = (aio_lio_t *)(uintptr_t)(chp + 1); + nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_lio_t); + for (i = 0, head = _lio_head_freelist; i < nelem; i++, head++) + head->lio_next = head + 1; + _lio_head_freelast = head - 1; + _lio_head_freelast->lio_next = NULL; + _lio_alloc += nelem; + _lio_free = nelem; + head = _lio_head_freelist; + } + if ((_lio_head_freelist = head->lio_next) == NULL) + _lio_head_freelast = NULL; + _lio_free--; + lmutex_unlock(&__lio_mutex); + + ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0); + head->lio_next = NULL; + head->lio_port = -1; + (void) mutex_init(&head->lio_mutex, USYNC_THREAD, NULL); + (void) cond_init(&head->lio_cond_cv, USYNC_THREAD, NULL); + + return (head); +} + +/* + * Free a listio head structure. + */ +void +_aio_lio_free(aio_lio_t *head) +{ + ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0); + (void) mutex_destroy(&head->lio_mutex); + (void) cond_destroy(&head->lio_cond_cv); + (void) memset(head, 0, sizeof (*head)); + + lmutex_lock(&__lio_mutex); + if (_lio_head_freelast == NULL) { + _lio_head_freelist = _lio_head_freelast = head; + } else { + _lio_head_freelast->lio_next = head; + _lio_head_freelast = head; + } + _lio_free++; + lmutex_unlock(&__lio_mutex); +} + +void +postfork1_child_aio(void) +{ + chunk_t *chp; + + /* + * All of the workers are gone; free their structures. 
+ */ + if (_kaio_supported != NULL) { + (void) munmap((void *)_kaio_supported, + MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t)); + _kaio_supported = NULL; + } + if (_aio_hash != NULL) { + (void) munmap((void *)_aio_hash, HASHSZ * sizeof (aio_hash_t)); + _aio_hash = NULL; + } + for (chp = chunk_list; chp != NULL; chp = chunk_list) { + chunk_list = chp->chunk_next; + (void) munmap((void *)chp, chp->chunk_size); + } + + /* + * Reinitialize global variables + */ + + worker_freelist = NULL; + worker_freelast = NULL; + worker_chunksize = 0; + (void) mutex_init(&worker_lock, USYNC_THREAD, NULL); + + _aio_freelist = NULL; + _aio_freelast = NULL; + request_chunksize = 0; + _aio_freelist_cnt = 0; + _aio_allocated_cnt = 0; + (void) mutex_init(&__aio_cache_lock, USYNC_THREAD, NULL); + + _lio_head_freelist = NULL; + _lio_head_freelast = NULL; + lio_head_chunksize = 0; + _lio_alloc = 0; + _lio_free = 0; + (void) mutex_init(&__lio_mutex, USYNC_THREAD, NULL); + + (void) mutex_init(&__aio_initlock, USYNC_THREAD, NULL); + (void) cond_init(&__aio_initcv, USYNC_THREAD, NULL); + __aio_initbusy = 0; + + (void) mutex_init(&__aio_mutex, USYNC_THREAD, NULL); + (void) cond_init(&_aio_iowait_cv, USYNC_THREAD, NULL); + (void) cond_init(&_aio_waitn_cv, USYNC_THREAD, NULL); + + _kaio_ok = 0; + __uaio_ok = 0; + + _kaiowp = NULL; + + __workers_rw = NULL; + __nextworker_rw = NULL; + __rw_workerscnt = 0; + + __workers_no = NULL; + __nextworker_no = NULL; + __no_workerscnt = 0; + + _aio_worker_cnt = 0; + + _aio_done_head = NULL; + _aio_done_tail = NULL; + _aio_donecnt = 0; + + _aio_doneq = NULL; + _aio_doneq_cnt = 0; + + _aio_waitncnt = 0; + _aio_outstand_cnt = 0; + _kaio_outstand_cnt = 0; + _aio_req_done_cnt = 0; + _aio_kernel_suspend = 0; + _aio_suscv_cnt = 0; + + _aiowait_flag = 0; + _aio_flags = 0; +} + +#define DISPLAY(var) \ + (void) fprintf(stderr, #var "\t= %d\n", var) + +static void +_aio_exit_info(void) +{ + if ((_kaio_ok | __uaio_ok) == 0) + return; + (void) fprintf(stderr, "\n"); + DISPLAY(_aio_freelist_cnt); + DISPLAY(_aio_allocated_cnt); + DISPLAY(_lio_alloc); + DISPLAY(_lio_free); + DISPLAY(__rw_workerscnt); + DISPLAY(__no_workerscnt); + DISPLAY(_aio_worker_cnt); + DISPLAY(_aio_donecnt); + DISPLAY(_aio_doneq_cnt); + DISPLAY(_aio_waitncnt); + DISPLAY(_aio_outstand_cnt); + DISPLAY(_kaio_outstand_cnt); + DISPLAY(_aio_req_done_cnt); + DISPLAY(_aio_kernel_suspend); + DISPLAY(_aio_suscv_cnt); + DISPLAY(_aiowait_flag); + DISPLAY(_aio_flags); +} + +void +init_aio(void) +{ + char *str; + + (void) pthread_key_create(&_aio_key, _aio_worker_free); + if ((str = getenv("_AIO_MIN_WORKERS")) != NULL) { + if ((_min_workers = atoi(str)) <= 0) + _min_workers = 4; + } + if ((str = getenv("_AIO_MAX_WORKERS")) != NULL) { + if ((_max_workers = atoi(str)) <= 0) + _max_workers = 256; + if (_max_workers < _min_workers + 1) + _max_workers = _min_workers + 1; + } + if ((str = getenv("_AIO_EXIT_INFO")) != NULL && atoi(str) != 0) + (void) atexit(_aio_exit_info); +} diff --git a/usr/src/lib/libc/port/aio/posix_aio.c b/usr/src/lib/libc/port/aio/posix_aio.c new file mode 100644 index 0000000000..5e3c3ac41d --- /dev/null +++ b/usr/src/lib/libc/port/aio/posix_aio.c @@ -0,0 +1,1758 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * posix_aio.c implements the POSIX async. I/O functions. + * + * aio_read + * aio_write + * aio_error + * aio_return + * aio_suspend + * lio_listio + * aio_fsync + * aio_cancel + */ + +#include "synonyms.h" +#include "thr_uberdata.h" +#include "asyncio.h" +#include <atomic.h> +#include <sys/file.h> +#include <sys/port.h> + +extern int __fdsync(int, int); + +cond_t _aio_waitn_cv = DEFAULTCV; /* wait for end of aio_waitn */ + +static int _aio_check_timeout(const timespec_t *, timespec_t *, int *); + +/* defines for timedwait in __aio_waitn() and __aio_suspend() */ +#define AIO_TIMEOUT_INDEF -1 +#define AIO_TIMEOUT_POLL 0 +#define AIO_TIMEOUT_WAIT 1 +#define AIO_TIMEOUT_UNDEF 2 + +/* + * List I/O stuff + */ +static void _lio_list_decr(aio_lio_t *); +static long aio_list_max = 0; + +int +aio_read(aiocb_t *aiocbp) +{ + if (aiocbp == NULL || aiocbp->aio_reqprio < 0) { + errno = EINVAL; + return (-1); + } + if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { + errno = EBUSY; + return (-1); + } + if (_aio_sigev_thread(aiocbp) != 0) + return (-1); + aiocbp->aio_lio_opcode = LIO_READ; + return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD, + (AIO_KAIO | AIO_NO_DUPS))); +} + +int +aio_write(aiocb_t *aiocbp) +{ + if (aiocbp == NULL || aiocbp->aio_reqprio < 0) { + errno = EINVAL; + return (-1); + } + if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { + errno = EBUSY; + return (-1); + } + if (_aio_sigev_thread(aiocbp) != 0) + return (-1); + aiocbp->aio_lio_opcode = LIO_WRITE; + return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE, + (AIO_KAIO | AIO_NO_DUPS))); +} + +/* + * __lio_listio() cancellation handler. 
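+ * It is installed around the sig_cond_wait() in lio_listio() below
+ * (pthread_cleanup_push(_lio_listio_cleanup, head)), so a thread
+ * cancelled while waiting for list completion drops lio_mutex and,
+ * if it held the last reference, frees the list head.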
+ */ +/* ARGSUSED */ +static void +_lio_listio_cleanup(aio_lio_t *head) +{ + int freeit = 0; + + ASSERT(MUTEX_HELD(&head->lio_mutex)); + if (head->lio_refcnt == 0) { + ASSERT(head->lio_nent == 0); + freeit = 1; + } + head->lio_waiting = 0; + sig_mutex_unlock(&head->lio_mutex); + if (freeit) + _aio_lio_free(head); +} + +int +lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list, + int nent, struct sigevent *_RESTRICT_KYWD sigevp) +{ + int aio_ufs = 0; + int oerrno = 0; + aio_lio_t *head = NULL; + aiocb_t *aiocbp; + int state = 0; + int EIOflg = 0; + int rw; + int do_kaio = 0; + int error; + int i; + + if (!_kaio_ok) + _kaio_init(); + + if (aio_list_max == 0) + aio_list_max = sysconf(_SC_AIO_LISTIO_MAX); + + if (nent <= 0 || nent > aio_list_max) { + errno = EINVAL; + return (-1); + } + + switch (mode) { + case LIO_WAIT: + state = NOCHECK; + break; + case LIO_NOWAIT: + state = CHECK; + break; + default: + errno = EINVAL; + return (-1); + } + + for (i = 0; i < nent; i++) { + if ((aiocbp = list[i]) == NULL) + continue; + if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { + errno = EBUSY; + return (-1); + } + if (_aio_sigev_thread(aiocbp) != 0) + return (-1); + if (aiocbp->aio_lio_opcode == LIO_NOP) + aiocbp->aio_state = NOCHECK; + else { + aiocbp->aio_state = state; + if (KAIO_SUPPORTED(aiocbp->aio_fildes)) + do_kaio++; + else + aiocbp->aio_resultp.aio_errno = ENOTSUP; + } + } + if (_aio_sigev_thread_init(sigevp) != 0) + return (-1); + + if (do_kaio) { + error = (int)_kaio(AIOLIO, mode, list, nent, sigevp); + if (error == 0) + return (0); + oerrno = errno; + } else { + oerrno = errno = ENOTSUP; + error = -1; + } + + if (error == -1 && errno == ENOTSUP) { + error = errno = 0; + /* + * If LIO_WAIT, or notification required, allocate a list head. 
+ */ + if (mode == LIO_WAIT || + (sigevp != NULL && + (sigevp->sigev_notify == SIGEV_SIGNAL || + sigevp->sigev_notify == SIGEV_THREAD || + sigevp->sigev_notify == SIGEV_PORT))) + head = _aio_lio_alloc(); + if (head) { + sig_mutex_lock(&head->lio_mutex); + head->lio_mode = mode; + head->lio_largefile = 0; + if (mode == LIO_NOWAIT && sigevp != NULL) { + if (sigevp->sigev_notify == SIGEV_THREAD) { + head->lio_port = sigevp->sigev_signo; + head->lio_event = AIOLIO; + head->lio_sigevent = sigevp; + head->lio_sigval.sival_ptr = + sigevp->sigev_value.sival_ptr; + } else if (sigevp->sigev_notify == SIGEV_PORT) { + port_notify_t *pn = + sigevp->sigev_value.sival_ptr; + head->lio_port = pn->portnfy_port; + head->lio_event = AIOLIO; + head->lio_sigevent = sigevp; + head->lio_sigval.sival_ptr = + pn->portnfy_user; + } else { /* SIGEV_SIGNAL */ + head->lio_signo = sigevp->sigev_signo; + head->lio_sigval.sival_ptr = + sigevp->sigev_value.sival_ptr; + } + } + head->lio_nent = head->lio_refcnt = nent; + sig_mutex_unlock(&head->lio_mutex); + } + /* + * find UFS requests, errno == ENOTSUP/EBADFD, + */ + for (i = 0; i < nent; i++) { + if ((aiocbp = list[i]) == NULL || + aiocbp->aio_lio_opcode == LIO_NOP || + (aiocbp->aio_resultp.aio_errno != ENOTSUP && + aiocbp->aio_resultp.aio_errno != EBADFD)) { + if (head) + _lio_list_decr(head); + continue; + } + if (aiocbp->aio_resultp.aio_errno == EBADFD) + SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes); + if (aiocbp->aio_reqprio < 0) { + aiocbp->aio_resultp.aio_errno = EINVAL; + aiocbp->aio_resultp.aio_return = -1; + EIOflg = 1; + if (head) + _lio_list_decr(head); + continue; + } + /* + * submit an AIO request with flags AIO_NO_KAIO + * to avoid the kaio() syscall in _aio_rw() + */ + switch (aiocbp->aio_lio_opcode) { + case LIO_READ: + rw = AIOAREAD; + break; + case LIO_WRITE: + rw = AIOAWRITE; + break; + } + error = _aio_rw(aiocbp, head, &__nextworker_rw, rw, + (AIO_NO_KAIO | AIO_NO_DUPS)); + if (error == 0) + aio_ufs++; + else { + if (head) + _lio_list_decr(head); + aiocbp->aio_resultp.aio_errno = error; + EIOflg = 1; + } + } + } + if (EIOflg) { + errno = EIO; + return (-1); + } + if (mode == LIO_WAIT && oerrno == ENOTSUP) { + /* + * call kaio(AIOLIOWAIT) to get all outstanding + * kernel AIO requests + */ + if ((nent - aio_ufs) > 0) + (void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp); + if (head != NULL && head->lio_nent > 0) { + sig_mutex_lock(&head->lio_mutex); + while (head->lio_refcnt > 0) { + int err; + head->lio_waiting = 1; + pthread_cleanup_push(_lio_listio_cleanup, head); + err = sig_cond_wait(&head->lio_cond_cv, + &head->lio_mutex); + pthread_cleanup_pop(0); + head->lio_waiting = 0; + if (err && head->lio_nent > 0) { + sig_mutex_unlock(&head->lio_mutex); + errno = err; + return (-1); + } + } + sig_mutex_unlock(&head->lio_mutex); + ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0); + _aio_lio_free(head); + for (i = 0; i < nent; i++) { + if ((aiocbp = list[i]) != NULL && + aiocbp->aio_resultp.aio_errno) { + errno = EIO; + return (-1); + } + } + } + return (0); + } + return (error); +} + +static void +_lio_list_decr(aio_lio_t *head) +{ + sig_mutex_lock(&head->lio_mutex); + head->lio_nent--; + head->lio_refcnt--; + sig_mutex_unlock(&head->lio_mutex); +} + +/* + * __aio_suspend() cancellation handler. 
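+ * Runs if the caller is cancelled while blocked in _kaio(AIOSUSPEND)
+ * or in sig_cond_wait(); it decrements whichever counter was pushed
+ * (_aio_kernel_suspend or _aio_suscv_cnt) and drops __aio_mutex.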
+ */
+/* ARGSUSED */
+static void
+_aio_suspend_cleanup(int *counter)
+{
+	ASSERT(MUTEX_HELD(&__aio_mutex));
+	(*counter)--;		/* _aio_kernel_suspend or _aio_suscv_cnt */
+	sig_mutex_unlock(&__aio_mutex);
+}
+
+static int
+__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
+{
+	int		cv_err;	/* error code from cond_xxx() */
+	int		kerr;	/* error code from _kaio(AIOSUSPEND) */
+	int		i;
+	timespec_t	twait;	/* copy of timo for internal calculations */
+	timespec_t	*wait = NULL;
+	int		timedwait;
+	int		req_outstanding;
+	aiocb_t		**listp;
+	aiocb_t		*aiocbp;
+#if !defined(_LP64)
+	aiocb64_t	**listp64;
+	aiocb64_t	*aiocbp64;
+#endif
+	hrtime_t	hrtstart;
+	hrtime_t	hrtend;
+	hrtime_t	hrtres;
+
+#if defined(_LP64)
+	if (largefile)
+		aio_panic("__aio_suspend: largefile set when _LP64 defined");
+#endif
+
+	if (nent <= 0) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	if (timo) {
+		if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
+		    timo->tv_nsec >= NANOSEC) {
+			errno = EINVAL;
+			return (-1);
+		}
+		/* Initialize start time if time monitoring desired */
+		if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
+			timedwait = AIO_TIMEOUT_WAIT;
+			hrtstart = gethrtime();
+		} else {
+			/* content of timeout = 0 : polling */
+			timedwait = AIO_TIMEOUT_POLL;
+		}
+	} else {
+		/* timeout pointer = NULL : wait indefinitely */
+		timedwait = AIO_TIMEOUT_INDEF;
+	}
+
+#if !defined(_LP64)
+	if (largefile) {
+		listp64 = (aiocb64_t **)list;
+		for (i = 0; i < nent; i++) {
+			if ((aiocbp64 = listp64[i]) != NULL &&
+			    aiocbp64->aio_state == CHECK)
+				aiocbp64->aio_state = CHECKED;
+		}
+	} else
+#endif	/* !_LP64 */
+	{
+		listp = (aiocb_t **)list;
+		for (i = 0; i < nent; i++) {
+			if ((aiocbp = listp[i]) != NULL &&
+			    aiocbp->aio_state == CHECK)
+				aiocbp->aio_state = CHECKED;
+		}
+	}
+
+	sig_mutex_lock(&__aio_mutex);
+
+	/*
+	 * The next "if" block is required to accelerate the
+	 * access to completed RAW-IO requests.
+	 */
+	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
+		/* Only kernel requests pending */
+
+		/*
+		 * _aio_kernel_suspend is used to detect completed non RAW-IO
+		 * requests.
+		 * As long as this thread resides in the kernel (_kaio) further
+		 * asynchronous non RAW-IO requests could be submitted.
+		 */
+		_aio_kernel_suspend++;
+
+		/*
+		 * Always do the kaio() call without using the KAIO_SUPPORTED()
+		 * checks because it is not mandatory to have a valid fd
+		 * set in the list entries, only the resultp must be set.
+		 *
+		 * _kaio(AIOSUSPEND ...) return values :
+		 * 0:  everything ok, completed request found
+		 * -1: error
+		 * 1:  no error : _aiodone awakened the _kaio(AIOSUSPEND,,)
+		 *	system call using _kaio(AIONOTIFY). It means that some
+		 *	non RAW-IOs completed in between.
+		 */
+
+		pthread_cleanup_push(_aio_suspend_cleanup,
+		    &_aio_kernel_suspend);
+		pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
+		sig_mutex_unlock(&__aio_mutex);
+		_cancel_prologue();
+		kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
+		    list, nent, timo, -1);
+		_cancel_epilogue();
+		pthread_cleanup_pop(1);	/* sig_mutex_lock(&__aio_mutex) */
+		pthread_cleanup_pop(0);
+
+		_aio_kernel_suspend--;
+
+		if (!kerr) {
+			sig_mutex_unlock(&__aio_mutex);
+			return (0);
+		}
+	} else {
+		kerr = 1;	/* simulation: _kaio detected AIONOTIFY */
+	}
+
+	/*
+	 * Return kernel error code if no other IOs are outstanding.
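+	 * A kerr of 1 only means that _aiodone() interrupted the kernel
+	 * wait via _kaio(AIONOTIFY); the library (non RAW-IO) requests
+	 * still have to be scanned below.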
+ */ + req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt; + + sig_mutex_unlock(&__aio_mutex); + + if (req_outstanding == 0) { + /* no IOs outstanding in the thread pool */ + if (kerr == 1) + /* return "no IOs completed" */ + errno = EAGAIN; + return (-1); + } + + /* + * IOs using the thread pool are outstanding. + */ + if (timedwait == AIO_TIMEOUT_WAIT) { + /* time monitoring */ + hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC + + (hrtime_t)timo->tv_nsec; + hrtres = hrtend - gethrtime(); + if (hrtres <= 0) + hrtres = 1; + twait.tv_sec = hrtres / (hrtime_t)NANOSEC; + twait.tv_nsec = hrtres % (hrtime_t)NANOSEC; + wait = &twait; + } else if (timedwait == AIO_TIMEOUT_POLL) { + twait = *timo; /* content of timo = 0 : polling */ + wait = &twait; + } + + for (;;) { + int error; + int inprogress; + + /* first scan file system requests */ + inprogress = 0; + for (i = 0; i < nent; i++) { +#if !defined(_LP64) + if (largefile) { + if ((aiocbp64 = listp64[i]) == NULL) + continue; + error = aiocbp64->aio_resultp.aio_errno; + } else +#endif + { + if ((aiocbp = listp[i]) == NULL) + continue; + error = aiocbp->aio_resultp.aio_errno; + } + if (error == EINPROGRESS) + inprogress = 1; + else if (error != ECANCELED) { + errno = 0; + return (0); + } + } + + sig_mutex_lock(&__aio_mutex); + + /* + * If there aren't outstanding I/Os in the thread pool then + * we have to return here, provided that all kernel RAW-IOs + * also completed. + * If the kernel was notified to return, then we have to check + * possible pending RAW-IOs. + */ + if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) { + sig_mutex_unlock(&__aio_mutex); + errno = EAGAIN; + break; + } + + /* + * There are outstanding IOs in the thread pool or the kernel + * was notified to return. + * Check pending RAW-IOs first. + */ + if (kerr == 1) { + /* + * _aiodone just notified the kernel about + * completed non RAW-IOs (AIONOTIFY was detected). + */ + if (timedwait == AIO_TIMEOUT_WAIT) { + /* Update remaining timeout for the kernel */ + hrtres = hrtend - gethrtime(); + if (hrtres <= 0) { + /* timer expired */ + sig_mutex_unlock(&__aio_mutex); + errno = EAGAIN; + break; + } + wait->tv_sec = hrtres / (hrtime_t)NANOSEC; + wait->tv_nsec = hrtres % (hrtime_t)NANOSEC; + } + _aio_kernel_suspend++; + + pthread_cleanup_push(_aio_suspend_cleanup, + &_aio_kernel_suspend); + pthread_cleanup_push(sig_mutex_lock, &__aio_mutex); + sig_mutex_unlock(&__aio_mutex); + _cancel_prologue(); + kerr = (int)_kaio(largefile? 
AIOSUSPEND64 : AIOSUSPEND,
+			    list, nent, wait, -1);
+			_cancel_epilogue();
+			pthread_cleanup_pop(1);
+			pthread_cleanup_pop(0);
+
+			_aio_kernel_suspend--;
+
+			if (!kerr) {
+				sig_mutex_unlock(&__aio_mutex);
+				return (0);
+			}
+		}
+
+		if (timedwait == AIO_TIMEOUT_POLL) {
+			sig_mutex_unlock(&__aio_mutex);
+			errno = EAGAIN;
+			break;
+		}
+
+		if (timedwait == AIO_TIMEOUT_WAIT) {
+			/* Update remaining timeout */
+			hrtres = hrtend - gethrtime();
+			if (hrtres <= 0) {
+				/* timer expired */
+				sig_mutex_unlock(&__aio_mutex);
+				errno = EAGAIN;
+				break;
+			}
+			wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
+			wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
+		}
+
+		if (_aio_outstand_cnt == 0) {
+			sig_mutex_unlock(&__aio_mutex);
+			continue;
+		}
+
+		_aio_suscv_cnt++;	/* ID for _aiodone (wake up) */
+
+		pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
+		if (timedwait == AIO_TIMEOUT_WAIT) {
+			cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
+			    &__aio_mutex, wait);
+			if (cv_err == ETIME)
+				cv_err = EAGAIN;
+		} else {
+			/* wait indefinitely */
+			cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
+		}
+		/* this decrements _aio_suscv_cnt and drops __aio_mutex */
+		pthread_cleanup_pop(1);
+
+		if (cv_err) {
+			errno = cv_err;
+			break;
+		}
+	}
+	return (-1);
+}
+
+int
+aio_suspend(const aiocb_t * const list[], int nent,
+	const timespec_t *timeout)
+{
+	return (__aio_suspend((void **)list, nent, timeout, 0));
+}
+
+int
+aio_error(const aiocb_t *aiocbp)
+{
+	const aio_result_t *resultp = &aiocbp->aio_resultp;
+	int error;
+
+	if ((error = resultp->aio_errno) == EINPROGRESS) {
+		if (aiocbp->aio_state == CHECK) {
+			/*
+			 * Always do the kaio() call without using the
+			 * KAIO_SUPPORTED() checks because it is not
+			 * mandatory to have a valid fd set in the
+			 * aiocb, only the resultp must be set.
+			 */
+			if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
+				errno = EINVAL;
+				return (-1);
+			}
+			error = resultp->aio_errno;
+		} else if (aiocbp->aio_state == CHECKED) {
+			((aiocb_t *)aiocbp)->aio_state = CHECK;
+		}
+	}
+	return (error);
+}
+
+ssize_t
+aio_return(aiocb_t *aiocbp)
+{
+	aio_result_t *resultp = &aiocbp->aio_resultp;
+	aio_req_t *reqp;
+	int error;
+	ssize_t retval;
+
+	/*
+	 * The _aiodone() function stores resultp->aio_return before
+	 * storing resultp->aio_errno (with a membar_producer() in
+	 * between).  We use membar_consumer() below to ensure proper
+	 * memory ordering between _aiodone() and ourselves.
+	 */
+	error = resultp->aio_errno;
+	membar_consumer();
+	retval = resultp->aio_return;
+
+	/*
+	 * We use this condition to indicate either that aio_return()
+	 * has already been called or that it should not have been
+	 * called yet.
+	 */
+	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
+		errno = error;
+		return (-1);
+	}
+
+	/*
+	 * Before we return, mark the result as being returned so that later
+	 * calls to aio_return() will return the fact that the result has
+	 * already been returned.
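+	 * The sentinel state is aio_return == -1 paired with aio_errno ==
+	 * EINVAL; it is retested under __aio_mutex below in case several
+	 * threads race into aio_return() for the same aiocb.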
+ */ + sig_mutex_lock(&__aio_mutex); + /* retest, in case more than one thread actually got in here */ + if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) { + sig_mutex_unlock(&__aio_mutex); + errno = EINVAL; + return (-1); + } + resultp->aio_return = -1; + resultp->aio_errno = EINVAL; + if ((reqp = _aio_hash_del(resultp)) == NULL) + sig_mutex_unlock(&__aio_mutex); + else { + aiocbp->aio_state = NOCHECK; + ASSERT(reqp->req_head == NULL); + (void) _aio_req_remove(reqp); + sig_mutex_unlock(&__aio_mutex); + _aio_req_free(reqp); + } + + if (retval == -1) + errno = error; + return (retval); +} + +void +_lio_remove(aio_req_t *reqp) +{ + aio_lio_t *head; + int refcnt; + + if ((head = reqp->req_head) != NULL) { + sig_mutex_lock(&head->lio_mutex); + ASSERT(head->lio_refcnt == head->lio_nent); + refcnt = --head->lio_nent; + head->lio_refcnt--; + sig_mutex_unlock(&head->lio_mutex); + if (refcnt == 0) + _aio_lio_free(head); + reqp->req_head = NULL; + } +} + +/* + * This function returns the number of asynchronous I/O requests submitted. + */ +static int +__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp, + int workerscnt) +{ + int i; + int error; + aio_worker_t *next = aiowp; + + for (i = 0; i < workerscnt; i++) { + error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO); + if (error != 0) { + sig_mutex_lock(&head->lio_mutex); + head->lio_mode = LIO_DESTROY; /* ignore fsync */ + head->lio_nent -= workerscnt - i; + head->lio_refcnt -= workerscnt - i; + sig_mutex_unlock(&head->lio_mutex); + errno = EAGAIN; + return (i); + } + next = next->work_forw; + } + return (i); +} + +int +aio_fsync(int op, aiocb_t *aiocbp) +{ + aio_lio_t *head; + struct stat statb; + int fret; + + if (aiocbp == NULL) + return (0); + if (aiocbp->aio_reqprio < 0 || (op != O_DSYNC && op != O_SYNC)) { + errno = EINVAL; + return (-1); + } + if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { + errno = EBUSY; + return (-1); + } + if (fstat(aiocbp->aio_fildes, &statb) < 0) + return (-1); + if (_aio_sigev_thread(aiocbp) != 0) + return (-1); + + /* + * Kernel aio_fsync() is not supported. + * We force user-level aio_fsync() just + * for the notification side-effect. + */ + if (!__uaio_ok && __uaio_init() == -1) + return (-1); + + /* + * The first asynchronous I/O request in the current process will + * create a bunch of workers (via __uaio_init()). If the number + * of workers is zero then the number of pending asynchronous I/O + * requests is zero. In such a case only execute the standard + * fsync(3C) or fdatasync(3RT) as appropriate. + */ + if (__rw_workerscnt == 0) { + if (op == O_DSYNC) + return (__fdsync(aiocbp->aio_fildes, FDSYNC)); + else + return (__fdsync(aiocbp->aio_fildes, FSYNC)); + } + + /* + * re-use aio_offset as the op field. + * O_DSYNC - fdatasync() + * O_SYNC - fsync() + */ + aiocbp->aio_offset = op; + aiocbp->aio_lio_opcode = AIOFSYNC; + + /* + * Create a list of fsync requests. The worker that + * gets the last request will do the fsync request. + */ + head = _aio_lio_alloc(); + if (head == NULL) { + errno = EAGAIN; + return (-1); + } + head->lio_mode = LIO_FSYNC; + head->lio_nent = head->lio_refcnt = __rw_workerscnt; + head->lio_largefile = 0; + + /* + * Insert an fsync request on every worker's queue. + */ + fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt); + if (fret != __rw_workerscnt) { + /* + * Fewer fsync requests than workers means that it was + * not possible to submit fsync requests to all workers. 
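+		 * (__aio_fsync_bar() has already dropped the unsubmitted
+		 * requests from lio_nent and lio_refcnt.)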
+		 * Actions:
+		 * a) number of fsync requests submitted is 0:
+		 *	=> free allocated memory (aio_lio_t).
+		 * b) number of fsync requests submitted is > 0:
+		 *	=> the last worker executing the fsync request
+		 *	   will free the aio_lio_t struct.
+		 */
+		if (fret == 0)
+			_aio_lio_free(head);
+		return (-1);
+	}
+	return (0);
+}
+
+int
+aio_cancel(int fd, aiocb_t *aiocbp)
+{
+	aio_req_t *reqp;
+	aio_worker_t *aiowp;
+	int done = 0;
+	int canceled = 0;
+	struct stat buf;
+
+	if (fstat(fd, &buf) < 0)
+		return (-1);
+
+	if (aiocbp != NULL) {
+		if (fd != aiocbp->aio_fildes) {
+			errno = EINVAL;
+			return (-1);
+		}
+		if (aiocbp->aio_state == USERAIO) {
+			sig_mutex_lock(&__aio_mutex);
+			reqp = _aio_hash_find(&aiocbp->aio_resultp);
+			if (reqp == NULL) {
+				sig_mutex_unlock(&__aio_mutex);
+				return (AIO_ALLDONE);
+			}
+			aiowp = reqp->req_worker;
+			sig_mutex_lock(&aiowp->work_qlock1);
+			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
+			sig_mutex_unlock(&aiowp->work_qlock1);
+			sig_mutex_unlock(&__aio_mutex);
+			if (done)
+				return (AIO_ALLDONE);
+			if (canceled)
+				return (AIO_CANCELED);
+			return (AIO_NOTCANCELED);
+		}
+		if (aiocbp->aio_state == USERAIO_DONE)
+			return (AIO_ALLDONE);
+		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
+	}
+
+	return (aiocancel_all(fd));
+}
+
+/*
+ * __aio_waitn() cancellation handler.
+ */
+/* ARGSUSED */
+static void
+_aio_waitn_cleanup(void *arg)
+{
+	ASSERT(MUTEX_HELD(&__aio_mutex));
+
+	/* check for pending aio_waitn() calls */
+	_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
+	if (_aio_flags & AIO_LIB_WAITN_PENDING) {
+		_aio_flags &= ~AIO_LIB_WAITN_PENDING;
+		(void) cond_signal(&_aio_waitn_cv);
+	}
+
+	sig_mutex_unlock(&__aio_mutex);
+}
+
+/*
+ * aio_waitn can be used to reap the results of several I/O operations that
+ * were submitted asynchronously.  The submission of I/Os can be done using
+ * existing POSIX interfaces: lio_listio, aio_write or aio_read.
+ * aio_waitn waits until "nwait" I/Os (supplied as a parameter) have
+ * completed and it returns the descriptors for these I/Os in "list".  The
+ * maximum size of this list is given by "nent" and the actual number of I/Os
+ * completed is returned in "nwait".  aio_waitn might also return early
+ * if the timeout expires.  Additionally, aio_waitn returns 0 if
+ * successful or -1 if an error occurred.
+ */
+static int
+__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
+{
+	int error = 0;
+	uint_t dnwait = 0;	/* number of requests in the waitn-done list */
+	uint_t kwaitcnt;	/* expected "done" requests from kernel */
+	uint_t knentcnt;	/* max. expected "done" requests from kernel */
+	int uerrno = 0;
+	int kerrno = 0;		/* save errno from _kaio() call */
+	int timedwait = AIO_TIMEOUT_UNDEF;
+	aio_req_t *reqp;
+	timespec_t end;
+	timespec_t twait;	/* copy of utimo for internal calculations */
+	timespec_t *wait = NULL;
+
+	if (nent == 0 || *nwait == 0 || *nwait > nent) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	/*
+	 * Only one running aio_waitn call per process is allowed.
+	 * Further calls will be blocked here until the running
+	 * call finishes.
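+	 * A polling call (zero timeout) returns immediately with *nwait
+	 * set to 0; the others set AIO_LIB_WAITN_PENDING and sleep on
+	 * _aio_waitn_cv until the active call clears AIO_LIB_WAITN in
+	 * _aio_waitn_cleanup().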
+ */ + + sig_mutex_lock(&__aio_mutex); + + while (_aio_flags & AIO_LIB_WAITN) { + if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) { + sig_mutex_unlock(&__aio_mutex); + *nwait = 0; + return (0); + } + _aio_flags |= AIO_LIB_WAITN_PENDING; + pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex); + error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex); + pthread_cleanup_pop(0); + if (error != 0) { + sig_mutex_unlock(&__aio_mutex); + *nwait = 0; + errno = error; + return (-1); + } + } + + pthread_cleanup_push(_aio_waitn_cleanup, NULL); + + _aio_flags |= AIO_LIB_WAITN; + + if (*nwait >= AIO_WAITN_MAXIOCBS) { + if (_aio_check_timeout(utimo, &end, &timedwait) != 0) { + error = -1; + dnwait = 0; + goto out; + } + if (timedwait != AIO_TIMEOUT_INDEF) { + twait = *utimo; + wait = &twait; + } + } + + /* + * If both counters are still set to zero, then only + * kernel requests are currently outstanding (raw-I/Os). + */ + if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) { + for (;;) { + kwaitcnt = *nwait - dnwait; + knentcnt = nent - dnwait; + if (knentcnt > AIO_WAITN_MAXIOCBS) + knentcnt = AIO_WAITN_MAXIOCBS; + kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt; + + pthread_cleanup_push(sig_mutex_lock, &__aio_mutex); + sig_mutex_unlock(&__aio_mutex); + _cancel_prologue(); + error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt, + &kwaitcnt, wait); + _cancel_epilogue(); + pthread_cleanup_pop(1); + + if (error == 0) { + dnwait += kwaitcnt; + if (dnwait >= *nwait || + *nwait < AIO_WAITN_MAXIOCBS) + break; + if (timedwait == AIO_TIMEOUT_WAIT) { + error = _aio_get_timedelta(&end, wait); + if (error == -1) { + /* timer expired */ + errno = ETIME; + break; + } + } + continue; + } + if (errno == EAGAIN) { + if (dnwait > 0) + error = 0; + break; + } + if (errno == ETIME || errno == EINTR) { + dnwait += kwaitcnt; + break; + } + /* fatal error */ + break; + } + + goto out; + } + + /* File system I/Os outstanding ... */ + + if (timedwait == AIO_TIMEOUT_UNDEF) { + if (_aio_check_timeout(utimo, &end, &timedwait) != 0) { + error = -1; + dnwait = 0; + goto out; + } + if (timedwait != AIO_TIMEOUT_INDEF) { + twait = *utimo; + wait = &twait; + } + } + + for (;;) { + uint_t sum_reqs; + + /* + * Calculate sum of active non RAW-IO requests (sum_reqs). + * If the expected amount of completed requests (*nwait) is + * greater than the calculated sum (sum_reqs) then + * use _kaio to check pending RAW-IO requests. + */ + sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt; + kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0; + + if (kwaitcnt != 0) { + /* possibly some kernel I/Os outstanding */ + knentcnt = nent - dnwait; + if (knentcnt > AIO_WAITN_MAXIOCBS) + knentcnt = AIO_WAITN_MAXIOCBS; + kwaitcnt = (kwaitcnt > knentcnt) ? 
knentcnt : kwaitcnt; + + _aio_flags |= AIO_WAIT_INPROGRESS; + + pthread_cleanup_push(sig_mutex_lock, &__aio_mutex); + sig_mutex_unlock(&__aio_mutex); + _cancel_prologue(); + error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt, + &kwaitcnt, wait); + _cancel_epilogue(); + pthread_cleanup_pop(1); + + _aio_flags &= ~AIO_WAIT_INPROGRESS; + + if (error == 0) { + dnwait += kwaitcnt; + } else { + switch (errno) { + case EINVAL: + case EAGAIN: + /* don't wait for kernel I/Os */ + kerrno = 0; /* ignore _kaio() errno */ + *nwait = _aio_doneq_cnt + + _aio_outstand_cnt + dnwait; + error = 0; + break; + case EINTR: + case ETIME: + /* just scan for completed LIB I/Os */ + dnwait += kwaitcnt; + timedwait = AIO_TIMEOUT_POLL; + kerrno = errno; /* save _kaio() errno */ + error = 0; + break; + default: + kerrno = errno; /* save _kaio() errno */ + break; + } + } + if (error) + break; /* fatal kernel error */ + } + + /* check completed FS requests in the "done" queue */ + + while (_aio_doneq_cnt && dnwait < nent) { + /* get done requests */ + if ((reqp = _aio_req_remove(NULL)) != NULL) { + (void) _aio_hash_del(reqp->req_resultp); + list[dnwait++] = reqp->req_aiocbp; + _aio_req_mark_done(reqp); + _lio_remove(reqp); + _aio_req_free(reqp); + } + } + + if (dnwait >= *nwait) { + /* min. requested amount of completed I/Os satisfied */ + break; + } + if (timedwait == AIO_TIMEOUT_WAIT && + (error = _aio_get_timedelta(&end, wait)) == -1) { + /* timer expired */ + uerrno = ETIME; + break; + } + + /* + * If some I/Os are outstanding and we have to wait for them, + * then sleep here. _aiodone() will call _aio_waitn_wakeup() + * to wakeup this thread as soon as the required amount of + * completed I/Os is done. + */ + if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) { + /* + * _aio_waitn_wakeup() will wake up this thread when: + * - _aio_waitncnt requests are completed or + * - _aio_outstand_cnt becomes zero. + * sig_cond_reltimedwait() could also return with + * a timeout error (ETIME). + */ + if (*nwait < _aio_outstand_cnt) + _aio_waitncnt = *nwait; + else + _aio_waitncnt = _aio_outstand_cnt; + + _aio_flags |= AIO_IO_WAITING; + + if (wait) + uerrno = sig_cond_reltimedwait(&_aio_iowait_cv, + &__aio_mutex, wait); + else + uerrno = sig_cond_wait(&_aio_iowait_cv, + &__aio_mutex); + + _aio_flags &= ~AIO_IO_WAITING; + + if (uerrno == ETIME) { + timedwait = AIO_TIMEOUT_POLL; + continue; + } + if (uerrno != 0) + timedwait = AIO_TIMEOUT_POLL; + } + + if (timedwait == AIO_TIMEOUT_POLL) { + /* polling or timer expired */ + break; + } + } + + errno = uerrno == 0 ? kerrno : uerrno; + if (errno) + error = -1; + else + error = 0; + +out: + *nwait = dnwait; + + pthread_cleanup_pop(1); /* drops __aio_mutex */ + + return (error); +} + +int +aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait, + const timespec_t *timeout) +{ + return (__aio_waitn((void **)list, nent, nwait, timeout)); +} + +void +_aio_waitn_wakeup(void) +{ + /* + * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that + * it is waiting for completed I/Os. The number of required + * completed I/Os is stored into "_aio_waitncnt". + * aio_waitn() is woken up when + * - there are no further outstanding I/Os + * (_aio_outstand_cnt == 0) or + * - the expected number of I/Os has completed. + * Only one __aio_waitn() function waits for completed I/Os at + * a time. + * + * __aio_suspend() increments "_aio_suscv_cnt" to notify + * _aiodone() that at least one __aio_suspend() call is + * waiting for completed I/Os. 
+ * There could be more than one __aio_suspend() function + * waiting for completed I/Os. Because every function should + * be waiting for different I/Os, _aiodone() has to wake up all + * __aio_suspend() functions each time. + * Every __aio_suspend() function will compare the recently + * completed I/O with its own list. + */ + ASSERT(MUTEX_HELD(&__aio_mutex)); + if (_aio_flags & AIO_IO_WAITING) { + if (_aio_waitncnt > 0) + _aio_waitncnt--; + if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 || + _aio_suscv_cnt > 0) + (void) cond_broadcast(&_aio_iowait_cv); + } else { + /* Wake up waiting aio_suspend calls */ + if (_aio_suscv_cnt > 0) + (void) cond_broadcast(&_aio_iowait_cv); + } +} + +/* + * timedwait values : + * AIO_TIMEOUT_POLL : polling + * AIO_TIMEOUT_WAIT : timeout + * AIO_TIMEOUT_INDEF : wait indefinitely + */ +static int +_aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait) +{ + struct timeval curtime; + + if (utimo) { + if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 || + utimo->tv_nsec >= NANOSEC) { + errno = EINVAL; + return (-1); + } + if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) { + (void) gettimeofday(&curtime, NULL); + end->tv_sec = utimo->tv_sec + curtime.tv_sec; + end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec; + if (end->tv_nsec >= NANOSEC) { + end->tv_nsec -= NANOSEC; + end->tv_sec += 1; + } + *timedwait = AIO_TIMEOUT_WAIT; + } else { + /* polling */ + *timedwait = AIO_TIMEOUT_POLL; + } + } else { + *timedwait = AIO_TIMEOUT_INDEF; /* wait indefinitely */ + } + return (0); +} + +#if !defined(_LP64) + +int +aio_read64(aiocb64_t *aiocbp) +{ + if (aiocbp == NULL || aiocbp->aio_reqprio < 0) { + errno = EINVAL; + return (-1); + } + if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { + errno = EBUSY; + return (-1); + } + if (_aio_sigev_thread64(aiocbp) != 0) + return (-1); + aiocbp->aio_lio_opcode = LIO_READ; + return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64, + (AIO_KAIO | AIO_NO_DUPS))); +} + +int +aio_write64(aiocb64_t *aiocbp) +{ + if (aiocbp == NULL || aiocbp->aio_reqprio < 0) { + errno = EINVAL; + return (-1); + } + if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { + errno = EBUSY; + return (-1); + } + if (_aio_sigev_thread64(aiocbp) != 0) + return (-1); + aiocbp->aio_lio_opcode = LIO_WRITE; + return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64, + (AIO_KAIO | AIO_NO_DUPS))); +} + +int +lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list, + int nent, struct sigevent *_RESTRICT_KYWD sigevp) +{ + int aio_ufs = 0; + int oerrno = 0; + aio_lio_t *head = NULL; + aiocb64_t *aiocbp; + int state = 0; + int EIOflg = 0; + int rw; + int do_kaio = 0; + int error; + int i; + + if (!_kaio_ok) + _kaio_init(); + + if (aio_list_max == 0) + aio_list_max = sysconf(_SC_AIO_LISTIO_MAX); + + if (nent <= 0 || nent > aio_list_max) { + errno = EINVAL; + return (-1); + } + + switch (mode) { + case LIO_WAIT: + state = NOCHECK; + break; + case LIO_NOWAIT: + state = CHECK; + break; + default: + errno = EINVAL; + return (-1); + } + + for (i = 0; i < nent; i++) { + if ((aiocbp = list[i]) == NULL) + continue; + if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) { + errno = EBUSY; + return (-1); + } + if (_aio_sigev_thread64(aiocbp) != 0) + return (-1); + if (aiocbp->aio_lio_opcode == LIO_NOP) + aiocbp->aio_state = NOCHECK; + else { + aiocbp->aio_state = state; + if (KAIO_SUPPORTED(aiocbp->aio_fildes)) + do_kaio++; + else + aiocbp->aio_resultp.aio_errno = ENOTSUP; + } + } + if (_aio_sigev_thread_init(sigevp) != 0) + return (-1); + 
+ if (do_kaio) { + error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp); + if (error == 0) + return (0); + oerrno = errno; + } else { + oerrno = errno = ENOTSUP; + error = -1; + } + + if (error == -1 && errno == ENOTSUP) { + error = errno = 0; + /* + * If LIO_WAIT, or notification required, allocate a list head. + */ + if (mode == LIO_WAIT || + (sigevp != NULL && + (sigevp->sigev_notify == SIGEV_SIGNAL || + sigevp->sigev_notify == SIGEV_THREAD || + sigevp->sigev_notify == SIGEV_PORT))) + head = _aio_lio_alloc(); + if (head) { + sig_mutex_lock(&head->lio_mutex); + head->lio_mode = mode; + head->lio_largefile = 1; + if (mode == LIO_NOWAIT && sigevp != NULL) { + if (sigevp->sigev_notify == SIGEV_THREAD) { + head->lio_port = sigevp->sigev_signo; + head->lio_event = AIOLIO64; + head->lio_sigevent = sigevp; + head->lio_sigval.sival_ptr = + sigevp->sigev_value.sival_ptr; + } else if (sigevp->sigev_notify == SIGEV_PORT) { + port_notify_t *pn = + sigevp->sigev_value.sival_ptr; + head->lio_port = pn->portnfy_port; + head->lio_event = AIOLIO64; + head->lio_sigevent = sigevp; + head->lio_sigval.sival_ptr = + pn->portnfy_user; + } else { /* SIGEV_SIGNAL */ + head->lio_signo = sigevp->sigev_signo; + head->lio_sigval.sival_ptr = + sigevp->sigev_value.sival_ptr; + } + } + head->lio_nent = head->lio_refcnt = nent; + sig_mutex_unlock(&head->lio_mutex); + } + /* + * find UFS requests, errno == ENOTSUP/EBADFD, + */ + for (i = 0; i < nent; i++) { + if ((aiocbp = list[i]) == NULL || + aiocbp->aio_lio_opcode == LIO_NOP || + (aiocbp->aio_resultp.aio_errno != ENOTSUP && + aiocbp->aio_resultp.aio_errno != EBADFD)) { + if (head) + _lio_list_decr(head); + continue; + } + if (aiocbp->aio_resultp.aio_errno == EBADFD) + SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes); + if (aiocbp->aio_reqprio < 0) { + aiocbp->aio_resultp.aio_errno = EINVAL; + aiocbp->aio_resultp.aio_return = -1; + EIOflg = 1; + if (head) + _lio_list_decr(head); + continue; + } + /* + * submit an AIO request with flags AIO_NO_KAIO + * to avoid the kaio() syscall in _aio_rw() + */ + switch (aiocbp->aio_lio_opcode) { + case LIO_READ: + rw = AIOAREAD64; + break; + case LIO_WRITE: + rw = AIOAWRITE64; + break; + } + error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw, + (AIO_NO_KAIO | AIO_NO_DUPS)); + if (error == 0) + aio_ufs++; + else { + if (head) + _lio_list_decr(head); + aiocbp->aio_resultp.aio_errno = error; + EIOflg = 1; + } + } + } + if (EIOflg) { + errno = EIO; + return (-1); + } + if (mode == LIO_WAIT && oerrno == ENOTSUP) { + /* + * call kaio(AIOLIOWAIT) to get all outstanding + * kernel AIO requests + */ + if ((nent - aio_ufs) > 0) + (void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp); + if (head != NULL && head->lio_nent > 0) { + sig_mutex_lock(&head->lio_mutex); + while (head->lio_refcnt > 0) { + int err; + head->lio_waiting = 1; + pthread_cleanup_push(_lio_listio_cleanup, head); + err = sig_cond_wait(&head->lio_cond_cv, + &head->lio_mutex); + pthread_cleanup_pop(0); + head->lio_waiting = 0; + if (err && head->lio_nent > 0) { + sig_mutex_unlock(&head->lio_mutex); + errno = err; + return (-1); + } + } + sig_mutex_unlock(&head->lio_mutex); + ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0); + _aio_lio_free(head); + for (i = 0; i < nent; i++) { + if ((aiocbp = list[i]) != NULL && + aiocbp->aio_resultp.aio_errno) { + errno = EIO; + return (-1); + } + } + } + return (0); + } + return (error); +} + +int +aio_suspend64(const aiocb64_t * const list[], int nent, + const timespec_t *timeout) +{ + return (__aio_suspend((void **)list, nent, timeout, 1)); 
+}
+
+int
+aio_error64(const aiocb64_t *aiocbp)
+{
+	const aio_result_t *resultp = &aiocbp->aio_resultp;
+	int error;
+
+	if ((error = resultp->aio_errno) == EINPROGRESS) {
+		if (aiocbp->aio_state == CHECK) {
+			/*
+			 * Always do the kaio() call without using the
+			 * KAIO_SUPPORTED() checks because it is not
+			 * mandatory to have a valid fd set in the
+			 * aiocb, only the resultp must be set.
+			 */
+			if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
+				errno = EINVAL;
+				return (-1);
+			}
+			error = resultp->aio_errno;
+		} else if (aiocbp->aio_state == CHECKED) {
+			((aiocb64_t *)aiocbp)->aio_state = CHECK;
+		}
+	}
+	return (error);
+}
+
+ssize_t
+aio_return64(aiocb64_t *aiocbp)
+{
+	aio_result_t *resultp = &aiocbp->aio_resultp;
+	aio_req_t *reqp;
+	int error;
+	ssize_t retval;
+
+	/*
+	 * The _aiodone() function stores resultp->aio_return before
+	 * storing resultp->aio_errno (with a membar_producer() in
+	 * between).  We use membar_consumer() below to ensure proper
+	 * memory ordering between _aiodone() and ourselves.
+	 */
+	error = resultp->aio_errno;
+	membar_consumer();
+	retval = resultp->aio_return;
+
+	/*
+	 * We use this condition to indicate either that aio_return()
+	 * has already been called or that it should not have been
+	 * called yet.
+	 */
+	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
+		errno = error;
+		return (-1);
+	}
+
+	/*
+	 * Before we return, mark the result as being returned so that later
+	 * calls to aio_return() will return the fact that the result has
+	 * already been returned.
+	 */
+	sig_mutex_lock(&__aio_mutex);
+	/* retest, in case more than one thread actually got in here */
+	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
+		sig_mutex_unlock(&__aio_mutex);
+		errno = EINVAL;
+		return (-1);
+	}
+	resultp->aio_return = -1;
+	resultp->aio_errno = EINVAL;
+	if ((reqp = _aio_hash_del(resultp)) == NULL)
+		sig_mutex_unlock(&__aio_mutex);
+	else {
+		aiocbp->aio_state = NOCHECK;
+		ASSERT(reqp->req_head == NULL);
+		(void) _aio_req_remove(reqp);
+		sig_mutex_unlock(&__aio_mutex);
+		_aio_req_free(reqp);
+	}
+
+	if (retval == -1)
+		errno = error;
+	return (retval);
+}
+
+static int
+__aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
+	int workerscnt)
+{
+	int i;
+	int error;
+	aio_worker_t *next = aiowp;
+
+	for (i = 0; i < workerscnt; i++) {
+		error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
+		if (error != 0) {
+			sig_mutex_lock(&head->lio_mutex);
+			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
+			head->lio_nent -= workerscnt - i;
+			head->lio_refcnt -= workerscnt - i;
+			sig_mutex_unlock(&head->lio_mutex);
+			errno = EAGAIN;
+			return (i);
+		}
+		next = next->work_forw;
+	}
+	return (i);
+}
+
+int
+aio_fsync64(int op, aiocb64_t *aiocbp)
+{
+	aio_lio_t *head;
+	struct stat statb;
+	int fret;
+
+	if (aiocbp == NULL)
+		return (0);
+	if (aiocbp->aio_reqprio < 0 || (op != O_DSYNC && op != O_SYNC)) {
+		errno = EINVAL;
+		return (-1);
+	}
+	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
+		errno = EBUSY;
+		return (-1);
+	}
+	if (fstat(aiocbp->aio_fildes, &statb) < 0)
+		return (-1);
+	if (_aio_sigev_thread64(aiocbp) != 0)
+		return (-1);
+
+	/*
+	 * Kernel aio_fsync() is not supported.
+	 * We force user-level aio_fsync() just
+	 * for the notification side-effect.
+	 */
+	if (!__uaio_ok && __uaio_init() == -1)
+		return (-1);
+
+	/*
+	 * The first asynchronous I/O request in the current process will
+	 * create a bunch of workers (via __uaio_init()).
If the number + * of workers is zero then the number of pending asynchronous I/O + * requests is zero. In such a case only execute the standard + * fsync(3C) or fdatasync(3RT) as appropriate. + */ + if (__rw_workerscnt == 0) { + if (op == O_DSYNC) + return (__fdsync(aiocbp->aio_fildes, FDSYNC)); + else + return (__fdsync(aiocbp->aio_fildes, FSYNC)); + } + + /* + * re-use aio_offset as the op field. + * O_DSYNC - fdatasync() + * O_SYNC - fsync() + */ + aiocbp->aio_offset = op; + aiocbp->aio_lio_opcode = AIOFSYNC; + + /* + * Create a list of fsync requests. The worker that + * gets the last request will do the fsync request. + */ + head = _aio_lio_alloc(); + if (head == NULL) { + errno = EAGAIN; + return (-1); + } + head->lio_mode = LIO_FSYNC; + head->lio_nent = head->lio_refcnt = __rw_workerscnt; + head->lio_largefile = 1; + + /* + * Insert an fsync request on every worker's queue. + */ + fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt); + if (fret != __rw_workerscnt) { + /* + * Fewer fsync requests than workers means that it was + * not possible to submit fsync requests to all workers. + * Actions: + * a) number of fsync requests submitted is 0: + * => free allocated memory (aio_lio_t). + * b) number of fsync requests submitted is > 0: + * => the last worker executing the fsync request + * will free the aio_lio_t struct. + */ + if (fret == 0) + _aio_lio_free(head); + return (-1); + } + return (0); +} + +int +aio_cancel64(int fd, aiocb64_t *aiocbp) +{ + aio_req_t *reqp; + aio_worker_t *aiowp; + int done = 0; + int canceled = 0; + struct stat buf; + + if (fstat(fd, &buf) < 0) + return (-1); + + if (aiocbp != NULL) { + if (fd != aiocbp->aio_fildes) { + errno = EINVAL; + return (-1); + } + if (aiocbp->aio_state == USERAIO) { + sig_mutex_lock(&__aio_mutex); + reqp = _aio_hash_find(&aiocbp->aio_resultp); + if (reqp == NULL) { + sig_mutex_unlock(&__aio_mutex); + return (AIO_ALLDONE); + } + aiowp = reqp->req_worker; + sig_mutex_lock(&aiowp->work_qlock1); + (void) _aio_cancel_req(aiowp, reqp, &canceled, &done); + sig_mutex_unlock(&aiowp->work_qlock1); + sig_mutex_unlock(&__aio_mutex); + if (done) + return (AIO_ALLDONE); + if (canceled) + return (AIO_CANCELED); + return (AIO_NOTCANCELED); + } + if (aiocbp->aio_state == USERAIO_DONE) + return (AIO_ALLDONE); + return ((int)_kaio(AIOCANCEL, fd, aiocbp)); + } + + return (aiocancel_all(fd)); +} + +int +aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait, + const timespec_t *timeout) +{ + return (__aio_waitn((void **)list, nent, nwait, timeout)); +} + +#endif /* !defined(_LP64) */ diff --git a/usr/src/lib/libc/port/gen/event_port.c b/usr/src/lib/libc/port/gen/event_port.c index 84ade99164..f4eb057dec 100644 --- a/usr/src/lib/libc/port/gen/event_port.c +++ b/usr/src/lib/libc/port/gen/event_port.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -34,7 +34,6 @@ #pragma weak port_get = _port_get #pragma weak port_getn = _port_getn #pragma weak port_alert = _port_alert -#pragma weak port_dispatch = _port_dispatch #include "lint.h" #include <sys/types.h> @@ -128,7 +127,7 @@ _port_send(int port, int events, void *user) } /* - * port_dispatch() will block if there are not resources available to + * _port_dispatch() will block if there are not resources available to * satisfy the request. */ diff --git a/usr/src/lib/libc/port/llib-lc b/usr/src/lib/libc/port/llib-lc index 0c213a116c..502aa4cb33 100644 --- a/usr/src/lib/libc/port/llib-lc +++ b/usr/src/lib/libc/port/llib-lc @@ -29,6 +29,7 @@ /* LINTLIBRARY */ /* PROTOLIB1 */ +#include <aio.h> #include <alloca.h> #include <atomic.h> #include <ctype.h> @@ -51,6 +52,7 @@ #include <locale.h> #include <memory.h> #include <mon.h> +#include <mqueue.h> #include <nan.h> #include <ndbm.h> #include <limits.h> @@ -61,7 +63,9 @@ #include <rctl.h> #include <regex.h> #include <rpcsvc/ypclnt.h> +#include <sched.h> #include <search.h> +#include <semaphore.h> #include <setjmp.h> #include <shadow.h> #include <siginfo.h> @@ -80,6 +84,7 @@ #include <synch.h> #include <sys/acctctl.h> #include <sys/acl.h> +#include <sys/asynch.h> #include <sys/byteorder.h> #include <sys/cladm.h> #include <sys/corectl.h> diff --git a/usr/src/lib/libc/port/rt/clock_timer.c b/usr/src/lib/libc/port/rt/clock_timer.c new file mode 100644 index 0000000000..8dfb35be91 --- /dev/null +++ b/usr/src/lib/libc/port/rt/clock_timer.c @@ -0,0 +1,179 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#pragma weak clock_getres = _clock_getres +#pragma weak clock_gettime = _clock_gettime +#pragma weak clock_settime = _clock_settime +#pragma weak timer_create = _timer_create +#pragma weak timer_delete = _timer_delete +#pragma weak timer_getoverrun = _timer_getoverrun +#pragma weak timer_gettime = _timer_gettime +#pragma weak timer_settime = _timer_settime + +#include "synonyms.h" +#include <time.h> +#include <sys/types.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include "sigev_thread.h" + +/* + * System call wrappers found elsewhere in libc (common/sys/__clock_timer.s). 
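+ * The _clock_*() and _timer_*() functions below are thin wrappers over
+ * these; _timer_create() additionally intercepts SIGEV_THREAD requests
+ * and redirects the notification to an event port so that a spawner
+ * thread (see "sigev_thread.h") can invoke the callback function.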
+ */ +extern int __clock_getres(clockid_t, timespec_t *); +extern int __clock_gettime(clockid_t, timespec_t *); +extern int __clock_settime(clockid_t, const timespec_t *); +extern int __timer_create(clockid_t, struct sigevent *, timer_t *); +extern int __timer_delete(timer_t); +extern int __timer_getoverrun(timer_t); +extern int __timer_gettime(timer_t, itimerspec_t *); +extern int __timer_settime(timer_t, int, const itimerspec_t *, itimerspec_t *); + +/* + * Array of pointers to tcd's, indexed by timer id. + * No more than 'timer_max' timers can be created by any process. + */ +int timer_max = 0; +thread_communication_data_t **timer_tcd; +static pthread_once_t timer_once = PTHREAD_ONCE_INIT; + +static void +timer_init(void) +{ + timer_max = (int)_sysconf(_SC_TIMER_MAX); + timer_tcd = malloc(timer_max * sizeof (*timer_tcd)); + (void) memset(timer_tcd, 0, timer_max * sizeof (*timer_tcd)); +} + +int +_clock_getres(clockid_t clock_id, timespec_t *res) +{ + return (__clock_getres(clock_id, res)); +} + +int +_clock_gettime(clockid_t clock_id, timespec_t *tp) +{ + return (__clock_gettime(clock_id, tp)); +} + +int +_clock_settime(clockid_t clock_id, const timespec_t *tp) +{ + return (__clock_settime(clock_id, tp)); +} + +int +_timer_create(clockid_t clock_id, struct sigevent *sigevp, timer_t *timerid) +{ + struct sigevent sigevent; + port_notify_t port_notify; + thread_communication_data_t *tcdp; + int sigev_thread = 0; + int rc; + + (void) pthread_once(&timer_once, timer_init); + + if (sigevp != NULL && + sigevp->sigev_notify == SIGEV_THREAD && + sigevp->sigev_notify_function != NULL) { + sigev_thread = 1; + tcdp = setup_sigev_handler(sigevp, TIMER); + if (tcdp == NULL) + return (-1); + /* copy the sigevent structure so we can modify it */ + sigevent = *sigevp; + sigevp = &sigevent; + port_notify.portnfy_port = tcdp->tcd_port; + port_notify.portnfy_user = NULL; + sigevp->sigev_value.sival_ptr = &port_notify; + } + + rc = __timer_create(clock_id, sigevp, timerid); + + if (sigev_thread) { + if (rc == 0) { + if ((rc = launch_spawner(tcdp)) != 0) + __timer_delete(*timerid); + else + timer_tcd[*timerid] = tcdp; + } + if (rc != 0) + free_sigev_handler(tcdp); + } + + return (rc); +} + +int +_timer_delete(timer_t timerid) +{ + int rc; + + if ((rc = del_sigev_timer(timerid)) == 0) + return (__timer_delete(timerid)); + else + return (rc); +} + +int +_timer_getoverrun(timer_t timerid) +{ + return (__timer_getoverrun(timerid) + sigev_timer_getoverrun(timerid)); +} + +int +_timer_gettime(timer_t timerid, itimerspec_t *value) +{ + return (__timer_gettime(timerid, value)); +} + +int +_timer_settime(timer_t timerid, int flags, const itimerspec_t *value, + itimerspec_t *ovalue) +{ + return (__timer_settime(timerid, flags, value, ovalue)); +} + +/* + * Cleanup after fork1() in the child process. + */ +void +postfork1_child_sigev_timer(void) +{ + thread_communication_data_t *tcdp; + int timer; + + for (timer = 0; timer < timer_max; timer++) { + if ((tcdp = timer_tcd[timer]) != NULL) { + timer_tcd[timer] = NULL; + tcd_teardown(tcdp); + } + } +} diff --git a/usr/src/lib/libc/port/rt/fallocate.c b/usr/src/lib/libc/port/rt/fallocate.c new file mode 100644 index 0000000000..17b9088052 --- /dev/null +++ b/usr/src/lib/libc/port/rt/fallocate.c @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "synonyms.h" +#include <errno.h> +#include <fcntl.h> +#include <sys/types.h> + +#include <stdio.h> + +int +posix_fallocate(int fd, off_t offset, off_t len) +{ + struct flock lck; + + lck.l_whence = 0; + lck.l_start = offset; + lck.l_len = len; + lck.l_type = F_WRLCK; + + if (fcntl(fd, F_ALLOCSP, &lck) == -1) { + return (-1); + } + + return (0); +} + +#if !defined(_LP64) + +int +posix_fallocate64(int fd, off64_t offset, off64_t len) +{ + struct flock64 lck; + + lck.l_whence = 0; + lck.l_start = offset; + lck.l_len = len; + lck.l_type = F_WRLCK; + + if (fcntl(fd, F_ALLOCSP64, &lck) == -1) { + return (-1); + } + + return (0); +} + +#endif diff --git a/usr/src/lib/libc/port/rt/mqueue.c b/usr/src/lib/libc/port/rt/mqueue.c new file mode 100644 index 0000000000..ebab58a259 --- /dev/null +++ b/usr/src/lib/libc/port/rt/mqueue.c @@ -0,0 +1,1101 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#pragma weak mq_open = _mq_open +#pragma weak mq_close = _mq_close +#pragma weak mq_unlink = _mq_unlink +#pragma weak mq_send = _mq_send +#pragma weak mq_timedsend = _mq_timedsend +#pragma weak mq_reltimedsend_np = _mq_reltimedsend_np +#pragma weak mq_receive = _mq_receive +#pragma weak mq_timedreceive = _mq_timedreceive +#pragma weak mq_reltimedreceive_np = _mq_reltimedreceive_np +#pragma weak mq_notify = _mq_notify +#pragma weak mq_setattr = _mq_setattr +#pragma weak mq_getattr = _mq_getattr + +#include "synonyms.h" +#include "mtlib.h" +#define _KMEMUSER +#include <sys/param.h> /* _MQ_OPEN_MAX, _MQ_PRIO_MAX, _SEM_VALUE_MAX */ +#undef _KMEMUSER +#include <mqueue.h> +#include <sys/types.h> +#include <sys/file.h> +#include <sys/mman.h> +#include <errno.h> +#include <stdarg.h> +#include <limits.h> +#include <pthread.h> +#include <assert.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <inttypes.h> +#include "sigev_thread.h" +#include "pos4obj.h" + +/* + * Default values per message queue + */ +#define MQ_MAXMSG 128 +#define MQ_MAXSIZE 1024 + +#define MQ_MAGIC 0x4d534751 /* "MSGQ" */ + +/* + * Message header which is part of messages in link list + */ +typedef struct { + uint64_t msg_next; /* offset of next message in the link */ + uint64_t msg_len; /* length of the message */ +} msghdr_t; + +/* + * message queue description + */ +struct mq_dn { + size_t mqdn_flags; /* open description flags */ +}; + +/* + * message queue descriptor structure + */ +typedef struct mq_des { + struct mq_des *mqd_next; /* list of all open mq descriptors, */ + struct mq_des *mqd_prev; /* needed for fork-safety */ + int mqd_magic; /* magic # to identify mq_des */ + int mqd_flags; /* operation flag per open */ + struct mq_header *mqd_mq; /* address pointer of message Q */ + struct mq_dn *mqd_mqdn; /* open description */ + thread_communication_data_t *mqd_tcd; /* SIGEV_THREAD notification */ +} mqdes_t; + +/* + * message queue common header, part of the mmap()ed file. + * Since message queues may be shared between 32- and 64-bit processes, + * care must be taken to make sure that the elements of this structure + * are identical for both _LP64 and _ILP32 cases. + */ +typedef struct mq_header { + /* first field must be mq_totsize, DO NOT insert before this */ + int64_t mq_totsize; /* total size of the Queue */ + int64_t mq_maxsz; /* max size of each message */ + uint32_t mq_maxmsg; /* max messages in the queue */ + uint32_t mq_maxprio; /* maximum mqueue priority */ + uint32_t mq_curmaxprio; /* current maximum MQ priority */ + uint32_t mq_mask; /* priority bitmask */ + uint64_t mq_freep; /* free message's head pointer */ + uint64_t mq_headpp; /* pointer to head pointers */ + uint64_t mq_tailpp; /* pointer to tail pointers */ + signotify_id_t mq_sigid; /* notification id (3 int's) */ + uint32_t mq_ntype; /* notification type (SIGEV_*) */ + uint64_t mq_des; /* pointer to msg Q descriptor */ + mutex_t mq_exclusive; /* acquire for exclusive access */ + sem_t mq_rblocked; /* number of processes rblocked */ + sem_t mq_notfull; /* mq_send()'s block on this */ + sem_t mq_notempty; /* mq_receive()'s block on this */ + sem_t mq_spawner; /* spawner thread blocks on this */ +} mqhdr_t; + +/* + * The code assumes that _MQ_OPEN_MAX == -1 or "no fixed implementation limit". + * If this assumption is somehow invalidated, mq_open() needs to be changed + * back to the old version which kept a count and enforced a limit. 
+ * We make sure that this is pointed out to those changing <sys/param.h>
+ * by checking _MQ_OPEN_MAX at compile time.
+ */
+#if _MQ_OPEN_MAX != -1
+#error "mq_open() no longer enforces _MQ_OPEN_MAX and needs fixing."
+#endif
+
+#define	MQ_ALIGNSIZE	8	/* 64-bit alignment */
+
+#ifdef DEBUG
+#define	MQ_ASSERT(x)	assert(x);
+
+#define	MQ_ASSERT_PTR(_m, _p) \
+	assert((_p) != NULL && !((uintptr_t)(_p) & (MQ_ALIGNSIZE -1)) && \
+	    !((uintptr_t)_m + (uintptr_t)(_p) >= (uintptr_t)_m + \
+	    _m->mq_totsize));
+
+#define	MQ_ASSERT_SEMVAL_LEQ(sem, val) { \
+	int _val; \
+	(void) sem_getvalue((sem), &_val); \
+	assert((_val) <= val); }
+#else
+#define	MQ_ASSERT(x)
+#define	MQ_ASSERT_PTR(_m, _p)
+#define	MQ_ASSERT_SEMVAL_LEQ(sem, val)
+#endif
+
+#define	MQ_PTR(m, n)	((msghdr_t *)((uintptr_t)m + (uintptr_t)n))
+#define	HEAD_PTR(m, n)	((uint64_t *)((uintptr_t)m + \
+			(uintptr_t)m->mq_headpp + n * sizeof (uint64_t)))
+#define	TAIL_PTR(m, n)	((uint64_t *)((uintptr_t)m + \
+			(uintptr_t)m->mq_tailpp + n * sizeof (uint64_t)))
+
+#define	MQ_RESERVED	((mqdes_t *)-1)
+
+#define	ABS_TIME	0
+#define	REL_TIME	1
+
+static mutex_t mq_list_lock = DEFAULTMUTEX;
+static mqdes_t *mq_list = NULL;
+
+extern int __signotify(int cmd, siginfo_t *siginfo, signotify_id_t *sn_id);
+
+static int
+mq_is_valid(mqdes_t *mqdp)
+{
+	/*
+	 * Any use of a message queue after it was closed is
+	 * undefined.  But the standard strongly favours EBADF
+	 * returns.  Before we dereference it, which could be fatal,
+	 * we first do some pointer sanity checks.
+	 */
+	if (mqdp != NULL && mqdp != MQ_RESERVED &&
+	    ((uintptr_t)mqdp & 0x7) == 0) {
+		return (mqdp->mqd_magic == MQ_MAGIC);
+	}
+
+	return (0);
+}
+
+static void
+mq_init(mqhdr_t *mqhp, size_t msgsize, ssize_t maxmsg)
+{
+	int		i;
+	uint64_t	temp;
+	uint64_t	currentp;
+	uint64_t	nextp;
+
+	/*
+	 * We only need to initialize the non-zero fields.  The use of
+	 * ftruncate() on the message queue file assures that the
+	 * pages will be zfod.
+	 */
+	(void) mutex_init(&mqhp->mq_exclusive, USYNC_PROCESS, NULL);
+	(void) sem_init(&mqhp->mq_rblocked, 1, 0);
+	(void) sem_init(&mqhp->mq_notempty, 1, 0);
+	(void) sem_init(&mqhp->mq_spawner, 1, 0);
+	(void) sem_init(&mqhp->mq_notfull, 1, (uint_t)maxmsg);
+
+	mqhp->mq_maxsz = msgsize;
+	mqhp->mq_maxmsg = maxmsg;
+
+	/*
+	 * As of this writing (1997), there are 32 message queue priorities.
+	 * If this is to change, then the size of the mq_mask will
+	 * also have to change.  If DEBUG is defined, assert that
+	 * _MQ_PRIO_MAX hasn't changed.
+	 */
+	mqhp->mq_maxprio = _MQ_PRIO_MAX;
+#if defined(DEBUG)
+	/* LINTED always true */
+	MQ_ASSERT(sizeof (mqhp->mq_mask) * 8 >= _MQ_PRIO_MAX);
+#endif
+
+	/*
+	 * Since the message queue can be mapped into different
+	 * virtual address ranges by different processes, we don't
+	 * keep track of pointers, only offsets into the shared region.
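+	 * The resulting layout is: the _MQ_PRIO_MAX head pointers
+	 * directly after the header (at mq_headpp), the tail pointers
+	 * after those (at mq_tailpp), and the message buffers themselves
+	 * after that (starting at mq_freep); MQ_PTR() converts any such
+	 * offset back into an address in the current mapping.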
+ */ + mqhp->mq_headpp = sizeof (mqhdr_t); + mqhp->mq_tailpp = mqhp->mq_headpp + + mqhp->mq_maxprio * sizeof (uint64_t); + mqhp->mq_freep = mqhp->mq_tailpp + + mqhp->mq_maxprio * sizeof (uint64_t); + + currentp = mqhp->mq_freep; + MQ_PTR(mqhp, currentp)->msg_next = 0; + + temp = (mqhp->mq_maxsz + MQ_ALIGNSIZE - 1) & ~(MQ_ALIGNSIZE - 1); + for (i = 1; i < mqhp->mq_maxmsg; i++) { + nextp = currentp + sizeof (msghdr_t) + temp; + MQ_PTR(mqhp, currentp)->msg_next = nextp; + MQ_PTR(mqhp, nextp)->msg_next = 0; + currentp = nextp; + } +} + +static size_t +mq_getmsg(mqhdr_t *mqhp, char *msgp, uint_t *msg_prio) +{ + uint64_t currentp; + msghdr_t *curbuf; + uint64_t *headpp; + uint64_t *tailpp; + + MQ_ASSERT(MUTEX_HELD(&mqhp->mq_exclusive)); + + /* + * Get the head and tail pointers for the queue of maximum + * priority. We shouldn't be here unless there is a message for + * us, so it's fair to assert that both the head and tail + * pointers are non-NULL. + */ + headpp = HEAD_PTR(mqhp, mqhp->mq_curmaxprio); + tailpp = TAIL_PTR(mqhp, mqhp->mq_curmaxprio); + + if (msg_prio != NULL) + *msg_prio = mqhp->mq_curmaxprio; + + currentp = *headpp; + MQ_ASSERT_PTR(mqhp, currentp); + curbuf = MQ_PTR(mqhp, currentp); + + if ((*headpp = curbuf->msg_next) == NULL) { + /* + * We just nuked the last message in this priority's queue. + * Twiddle this priority's bit, and then find the next bit + * tipped. + */ + uint_t prio = mqhp->mq_curmaxprio; + + mqhp->mq_mask &= ~(1u << prio); + + for (; prio != 0; prio--) + if (mqhp->mq_mask & (1u << prio)) + break; + mqhp->mq_curmaxprio = prio; + + *tailpp = NULL; + } + + /* + * Copy the message, and put the buffer back on the free list. + */ + (void) memcpy(msgp, (char *)&curbuf[1], curbuf->msg_len); + curbuf->msg_next = mqhp->mq_freep; + mqhp->mq_freep = currentp; + + return (curbuf->msg_len); +} + + +static void +mq_putmsg(mqhdr_t *mqhp, const char *msgp, ssize_t len, uint_t prio) +{ + uint64_t currentp; + msghdr_t *curbuf; + uint64_t *headpp; + uint64_t *tailpp; + + MQ_ASSERT(MUTEX_HELD(&mqhp->mq_exclusive)); + + /* + * Grab a free message block, and link it in. We shouldn't + * be here unless there is room in the queue for us; it's + * fair to assert that the free pointer is non-NULL. + */ + currentp = mqhp->mq_freep; + MQ_ASSERT_PTR(mqhp, currentp); + curbuf = MQ_PTR(mqhp, currentp); + + /* + * Remove a message from the free list, and copy in the new contents. + */ + mqhp->mq_freep = curbuf->msg_next; + curbuf->msg_next = NULL; + (void) memcpy((char *)&curbuf[1], msgp, len); + curbuf->msg_len = len; + + headpp = HEAD_PTR(mqhp, prio); + tailpp = TAIL_PTR(mqhp, prio); + + if (*tailpp == 0) { + /* + * This is the first message on this queue. Set the + * head and tail pointers, and tip the appropriate bit + * in the priority mask. + */ + *headpp = currentp; + *tailpp = currentp; + mqhp->mq_mask |= (1u << prio); + if (prio > mqhp->mq_curmaxprio) + mqhp->mq_curmaxprio = prio; + } else { + MQ_ASSERT_PTR(mqhp, *tailpp); + MQ_PTR(mqhp, *tailpp)->msg_next = currentp; + *tailpp = currentp; + } +} + +mqd_t +_mq_open(const char *path, int oflag, /* mode_t mode, mq_attr *attr */ ...) 
+{ + va_list ap; + mode_t mode; + struct mq_attr *attr; + int fd; + int err; + int cr_flag = 0; + int locked = 0; + uint64_t total_size; + size_t msgsize; + ssize_t maxmsg; + uint64_t temp; + void *ptr; + mqdes_t *mqdp; + mqhdr_t *mqhp; + struct mq_dn *mqdnp; + + if (__pos4obj_check(path) == -1) + return ((mqd_t)-1); + + /* acquire MSGQ lock to have atomic operation */ + if (__pos4obj_lock(path, MQ_LOCK_TYPE) < 0) + goto out; + locked = 1; + + va_start(ap, oflag); + /* filter oflag to have READ/WRITE/CREATE modes only */ + oflag = oflag & (O_RDONLY|O_WRONLY|O_RDWR|O_CREAT|O_EXCL|O_NONBLOCK); + if ((oflag & O_CREAT) != 0) { + mode = va_arg(ap, mode_t); + attr = va_arg(ap, struct mq_attr *); + } + va_end(ap); + + if ((fd = __pos4obj_open(path, MQ_PERM_TYPE, oflag, + mode, &cr_flag)) < 0) + goto out; + + /* closing permission file */ + (void) __close_nc(fd); + + /* Try to open/create data file */ + if (cr_flag) { + cr_flag = PFILE_CREATE; + if (attr == NULL) { + maxmsg = MQ_MAXMSG; + msgsize = MQ_MAXSIZE; + } else if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0) { + errno = EINVAL; + goto out; + } else if (attr->mq_maxmsg > _SEM_VALUE_MAX) { + errno = ENOSPC; + goto out; + } else { + maxmsg = attr->mq_maxmsg; + msgsize = attr->mq_msgsize; + } + + /* adjust for message size at word boundary */ + temp = (msgsize + MQ_ALIGNSIZE - 1) & ~(MQ_ALIGNSIZE - 1); + + total_size = sizeof (mqhdr_t) + + maxmsg * (temp + sizeof (msghdr_t)) + + 2 * _MQ_PRIO_MAX * sizeof (uint64_t); + + if (total_size > SSIZE_MAX) { + errno = ENOSPC; + goto out; + } + + /* + * data file is opened with read/write to those + * who have read or write permission + */ + mode = mode | (mode & 0444) >> 1 | (mode & 0222) << 1; + if ((fd = __pos4obj_open(path, MQ_DATA_TYPE, + (O_RDWR|O_CREAT|O_EXCL), mode, &err)) < 0) + goto out; + + cr_flag |= DFILE_CREATE | DFILE_OPEN; + + /* force permissions to avoid umask effect */ + if (fchmod(fd, mode) < 0) + goto out; + + if (ftruncate64(fd, (off64_t)total_size) < 0) + goto out; + } else { + if ((fd = __pos4obj_open(path, MQ_DATA_TYPE, + O_RDWR, 0666, &err)) < 0) + goto out; + cr_flag = DFILE_OPEN; + + /* Message queue has not been initialized yet */ + if (read(fd, &total_size, sizeof (total_size)) != + sizeof (total_size) || total_size == 0) { + errno = ENOENT; + goto out; + } + + /* Message queue too big for this process to handle */ + if (total_size > SSIZE_MAX) { + errno = EFBIG; + goto out; + } + } + + if ((mqdp = (mqdes_t *)malloc(sizeof (mqdes_t))) == NULL) { + errno = ENOMEM; + goto out; + } + cr_flag |= ALLOC_MEM; + + if ((ptr = mmap64(NULL, total_size, PROT_READ|PROT_WRITE, + MAP_SHARED, fd, (off64_t)0)) == MAP_FAILED) + goto out; + mqhp = ptr; + cr_flag |= DFILE_MMAP; + + /* closing data file */ + (void) __close_nc(fd); + cr_flag &= ~DFILE_OPEN; + + /* + * create, unlink, size, mmap, and close description file + * all for a flag word in anonymous shared memory + */ + if ((fd = __pos4obj_open(path, MQ_DSCN_TYPE, O_RDWR | O_CREAT, + 0666, &err)) < 0) + goto out; + cr_flag |= DFILE_OPEN; + (void) __pos4obj_unlink(path, MQ_DSCN_TYPE); + if (ftruncate64(fd, (off64_t)sizeof (struct mq_dn)) < 0) + goto out; + + if ((ptr = mmap64(NULL, sizeof (struct mq_dn), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, (off64_t)0)) == MAP_FAILED) + goto out; + mqdnp = ptr; + cr_flag |= MQDNP_MMAP; + + (void) __close_nc(fd); + cr_flag &= ~DFILE_OPEN; + + /* + * we follow the same strategy as filesystem open() routine, + * where fcntl.h flags are changed to flags defined in file.h. 
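+	 * For example, with the historical <sys/file.h> values (where
+	 * FOPEN acts as -1, FREAD is 0x1, and FWRITE is 0x2):
+	 *	O_RDONLY (0) - FOPEN == FREAD
+	 *	O_WRONLY (1) - FOPEN == FWRITE
+	 *	O_RDWR   (2) - FOPEN == FREAD|FWRITE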
+ */ + mqdp->mqd_flags = (oflag - FOPEN) & (FREAD|FWRITE); + mqdnp->mqdn_flags = (oflag - FOPEN) & (FNONBLOCK); + + /* new message queue requires initialization */ + if ((cr_flag & DFILE_CREATE) != 0) { + /* message queue header has to be initialized */ + mq_init(mqhp, msgsize, maxmsg); + mqhp->mq_totsize = total_size; + } + mqdp->mqd_mq = mqhp; + mqdp->mqd_mqdn = mqdnp; + mqdp->mqd_magic = MQ_MAGIC; + mqdp->mqd_tcd = NULL; + if (__pos4obj_unlock(path, MQ_LOCK_TYPE) == 0) { + lmutex_lock(&mq_list_lock); + mqdp->mqd_next = mq_list; + mqdp->mqd_prev = NULL; + if (mq_list) + mq_list->mqd_prev = mqdp; + mq_list = mqdp; + lmutex_unlock(&mq_list_lock); + return ((mqd_t)mqdp); + } + + locked = 0; /* fall into the error case */ +out: + err = errno; + if ((cr_flag & DFILE_OPEN) != 0) + (void) __close_nc(fd); + if ((cr_flag & DFILE_CREATE) != 0) + (void) __pos4obj_unlink(path, MQ_DATA_TYPE); + if ((cr_flag & PFILE_CREATE) != 0) + (void) __pos4obj_unlink(path, MQ_PERM_TYPE); + if ((cr_flag & ALLOC_MEM) != 0) + free((void *)mqdp); + if ((cr_flag & DFILE_MMAP) != 0) + (void) munmap((caddr_t)mqhp, (size_t)total_size); + if ((cr_flag & MQDNP_MMAP) != 0) + (void) munmap((caddr_t)mqdnp, sizeof (struct mq_dn)); + if (locked) + (void) __pos4obj_unlock(path, MQ_LOCK_TYPE); + errno = err; + return ((mqd_t)-1); +} + +static void +mq_close_cleanup(mqdes_t *mqdp) +{ + mqhdr_t *mqhp = mqdp->mqd_mq; + struct mq_dn *mqdnp = mqdp->mqd_mqdn; + + /* invalidate the descriptor before freeing it */ + mqdp->mqd_magic = 0; + (void) mutex_unlock(&mqhp->mq_exclusive); + + lmutex_lock(&mq_list_lock); + if (mqdp->mqd_next) + mqdp->mqd_next->mqd_prev = mqdp->mqd_prev; + if (mqdp->mqd_prev) + mqdp->mqd_prev->mqd_next = mqdp->mqd_next; + if (mq_list == mqdp) + mq_list = mqdp->mqd_next; + lmutex_unlock(&mq_list_lock); + + free(mqdp); + (void) munmap((caddr_t)mqdnp, sizeof (struct mq_dn)); + (void) munmap((caddr_t)mqhp, (size_t)mqhp->mq_totsize); +} + +int +_mq_close(mqd_t mqdes) +{ + mqdes_t *mqdp = (mqdes_t *)mqdes; + mqhdr_t *mqhp; + thread_communication_data_t *tcdp; + + if (!mq_is_valid(mqdp)) { + errno = EBADF; + return (-1); + } + + mqhp = mqdp->mqd_mq; + (void) mutex_lock(&mqhp->mq_exclusive); + + if (mqhp->mq_des == (uintptr_t)mqdp && + mqhp->mq_sigid.sn_pid == getpid()) { + /* notification is set for this descriptor, remove it */ + (void) __signotify(SN_CANCEL, NULL, &mqhp->mq_sigid); + mqhp->mq_ntype = 0; + mqhp->mq_des = 0; + } + + pthread_cleanup_push(mq_close_cleanup, mqdp); + if ((tcdp = mqdp->mqd_tcd) != NULL) { + mqdp->mqd_tcd = NULL; + del_sigev_mq(tcdp); /* possible cancellation point */ + } + pthread_cleanup_pop(1); /* finish in the cleanup handler */ + + return (0); +} + +int +_mq_unlink(const char *path) +{ + int err; + + if (__pos4obj_check(path) < 0) + return (-1); + + if (__pos4obj_lock(path, MQ_LOCK_TYPE) < 0) { + return (-1); + } + + err = __pos4obj_unlink(path, MQ_PERM_TYPE); + + if (err == 0 || (err == -1 && errno == EEXIST)) { + errno = 0; + err = __pos4obj_unlink(path, MQ_DATA_TYPE); + } + + if (__pos4obj_unlock(path, MQ_LOCK_TYPE) < 0) + return (-1); + + return (err); + +} + +static int +__mq_timedsend(mqd_t mqdes, const char *msg_ptr, size_t msg_len, + uint_t msg_prio, const timespec_t *timeout, int abs_rel) +{ + mqdes_t *mqdp = (mqdes_t *)mqdes; + mqhdr_t *mqhp; + int err; + int notify = 0; + + /* + * sem_*wait() does cancellation, if called. + * pthread_testcancel() ensures that cancellation takes place if + * there is a cancellation pending when mq_*send() is called. 
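+	 *
+	 * A caller wanting a bounded wait would typically use the
+	 * absolute form with CLOCK_REALTIME; a minimal sketch
+	 * (names hypothetical):
+	 *	struct timespec ts;
+	 *	(void) clock_gettime(CLOCK_REALTIME, &ts);
+	 *	ts.tv_sec += 5;		(give up after ~5 seconds)
+	 *	if (mq_timedsend(mqd, buf, len, prio, &ts) == -1 &&
+	 *	    errno == ETIMEDOUT)
+	 *		... the queue stayed full the whole time ...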
+ */ + pthread_testcancel(); + + if (!mq_is_valid(mqdp) || (mqdp->mqd_flags & FWRITE) == 0) { + errno = EBADF; + return (-1); + } + + mqhp = mqdp->mqd_mq; + + if (msg_prio >= mqhp->mq_maxprio) { + errno = EINVAL; + return (-1); + } + if (msg_len > mqhp->mq_maxsz) { + errno = EMSGSIZE; + return (-1); + } + + if (mqdp->mqd_mqdn->mqdn_flags & O_NONBLOCK) + err = sem_trywait(&mqhp->mq_notfull); + else { + /* + * We might get cancelled here... + */ + if (timeout == NULL) + err = sem_wait(&mqhp->mq_notfull); + else if (abs_rel == ABS_TIME) + err = sem_timedwait(&mqhp->mq_notfull, timeout); + else + err = sem_reltimedwait_np(&mqhp->mq_notfull, timeout); + } + if (err == -1) { + /* + * errno has been set to EAGAIN / EINTR / ETIMEDOUT + * by sem_*wait(), so we can just return. + */ + return (-1); + } + + /* + * By the time we're here, we know that we've got the capacity + * to add to the queue...now acquire the exclusive lock. + */ + (void) mutex_lock(&mqhp->mq_exclusive); + + /* + * Now determine if we want to kick the notification. POSIX + * requires that if a process has registered for notification, + * we must kick it when the queue makes an empty to non-empty + * transition, and there are no blocked receivers. Note that + * this mechanism does _not_ guarantee that the kicked process + * will be able to receive a message without blocking; + * another receiver could intervene in the meantime. Thus, + * the notification mechanism is inherently racy; all we can + * do is hope to minimize the window as much as possible. + * In general, we want to avoid kicking the notification when + * there are clearly receivers blocked. We'll determine if + * we want to kick the notification before the mq_putmsg(), + * but the actual signotify() won't be done until the message + * is on the queue. 
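+	 * (For example, another receiver may consume the message as
+	 * soon as mq_exclusive is dropped, before the notified process
+	 * gets to run; a notified process should therefore receive
+	 * with O_NONBLOCK set rather than assume a message is still
+	 * waiting.)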
+ */ + if (mqhp->mq_sigid.sn_pid != 0) { + int nmessages, nblocked; + + (void) sem_getvalue(&mqhp->mq_notempty, &nmessages); + (void) sem_getvalue(&mqhp->mq_rblocked, &nblocked); + + if (nmessages == 0 && nblocked == 0) + notify = 1; + } + + mq_putmsg(mqhp, msg_ptr, (ssize_t)msg_len, msg_prio); + (void) sem_post(&mqhp->mq_notempty); + + if (notify) { + /* notify and also delete the registration */ + (void) __signotify(SN_SEND, NULL, &mqhp->mq_sigid); + if (mqhp->mq_ntype == SIGEV_THREAD || + mqhp->mq_ntype == SIGEV_PORT) + (void) sem_post(&mqhp->mq_spawner); + mqhp->mq_ntype = 0; + mqhp->mq_des = 0; + } + + MQ_ASSERT_SEMVAL_LEQ(&mqhp->mq_notempty, ((int)mqhp->mq_maxmsg)); + (void) mutex_unlock(&mqhp->mq_exclusive); + + return (0); +} + +int +_mq_send(mqd_t mqdes, const char *msg_ptr, size_t msg_len, uint_t msg_prio) +{ + return (__mq_timedsend(mqdes, msg_ptr, msg_len, msg_prio, + NULL, ABS_TIME)); +} + +int +_mq_timedsend(mqd_t mqdes, const char *msg_ptr, size_t msg_len, + uint_t msg_prio, const timespec_t *abs_timeout) +{ + return (__mq_timedsend(mqdes, msg_ptr, msg_len, msg_prio, + abs_timeout, ABS_TIME)); +} + +int +_mq_reltimedsend_np(mqd_t mqdes, const char *msg_ptr, size_t msg_len, + uint_t msg_prio, const timespec_t *rel_timeout) +{ + return (__mq_timedsend(mqdes, msg_ptr, msg_len, msg_prio, + rel_timeout, REL_TIME)); +} + +static void +decrement_rblocked(mqhdr_t *mqhp) +{ + int canstate; + + (void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &canstate); + while (sem_wait(&mqhp->mq_rblocked) == -1) + continue; + (void) pthread_setcancelstate(canstate, NULL); +} + +static ssize_t +__mq_timedreceive(mqd_t mqdes, char *msg_ptr, size_t msg_len, + uint_t *msg_prio, const timespec_t *timeout, int abs_rel) +{ + mqdes_t *mqdp = (mqdes_t *)mqdes; + mqhdr_t *mqhp; + ssize_t msg_size; + int err; + + /* + * sem_*wait() does cancellation, if called. + * pthread_testcancel() ensures that cancellation takes place if + * there is a cancellation pending when mq_*receive() is called. + */ + pthread_testcancel(); + + if (!mq_is_valid(mqdp) || (mqdp->mqd_flags & FREAD) == 0) { + errno = EBADF; + return (ssize_t)(-1); + } + + mqhp = mqdp->mqd_mq; + + if (msg_len < mqhp->mq_maxsz) { + errno = EMSGSIZE; + return (ssize_t)(-1); + } + + /* + * The semaphoring scheme for mq_[timed]receive is a little hairy + * thanks to POSIX.1b's arcane notification mechanism. First, + * we try to take the common case and do a sem_trywait(). + * If that doesn't work, and O_NONBLOCK hasn't been set, + * then note that we're going to sleep by incrementing the rblocked + * semaphore. We decrement that semaphore after waking up. + */ + if (sem_trywait(&mqhp->mq_notempty) == -1) { + if ((mqdp->mqd_mqdn->mqdn_flags & O_NONBLOCK) != 0) { + /* + * errno has been set to EAGAIN or EINTR by + * sem_trywait(), so we can just return. + */ + return (-1); + } + /* + * If we're here, then we're probably going to block... + * increment the rblocked semaphore. If we get + * cancelled, decrement_rblocked() will decrement it. + */ + (void) sem_post(&mqhp->mq_rblocked); + + pthread_cleanup_push(decrement_rblocked, mqhp); + if (timeout == NULL) + err = sem_wait(&mqhp->mq_notempty); + else if (abs_rel == ABS_TIME) + err = sem_timedwait(&mqhp->mq_notempty, timeout); + else + err = sem_reltimedwait_np(&mqhp->mq_notempty, timeout); + pthread_cleanup_pop(1); + + if (err == -1) { + /* + * We took a signal or timeout while waiting + * on mq_notempty... 
+ */ + return (-1); + } + } + + (void) mutex_lock(&mqhp->mq_exclusive); + msg_size = mq_getmsg(mqhp, msg_ptr, msg_prio); + (void) sem_post(&mqhp->mq_notfull); + MQ_ASSERT_SEMVAL_LEQ(&mqhp->mq_notfull, ((int)mqhp->mq_maxmsg)); + (void) mutex_unlock(&mqhp->mq_exclusive); + + return (msg_size); +} + +ssize_t +_mq_receive(mqd_t mqdes, char *msg_ptr, size_t msg_len, uint_t *msg_prio) +{ + return (__mq_timedreceive(mqdes, msg_ptr, msg_len, msg_prio, + NULL, ABS_TIME)); +} + +ssize_t +_mq_timedreceive(mqd_t mqdes, char *msg_ptr, size_t msg_len, + uint_t *msg_prio, const timespec_t *abs_timeout) +{ + return (__mq_timedreceive(mqdes, msg_ptr, msg_len, msg_prio, + abs_timeout, ABS_TIME)); +} + +ssize_t +_mq_reltimedreceive_np(mqd_t mqdes, char *msg_ptr, size_t msg_len, + uint_t *msg_prio, const timespec_t *rel_timeout) +{ + return (__mq_timedreceive(mqdes, msg_ptr, msg_len, msg_prio, + rel_timeout, REL_TIME)); +} + +/* + * Only used below, in _mq_notify(). + * We already have a spawner thread. + * Verify that the attributes match; cancel it if necessary. + */ +static int +cancel_if_necessary(thread_communication_data_t *tcdp, + const struct sigevent *sigevp) +{ + int do_cancel = !_pthread_attr_equal(tcdp->tcd_attrp, + sigevp->sigev_notify_attributes); + + if (do_cancel) { + /* + * Attributes don't match, cancel the spawner thread. + */ + (void) pthread_cancel(tcdp->tcd_server_id); + } else { + /* + * Reuse the existing spawner thread with possibly + * changed notification function and value. + */ + tcdp->tcd_notif.sigev_notify = SIGEV_THREAD; + tcdp->tcd_notif.sigev_signo = 0; + tcdp->tcd_notif.sigev_value = sigevp->sigev_value; + tcdp->tcd_notif.sigev_notify_function = + sigevp->sigev_notify_function; + } + + return (do_cancel); +} + +int +_mq_notify(mqd_t mqdes, const struct sigevent *sigevp) +{ + mqdes_t *mqdp = (mqdes_t *)mqdes; + mqhdr_t *mqhp; + thread_communication_data_t *tcdp; + siginfo_t mq_siginfo; + struct sigevent sigevent; + struct stat64 statb; + port_notify_t *pn; + void *userval; + int rval = -1; + int ntype; + int port; + + if (!mq_is_valid(mqdp)) { + errno = EBADF; + return (-1); + } + + mqhp = mqdp->mqd_mq; + + (void) mutex_lock(&mqhp->mq_exclusive); + + if (sigevp == NULL) { /* remove notification */ + if (mqhp->mq_des == (uintptr_t)mqdp && + mqhp->mq_sigid.sn_pid == getpid()) { + /* notification is set for this descriptor, remove it */ + (void) __signotify(SN_CANCEL, NULL, &mqhp->mq_sigid); + if ((tcdp = mqdp->mqd_tcd) != NULL) { + sig_mutex_lock(&tcdp->tcd_lock); + if (tcdp->tcd_msg_enabled) { + /* cancel the spawner thread */ + tcdp = mqdp->mqd_tcd; + mqdp->mqd_tcd = NULL; + (void) pthread_cancel( + tcdp->tcd_server_id); + } + sig_mutex_unlock(&tcdp->tcd_lock); + } + mqhp->mq_ntype = 0; + mqhp->mq_des = 0; + } else { + /* notification is not set for this descriptor */ + errno = EBUSY; + goto bad; + } + } else { /* register notification with this process */ + switch (ntype = sigevp->sigev_notify) { + case SIGEV_THREAD: + userval = sigevp->sigev_value.sival_ptr; + port = -1; + break; + case SIGEV_PORT: + pn = sigevp->sigev_value.sival_ptr; + userval = pn->portnfy_user; + port = pn->portnfy_port; + if (fstat64(port, &statb) != 0 || + !S_ISPORT(statb.st_mode)) { + errno = EBADF; + goto bad; + } + (void) memset(&sigevent, 0, sizeof (sigevent)); + sigevent.sigev_notify = SIGEV_PORT; + sigevp = &sigevent; + break; + } + switch (ntype) { + case SIGEV_NONE: + mq_siginfo.si_signo = 0; + mq_siginfo.si_code = SI_MESGQ; + break; + case SIGEV_SIGNAL: + mq_siginfo.si_signo = 
sigevp->sigev_signo; + mq_siginfo.si_value = sigevp->sigev_value; + mq_siginfo.si_code = SI_MESGQ; + break; + case SIGEV_THREAD: + if ((tcdp = mqdp->mqd_tcd) != NULL && + cancel_if_necessary(tcdp, sigevp)) + mqdp->mqd_tcd = NULL; + /* FALLTHROUGH */ + case SIGEV_PORT: + if ((tcdp = mqdp->mqd_tcd) == NULL) { + /* we must create a spawner thread */ + tcdp = setup_sigev_handler(sigevp, MQ); + if (tcdp == NULL) { + errno = EBADF; + goto bad; + } + tcdp->tcd_msg_enabled = 0; + tcdp->tcd_msg_closing = 0; + tcdp->tcd_msg_avail = &mqhp->mq_spawner; + if (launch_spawner(tcdp) != 0) { + free_sigev_handler(tcdp); + goto bad; + } + mqdp->mqd_tcd = tcdp; + } + mq_siginfo.si_signo = 0; + mq_siginfo.si_code = SI_MESGQ; + break; + default: + errno = EINVAL; + goto bad; + } + + /* register notification */ + if (__signotify(SN_PROC, &mq_siginfo, &mqhp->mq_sigid) < 0) + goto bad; + mqhp->mq_ntype = ntype; + mqhp->mq_des = (uintptr_t)mqdp; + switch (ntype) { + case SIGEV_THREAD: + case SIGEV_PORT: + tcdp->tcd_port = port; + tcdp->tcd_msg_object = mqdp; + tcdp->tcd_msg_userval = userval; + sig_mutex_lock(&tcdp->tcd_lock); + tcdp->tcd_msg_enabled = ntype; + sig_mutex_unlock(&tcdp->tcd_lock); + (void) cond_broadcast(&tcdp->tcd_cv); + break; + } + } + + rval = 0; /* success */ +bad: + (void) mutex_unlock(&mqhp->mq_exclusive); + return (rval); +} + +int +_mq_setattr(mqd_t mqdes, const struct mq_attr *mqstat, struct mq_attr *omqstat) +{ + mqdes_t *mqdp = (mqdes_t *)mqdes; + mqhdr_t *mqhp; + uint_t flag = 0; + + if (!mq_is_valid(mqdp)) { + errno = EBADF; + return (-1); + } + + /* store current attributes */ + if (omqstat != NULL) { + int count; + + mqhp = mqdp->mqd_mq; + omqstat->mq_flags = mqdp->mqd_mqdn->mqdn_flags; + omqstat->mq_maxmsg = (long)mqhp->mq_maxmsg; + omqstat->mq_msgsize = (long)mqhp->mq_maxsz; + (void) sem_getvalue(&mqhp->mq_notempty, &count); + omqstat->mq_curmsgs = count; + } + + /* set description attributes */ + if ((mqstat->mq_flags & O_NONBLOCK) != 0) + flag = FNONBLOCK; + mqdp->mqd_mqdn->mqdn_flags = flag; + + return (0); +} + +int +_mq_getattr(mqd_t mqdes, struct mq_attr *mqstat) +{ + mqdes_t *mqdp = (mqdes_t *)mqdes; + mqhdr_t *mqhp; + int count; + + if (!mq_is_valid(mqdp)) { + errno = EBADF; + return (-1); + } + + mqhp = mqdp->mqd_mq; + + mqstat->mq_flags = mqdp->mqd_mqdn->mqdn_flags; + mqstat->mq_maxmsg = (long)mqhp->mq_maxmsg; + mqstat->mq_msgsize = (long)mqhp->mq_maxsz; + (void) sem_getvalue(&mqhp->mq_notempty, &count); + mqstat->mq_curmsgs = count; + return (0); +} + +/* + * Cleanup after fork1() in the child process. + */ +void +postfork1_child_sigev_mq(void) +{ + thread_communication_data_t *tcdp; + mqdes_t *mqdp; + + for (mqdp = mq_list; mqdp; mqdp = mqdp->mqd_next) { + if ((tcdp = mqdp->mqd_tcd) != NULL) { + mqdp->mqd_tcd = NULL; + tcd_teardown(tcdp); + } + } +} diff --git a/usr/src/lib/libc/port/rt/pos4obj.c b/usr/src/lib/libc/port/rt/pos4obj.c new file mode 100644 index 0000000000..86f5a07595 --- /dev/null +++ b/usr/src/lib/libc/port/rt/pos4obj.c @@ -0,0 +1,482 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "synonyms.h"
+#include "mtlib.h"
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <pthread.h>
+#include <thread.h>
+#include <string.h>
+#include <dirent.h>
+#include <stdio.h>
+#include <dlfcn.h>
+#include <md5.h>
+#include "pos4obj.h"
+
+#define	HASHSTRLEN	32
+
+static	char	*__pos4obj_name(const char *, const char *);
+static	void	__pos4obj_md5toa(unsigned char *, unsigned char *);
+static	void	__pos4obj_clean(char *);
+
+static	char	objroot[] = "/tmp/";
+static	long int	name_max = 0;
+
+int
+__open_nc(const char *path, int oflag, mode_t mode)
+{
+	int canstate, val;
+	struct stat64 statbuf;
+
+	/*
+	 * Ensure path is not a symlink to somewhere else.  This provides
+	 * a modest amount of protection against easy security attacks.
+	 */
+	if (lstat64(path, &statbuf) == 0) {
+		if (S_ISLNK(statbuf.st_mode)) {
+			errno = EINVAL;
+			return (-1);
+		}
+	}
+
+	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &canstate);
+	val = open64(path, oflag, mode);
+	(void) pthread_setcancelstate(canstate, &canstate);
+
+	return (val);
+}
+
+int
+__close_nc(int fildes)
+{
+	int canstate, val;
+
+	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &canstate);
+	val = close(fildes);
+	(void) pthread_setcancelstate(canstate, &canstate);
+
+	return (val);
+}
+
+/*
+ * This is to avoid loading libmd.so.1 unless we absolutely have to.
+ */
+typedef void (*md5_calc_t)(unsigned char *, unsigned char *, unsigned int);
+static void *md5_handle = NULL;
+static md5_calc_t real_md5_calc = NULL;
+static mutex_t md5_lock = DEFAULTMUTEX;
+
+static void
+load_md5_calc(void)
+{
+	lmutex_lock(&md5_lock);
+	if (real_md5_calc == NULL) {
+		md5_handle = dlopen("libmd.so.1", RTLD_LAZY);
+		if (md5_handle == NULL)
+			real_md5_calc = (md5_calc_t)(-1);
+		else {
+			real_md5_calc =
+			    (md5_calc_t)dlsym(md5_handle, "md5_calc");
+			if (real_md5_calc == NULL) {
+				(void) dlclose(md5_handle);
+				md5_handle = NULL;
+				real_md5_calc = (md5_calc_t)(-1);
+			}
+		}
+	}
+	lmutex_unlock(&md5_lock);
+}
+
+static char *
+__pos4obj_name(const char *path, const char *type)
+{
+	int	shortpath = 1;
+	int	olderrno;
+	size_t	len;
+	char	*dfile;
+	unsigned char	hashbuf[HASHSTRLEN + 1];
+	unsigned char	md5_digest[MD5_DIGEST_LENGTH];
+
+	/*
+	 * If the path is name_max - strlen(type) characters or less,
+	 * the name of the file to use will be the path prefixed by
+	 * the type.
+	 *
+	 * In the special case where the path is longer than
+	 * name_max - strlen(type) characters, we create a string based on the
+	 * MD5 hash of the path.  We prefix that string with a '.' to
+	 * make it obscure, and create a directory in objroot with
+	 * that name.  In that directory, we create a directory named
+	 * after the type of object requested.  Inside the type
+	 * directory, the filename will be the path of the object.  This
+	 * prevents collisions in all namespaces.
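+	 *
+	 * In the short-path case the name is a simple concatenation;
+	 * e.g. path = "/foo" and type = ".MQD" yield "/tmp/.MQDfoo".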
+ * + * Example: + * Let objroot = "/tmp/", path = "/<longpath>", and type = ".MQD" + * Let the MD5 hash of "<longpath>" = "<hash>" + * + * The desired file is /tmp/.<hash>/.MQD/<longpath> + */ + + /* + * Do not include the leading '/' in the path length. + * Assumes __pos4obj_check(path) has already been called. + */ + if ((strlen(path) - 1) > (name_max - strlen(type))) + shortpath = 0; + + if (shortpath) { + /* + * strlen(path) includes leading slash as space for NUL. + */ + len = strlen(objroot) + strlen(type) + strlen(path); + } else { + /* + * Long path name. Add 3 for extra '/', '.' and '\0' + */ + len = strlen(objroot) + HASHSTRLEN + strlen(type) + + strlen(path) + 3; + } + + if ((dfile = malloc(len)) == NULL) + return (NULL); + + (void) memset(dfile, 0, len); + (void) strcpy(dfile, objroot); + + if (shortpath) { + (void) strcat(dfile, type); + (void) strcat(dfile, path + 1); + return (dfile); + } + + /* + * If we can successfully load it, call md5_calc(). + * Otherwise, (this "can't happen") return NULL. + */ + if (real_md5_calc == NULL) + load_md5_calc(); + if (real_md5_calc == (md5_calc_t)(-1)) { + free(dfile); + return (NULL); + } + + real_md5_calc(md5_digest, (unsigned char *)path + 1, strlen(path + 1)); + __pos4obj_md5toa(hashbuf, md5_digest); + (void) strcat(dfile, "."); + (void) strcat(dfile, (const char *)hashbuf); + + /* + * Errno must be preserved across the following calls to + * mkdir. This needs to be done to prevent incorrect error + * reporting in certain cases. When we attempt to open a + * non-existent object without the O_CREAT flag, it will + * always create a lock file first. The lock file is created + * and then the open is attempted, but fails with ENOENT. The + * lock file is then destroyed. In the following code path, we + * are finding the absolute path to the lock file after + * already having attempted the open (which set errno to + * ENOENT). The following calls to mkdir will return -1 and + * set errno to EEXIST, since the hash and type directories + * were created when the lock file was created. The correct + * errno is the ENOENT from the attempted open of the desired + * object. + */ + olderrno = errno; + + /* + * Create hash directory. Use 777 permissions so everyone can use it. + */ + if (mkdir(dfile, S_IRWXU|S_IRWXG|S_IRWXO) == 0) { + if (chmod(dfile, S_IRWXU|S_IRWXG|S_IRWXO) == -1) { + free(dfile); + return (NULL); + } + } else { + if (errno != EEXIST) { + free(dfile); + return (NULL); + } + } + + (void) strcat(dfile, "/"); + (void) strcat(dfile, type); + + /* + * Create directory for requested type. Use 777 perms so everyone + * can use it. + */ + if (mkdir(dfile, S_IRWXU|S_IRWXG|S_IRWXO) == 0) { + if (chmod(dfile, S_IRWXU|S_IRWXG|S_IRWXO) == -1) { + free(dfile); + return (NULL); + } + } else { + if (errno != EEXIST) { + free(dfile); + return (NULL); + } + } + + errno = olderrno; + (void) strcat(dfile, path); + return (dfile); +} + +/* + * Takes a 128-bit MD5 digest and transforms to a sequence of 32 ASCII + * characters. Output is the hexadecimal representation of the digest. + * + * The output buffer must be at least HASHSTRLEN + 1 characters + * long. HASHSTRLEN is the size of the MD5 digest (128 bits) + * divided by the number of bits used per char of output (4). The + * extra character at the end is for the NUL terminating character. 
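+ *
+ * Note (an observation about the code below, not an interface
+ * guarantee): the digest is read as four native-endian 32-bit words,
+ * so the resulting string can differ between big- and little-endian
+ * machines.  That is fine here; the name only has to be consistent
+ * on a single machine.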
+ */
+
+static void
+__pos4obj_md5toa(unsigned char *dest, unsigned char *src)
+{
+	int i;
+	uint32_t *p;
+
+	/* LINTED pointer cast may result in improper alignment */
+	p = (uint32_t *)src;
+
+	for (i = 0; i < (MD5_DIGEST_LENGTH / 4); i++)
+		(void) snprintf((char *)dest + (i * 8), 9, "%.8x", *p++);
+
+	dest[HASHSTRLEN] = '\0';
+}
+
+/*
+ * This open function assumes that no simultaneous open/unlink
+ * operations are going on.  The caller is expected to ensure that
+ * opens in O_CREAT mode happen atomically.
+ * It sets *crflag to 1 if the file was created, else 0.
+ */
+int
+__pos4obj_open(const char *name, char *type, int oflag,
+    mode_t mode, int *crflag)
+{
+	int fd;
+	char *dfile;
+
+	errno = 0;
+	*crflag = 0;
+
+	if ((dfile = __pos4obj_name(name, type)) == NULL) {
+		return (-1);
+	}
+
+	if (!(oflag & O_CREAT)) {
+		if ((fd = __open_nc(dfile, oflag, mode)) == -1)
+			__pos4obj_clean(dfile);
+
+		free(dfile);
+		return (fd);
+	}
+
+	/*
+	 * We need to make sure that crflag is set iff we actually create
+	 * the file.  We do this by or'ing in O_EXCL, and attempting an
+	 * open.  If that fails with an EEXIST, and O_EXCL wasn't specified
+	 * by the caller, then the file seems to exist; we'll try an
+	 * open with O_CREAT cleared.  If that succeeds, then the file
+	 * did indeed exist.  If that fails with an ENOENT, however, the
+	 * file was removed between the opens; we need to take another
+	 * lap.
+	 */
+	for (;;) {
+		if ((fd = __open_nc(dfile, (oflag | O_EXCL), mode)) == -1) {
+			if (errno == EEXIST && !(oflag & O_EXCL)) {
+				fd = __open_nc(dfile, oflag & ~O_CREAT, mode);
+
+				if (fd == -1 && errno == ENOENT)
+					continue;
+				break;
+			}
+		} else {
+			*crflag = 1;
+		}
+		break;
+	}
+
+	free(dfile);
+	return (fd);
+}
+
+int
+__pos4obj_unlink(const char *name, const char *type)
+{
+	int err;
+	char *dfile;
+
+	if ((dfile = __pos4obj_name(name, type)) == NULL) {
+		return (-1);
+	}
+
+	err = unlink(dfile);
+
+	__pos4obj_clean(dfile);
+
+	free(dfile);
+
+	return (err);
+}
+
+/*
+ * This function opens the lock file for each named object;
+ * the presence of this file in the file system is the lock.
+ */
+int
+__pos4obj_lock(const char *name, const char *ltype)
+{
+	char *dfile;
+	int fd;
+	int limit = 64;
+
+	if ((dfile = __pos4obj_name(name, ltype)) == NULL) {
+		return (-1);
+	}
+
+	while (limit-- > 0) {
+		if ((fd = __open_nc(dfile, O_RDWR | O_CREAT | O_EXCL, 0666))
+		    < 0) {
+			if (errno != EEXIST)
+				break;
+			(void) sleep(1);
+			continue;
+		}
+
+		(void) __close_nc(fd);
+		free(dfile);
+		return (1);
+	}
+
+	free(dfile);
+	return (-1);
+}
+
+/*
+ * Unlocks the file by unlinking it from the filesystem
+ */
+int
+__pos4obj_unlock(const char *path, const char *type)
+{
+	return (__pos4obj_unlink(path, type));
+}
+
+/*
+ * Removes unused hash and type directories that may exist in the
+ * specified path.
+ */
+static void
+__pos4obj_clean(char *path)
+{
+	char *p;
+	int olderrno;
+
+	/*
+	 * path is either
+	 * 1) /<objroot>/<type><path>  or
+	 * 2) /<objroot>/.<hash>/<type>/<path>
+	 *
+	 * In case 1, there is nothing to clean.
+	 *
+	 * Detect case 2 by looking for a '/' after /objroot/ and
+	 * remove the two trailing directories, if empty.
+	 */
+	if (strchr(path + strlen(objroot), '/') == NULL)
+		return;
+
+	/*
+	 * Preserve errno across calls to rmdir.  See block comment in
+	 * __pos4obj_name() for explanation.
+ */ + olderrno = errno; + + if ((p = strrchr(path, '/')) == NULL) + return; + *p = '\0'; + + (void) rmdir(path); + + if ((p = strrchr(path, '/')) == NULL) + return; + *p = '\0'; + + (void) rmdir(path); + + errno = olderrno; +} + + +/* + * Check that path starts with a /, does not contain a / within it + * and is not longer than PATH_MAX or NAME_MAX + */ +int +__pos4obj_check(const char *path) +{ + long int i; + + /* + * This assumes that __pos4obj_check() is called before + * any of the other functions in this file + */ + if (name_max == 0 || name_max == -1) { + name_max = pathconf(objroot, _PC_NAME_MAX); + if (name_max == -1) + return (-1); + } + + if (*path++ != '/') { + errno = EINVAL; + return (-1); + } + + for (i = 0; *path != '\0'; i++) { + if (*path++ == '/') { + errno = EINVAL; + return (-1); + } + } + + if (i > PATH_MAX || i > name_max) { + errno = ENAMETOOLONG; + return (-1); + } + + return (0); +} diff --git a/usr/src/lib/libc/port/rt/pos4obj.h b/usr/src/lib/libc/port/rt/pos4obj.h new file mode 100644 index 0000000000..609a43f64c --- /dev/null +++ b/usr/src/lib/libc/port/rt/pos4obj.h @@ -0,0 +1,78 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _POS4OBJ_H +#define _POS4OBJ_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * pos4obj.h - Header file for POSIX.4 related object names + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* flags used to indicate current state of open */ +#define DFILE_CREATE 0x01 +#define DFILE_OPEN 0x02 +#define ALLOC_MEM 0x04 +#define DFILE_MMAP 0x08 +#define PFILE_CREATE 0x10 +#define NFILE_CREATE 0x20 +#define MQDNP_MMAP 0x40 + +/* semaphore object types - used in constructing file name */ +#define SEM_DATA_TYPE ".SEMD" +#define SEM_LOCK_TYPE ".SEML" + +/* message queue object types - used in constructing file name */ +#define MQ_DATA_TYPE ".MQD" +#define MQ_PERM_TYPE ".MQP" +#define MQ_DSCN_TYPE ".MQN" +#define MQ_LOCK_TYPE ".MQL" + +/* shared memory object types - used in constructing file name */ +#define SHM_DATA_TYPE ".SHMD" +#define SHM_LOCK_TYPE ".SHML" + +/* functions defined related to object names in POSIX.4 */ +extern int __pos4obj_lock(const char *, const char *); +extern int __pos4obj_unlock(const char *, const char *); +extern int __pos4obj_unlink(const char *, const char *); +extern int __pos4obj_open(const char *, char *, int, mode_t, int *); +extern int __pos4obj_check(const char *); + +/* non-cancelable file operations */ +int __open_nc(const char *, int, mode_t); +int __close_nc(int); + +#ifdef __cplusplus +} +#endif + +#endif /* _POS4OBJ_H */ diff --git a/usr/src/lib/libc/port/rt/sched.c b/usr/src/lib/libc/port/rt/sched.c new file mode 100644 index 0000000000..58b793f2e2 --- /dev/null +++ b/usr/src/lib/libc/port/rt/sched.c @@ -0,0 +1,552 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "synonyms.h" +#include "mtlib.h" +#include <sys/types.h> +#include <sched.h> +#include <errno.h> +#include <limits.h> +#include <unistd.h> +#include <sys/priocntl.h> +#include <sys/rtpriocntl.h> +#include <sys/tspriocntl.h> +#include <sys/rt.h> +#include <sys/ts.h> +#include <thread.h> +#include <string.h> +#include <stdlib.h> +#include "rtsched.h" + +/* + * The following variables are used for caching information + * for priocntl scheduling classes. 
+ */ +struct pcclass ts_class; +struct pcclass rt_class; +struct pcclass ia_class; +struct pcclass sys_class; + +static rtdpent_t *rt_dptbl; /* RT class parameter table */ + +typedef struct { /* type definition for generic class-specific parameters */ + int pc_clparms[PC_CLINFOSZ]; +} pc_clparms_t; + +static int map_gp_to_rtpri(pri_t); + +/* + * cache priocntl information on scheduling classes by policy + */ +int +get_info_by_policy(int policy) +{ + char *pccname; + struct pcclass *pccp; + + if (policy < 0) { + errno = EINVAL; + return (-1); + } + + switch (policy) { + case SCHED_FIFO: + case SCHED_RR: + pccp = &rt_class; + pccname = "RT"; + break; + case SCHED_OTHER: + pccp = &ts_class; + pccname = "TS"; + break; + case SCHED_SYS: + pccp = &sys_class; + pccname = "sys"; + break; + case SCHED_IA: + pccp = &ia_class; + pccname = "IA"; + break; + default: + return (policy); + } + if (pccp->pcc_state != 0) { + if (pccp->pcc_state < 0) + errno = ENOSYS; + return (pccp->pcc_state); + } + + /* get class's info */ + (void) strcpy(pccp->pcc_info.pc_clname, pccname); + if (policy == SCHED_SYS) + pccp->pcc_info.pc_cid = 0; + else if (priocntl(P_PID, 0, PC_GETCID, (caddr_t)&(pccp->pcc_info)) < 0) + return (-1); + + if (policy == SCHED_FIFO || policy == SCHED_RR) { + pcadmin_t pcadmin; + rtadmin_t rtadmin; + size_t rtdpsize; + + /* get RT class dispatch table in rt_dptbl */ + pcadmin.pc_cid = rt_class.pcc_info.pc_cid; + pcadmin.pc_cladmin = (caddr_t)&rtadmin; + rtadmin.rt_cmd = RT_GETDPSIZE; + if (priocntl(P_PID, 0, PC_ADMIN, (caddr_t)&pcadmin) < 0) + return (-1); + rtdpsize = (size_t)(rtadmin.rt_ndpents * sizeof (rtdpent_t)); + if (rt_dptbl == NULL && + (rt_dptbl = lmalloc(rtdpsize)) == NULL) { + errno = EAGAIN; + return (-1); + } + rtadmin.rt_dpents = rt_dptbl; + rtadmin.rt_cmd = RT_GETDPTBL; + if (priocntl(P_PID, 0, PC_ADMIN, (caddr_t)&pcadmin) < 0) + return (-1); + pccp->pcc_primin = 0; + pccp->pcc_primax = ((rtinfo_t *)rt_class.pcc_info.pc_clinfo)-> + rt_maxpri; + } else if (policy == SCHED_OTHER) { + pri_t prio; + + prio = ((tsinfo_t *)ts_class.pcc_info.pc_clinfo)->ts_maxupri/3; + pccp->pcc_primin = -prio; + pccp->pcc_primax = prio; + } else { + /* non-RT scheduling class */ + pcpri_t pcpri; + + /* need RT class info before we can translate priorities */ + if (get_info_by_policy(SCHED_FIFO) < 0) + return (-1); + /* + * get class's global priority's min, max, and + * translate them into RT priority level (index) via rt_dptbl. + */ + pcpri.pc_cid = pccp->pcc_info.pc_cid; + if (priocntl(0, 0, PC_GETPRIRANGE, (caddr_t)&pcpri) < 0) + return (-1); + pccp->pcc_primax = map_gp_to_rtpri(pcpri.pc_clpmax); + pccp->pcc_primin = map_gp_to_rtpri(pcpri.pc_clpmin); + } + + pccp->pcc_state = 1; + return (1); +} + +/* + * Translate global scheduling priority to RT class's user priority. + * Use the gp values in the rt_dptbl to do a reverse mapping + * of a given gpri value relative to the index range of rt_dptbl. 
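+ *
+ * For example, with a hypothetical table in which
+ * rt_dptbl[i].rt_globpri == 100 + i for i in 0..59, a global
+ * priority of 110 maps back to RT user priority 10.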
+ */
+static int
+map_gp_to_rtpri(pri_t gpri)
+{
+	rtdpent_t	*rtdp;
+	pri_t		pri;
+
+	if (gpri <= rt_dptbl[rt_class.pcc_primin].rt_globpri) {
+		pri = gpri - rt_dptbl[rt_class.pcc_primin].rt_globpri +
+		    rt_class.pcc_primin;
+	} else if (gpri >= rt_dptbl[rt_class.pcc_primax].rt_globpri) {
+		pri = gpri - rt_dptbl[rt_class.pcc_primax].rt_globpri +
+		    rt_class.pcc_primax;
+	} else {
+		pri = rt_class.pcc_primin + 1;
+		for (rtdp = rt_dptbl+1; rtdp->rt_globpri < gpri; ++rtdp, ++pri)
+			;
+		if (rtdp->rt_globpri > gpri)
+			--pri;
+	}
+
+	return (pri);
+}
+
+/*
+ * Translate RT class's user priority to global scheduling priority.
+ */
+pri_t
+map_rtpri_to_gp(pri_t pri)
+{
+	rtdpent_t	*rtdp;
+	pri_t		gpri;
+
+	if (rt_class.pcc_state == 0)
+		(void) get_info_by_policy(SCHED_FIFO);
+
+	/* The first case is the default; the other two are seldom taken */
+	if (pri <= rt_dptbl[rt_class.pcc_primin].rt_globpri) {
+		gpri = pri + rt_dptbl[rt_class.pcc_primin].rt_globpri -
+		    rt_class.pcc_primin;
+	} else if (pri >= rt_dptbl[rt_class.pcc_primax].rt_globpri) {
+		gpri = pri + rt_dptbl[rt_class.pcc_primax].rt_globpri -
+		    rt_class.pcc_primax;
+	} else {
+		gpri = rt_dptbl[rt_class.pcc_primin].rt_globpri + 1;
+		for (rtdp = rt_dptbl+1; rtdp->rt_globpri < pri; ++rtdp, ++gpri)
+			;
+		if (rtdp->rt_globpri > pri)
+			--gpri;
+	}
+	return (gpri);
+}
+
+static int
+get_info_by_class(id_t classid)
+{
+	pcinfo_t	pcinfo;
+
+	/* determine if we already know this classid */
+	if (rt_class.pcc_state > 0 && rt_class.pcc_info.pc_cid == classid)
+		return (1);
+	if (ts_class.pcc_state > 0 && ts_class.pcc_info.pc_cid == classid)
+		return (1);
+	if (sys_class.pcc_state > 0 && sys_class.pcc_info.pc_cid == classid)
+		return (1);
+	if (ia_class.pcc_state > 0 && ia_class.pcc_info.pc_cid == classid)
+		return (1);
+
+	pcinfo.pc_cid = classid;
+	if (priocntl(0, 0, PC_GETCLINFO, (caddr_t)&pcinfo) < 0) {
+		if (classid == 0)	/* no kernel info for sys class */
+			return (get_info_by_policy(SCHED_SYS));
+		return (-1);
+	}
+
+	if (rt_class.pcc_state == 0 && strcmp(pcinfo.pc_clname, "RT") == 0)
+		return (get_info_by_policy(SCHED_FIFO));
+	if (ts_class.pcc_state == 0 && strcmp(pcinfo.pc_clname, "TS") == 0)
+		return (get_info_by_policy(SCHED_OTHER));
+	if (ia_class.pcc_state == 0 && strcmp(pcinfo.pc_clname, "IA") == 0)
+		return (get_info_by_policy(SCHED_IA));
+
+	return (1);
+}
+
+int
+sched_setparam(pid_t pid, const struct sched_param *param)
+{
+	pri_t		prio = param->sched_priority;
+	pcparms_t	pcparm;
+	tsparms_t	*tsp;
+	tsinfo_t	*tsi;
+	int		scale;
+
+	if (pid < 0) {
+		errno = ESRCH;
+		return (-1);
+	}
+	if (pid == 0)
+		pid = P_MYID;
+
+	/* get process's current scheduling policy */
+	pcparm.pc_cid = PC_CLNULL;
+	if (priocntl(P_PID, pid, PC_GETPARMS, (caddr_t)&pcparm) == -1)
+		return (-1);
+	if (get_info_by_class(pcparm.pc_cid) < 0)
+		return (-1);
+
+	if (pcparm.pc_cid == rt_class.pcc_info.pc_cid) {
+		/* SCHED_FIFO or SCHED_RR policy */
+		if (prio < rt_class.pcc_primin || prio > rt_class.pcc_primax) {
+			errno = EINVAL;
+			return (-1);
+		}
+		((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE;
+		((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
+	} else if (pcparm.pc_cid == ts_class.pcc_info.pc_cid) {
+		/* SCHED_OTHER policy */
+		tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo;
+		scale = tsi->ts_maxupri;
+		tsp = (tsparms_t *)pcparm.pc_clparms;
+		tsp->ts_uprilim = tsp->ts_upri = -(scale * prio) / 20;
+	} else {
+		/*
+		 * policy is not defined by POSIX.4.
+		 * just pass parameter data through to priocntl.
+ * param should contain an image of class-specific parameters + * (after the sched_priority member). + */ + *((pc_clparms_t *)pcparm.pc_clparms) = + *((pc_clparms_t *)(&(param->sched_priority)+1)); + } + + return ((int)priocntl(P_PID, pid, PC_SETPARMS, (caddr_t)&pcparm)); +} + +int +sched_getparam(pid_t pid, struct sched_param *param) +{ + pcparms_t pcparm; + pri_t prio; + int scale; + tsinfo_t *tsi; + + if (pid < 0) { + errno = ESRCH; + return (-1); + } + if (pid == 0) + pid = P_MYID; + + pcparm.pc_cid = PC_CLNULL; + if (priocntl(P_PID, pid, PC_GETPARMS, (caddr_t)&pcparm) == -1) + return (-1); + if (get_info_by_class(pcparm.pc_cid) < 0) + return (-1); + + if (pcparm.pc_cid == rt_class.pcc_info.pc_cid) { + param->sched_priority = + ((rtparms_t *)pcparm.pc_clparms)->rt_pri; + } else if (pcparm.pc_cid == ts_class.pcc_info.pc_cid) { + param->sched_nicelim = + ((tsparms_t *)pcparm.pc_clparms)->ts_uprilim; + prio = param->sched_nice = + ((tsparms_t *)pcparm.pc_clparms)->ts_upri; + tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo; + scale = tsi->ts_maxupri; + if (scale == 0) + param->sched_priority = 0; + else + param->sched_priority = -(prio * 20) / scale; + } else { + /* + * policy is not defined by POSIX.4 + * just return a copy of pcparams_t image in param. + */ + *((pc_clparms_t *)(&(param->sched_priority)+1)) = + *((pc_clparms_t *)pcparm.pc_clparms); + param->sched_priority = + sched_get_priority_min((int)(pcparm.pc_cid + _SCHED_NEXT)); + } + + return (0); +} + +int +sched_setscheduler(pid_t pid, int policy, const struct sched_param *param) +{ + pri_t prio = param->sched_priority; + pcparms_t pcparm; + int oldpolicy; + tsinfo_t *tsi; + tsparms_t *tsp; + int scale; + + if ((oldpolicy = sched_getscheduler(pid)) < 0) + return (-1); + + if (pid == 0) + pid = P_MYID; + + if (get_info_by_policy(policy) < 0) { + errno = EINVAL; + return (-1); + } + + switch (policy) { + case SCHED_FIFO: + case SCHED_RR: + if (prio < rt_class.pcc_primin || prio > rt_class.pcc_primax) { + errno = EINVAL; + return (-1); + } + pcparm.pc_cid = rt_class.pcc_info.pc_cid; + ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; + ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = + (policy == SCHED_RR ? RT_TQDEF : RT_TQINF); + break; + + case SCHED_OTHER: + pcparm.pc_cid = ts_class.pcc_info.pc_cid; + tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo; + scale = tsi->ts_maxupri; + tsp = (tsparms_t *)pcparm.pc_clparms; + tsp->ts_uprilim = tsp->ts_upri = -(scale * prio) / 20; + break; + + default: + switch (policy) { + case SCHED_SYS: + pcparm.pc_cid = sys_class.pcc_info.pc_cid; + break; + case SCHED_IA: + pcparm.pc_cid = ia_class.pcc_info.pc_cid; + break; + default: + pcparm.pc_cid = policy - _SCHED_NEXT; + break; + } + /* + * policy is not defined by POSIX.4. + * just pass parameter data through to priocntl. + * param should contain an image of class-specific parameters + * (after the sched_priority member). 
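+	 * That is, the class-specific image is taken to begin one int
+	 * past sched_priority, at (pc_clparms_t *)(&(param->sched_priority)+1).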
+	 */
+	*((pc_clparms_t *)pcparm.pc_clparms) =
+	    *((pc_clparms_t *)(&(param->sched_priority)+1));
+	}
+
+	/* setting scheduling policy & parameters for the process */
+	if (priocntl(P_PID, pid, PC_SETPARMS, (caddr_t)&pcparm) == -1)
+		return (-1);
+
+	return (oldpolicy);
+}
+
+int
+sched_getscheduler(pid_t pid)
+{
+	pcparms_t	pcparm;
+	int		policy;
+
+	if (pid < 0) {
+		errno = ESRCH;
+		return (-1);
+	}
+	if (pid == 0)
+		pid = P_MYID;
+
+	/* get scheduling policy & parameters for the process */
+	pcparm.pc_cid = PC_CLNULL;
+	if (priocntl(P_PID, pid, PC_GETPARMS, (caddr_t)&pcparm) == -1)
+		return (-1);
+	if (get_info_by_class(pcparm.pc_cid) < 0)
+		return (-1);
+
+	if (pcparm.pc_cid == rt_class.pcc_info.pc_cid)
+		policy = ((((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+		    RT_TQINF ? SCHED_FIFO : SCHED_RR));
+	else if (pcparm.pc_cid == ts_class.pcc_info.pc_cid)
+		policy = SCHED_OTHER;
+	else if (pcparm.pc_cid == sys_class.pcc_info.pc_cid)
+		policy = SCHED_SYS;
+	else if (pcparm.pc_cid == ia_class.pcc_info.pc_cid)
+		policy = SCHED_IA;
+	else {
+		/*
+		 * policy is not defined by POSIX.4
+		 * return a unique dot4 policy id.
+		 */
+		policy = (int)(_SCHED_NEXT + pcparm.pc_cid);
+	}
+
+	return (policy);
+}
+
+int
+sched_yield(void)
+{
+	thr_yield();
+	return (0);
+}
+
+int
+sched_get_priority_max(int policy)
+{
+	pcpri_t pcpri;
+
+	if (get_info_by_policy(policy) < 0)
+		return (-1);
+
+	if (policy == SCHED_FIFO || policy == SCHED_RR)
+		return (rt_class.pcc_primax);
+	else if (policy == SCHED_OTHER)
+		return (ts_class.pcc_primax);
+	else if (policy == SCHED_SYS)
+		return (sys_class.pcc_primax);
+	else if (policy == SCHED_IA)
+		return (ia_class.pcc_primax);
+	else {	/* policy not in POSIX.4 */
+		pcpri.pc_cid = policy - _SCHED_NEXT;
+		if (priocntl(0, 0, PC_GETPRIRANGE, (caddr_t)&pcpri) == 0)
+			return (map_gp_to_rtpri(pcpri.pc_clpmax));
+	}
+
+	errno = EINVAL;
+	return (-1);
+}
+
+int
+sched_get_priority_min(int policy)
+{
+	pcpri_t pcpri;
+
+	if (get_info_by_policy(policy) < 0)
+		return (-1);
+
+	if (policy == SCHED_FIFO || policy == SCHED_RR)
+		return (rt_class.pcc_primin);
+	else if (policy == SCHED_OTHER)
+		return (ts_class.pcc_primin);
+	else if (policy == SCHED_SYS)
+		return (sys_class.pcc_primin);
+	else if (policy == SCHED_IA)
+		return (ia_class.pcc_primin);
+	else {	/* policy not in POSIX.4 */
+		pcpri.pc_cid = policy - _SCHED_NEXT;
+		if (priocntl(0, 0, PC_GETPRIRANGE, (caddr_t)&pcpri) == 0)
+			return (map_gp_to_rtpri(pcpri.pc_clpmin));
+	}
+
+	errno = EINVAL;
+	return (-1);
+}
+
+int
+sched_rr_get_interval(pid_t pid, timespec_t *interval)
+{
+	pcparms_t pcparm;
+
+	if (pid < 0) {
+		errno = ESRCH;
+		return (-1);
+	}
+	if (pid == 0)
+		pid = P_MYID;
+
+	if (get_info_by_policy(SCHED_RR) < 0)
+		return (-1);
+
+	pcparm.pc_cid = PC_CLNULL;
+	if (priocntl(P_PID, pid, PC_GETPARMS, (caddr_t)&pcparm) == -1)
+		return (-1);
+
+	if (pcparm.pc_cid == rt_class.pcc_info.pc_cid &&
+	    (((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs != RT_TQINF)) {
+		/* SCHED_RR */
+		interval->tv_sec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqsecs;
+		interval->tv_nsec =
+		    ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs;
+		return (0);
+	}
+
+	errno = EINVAL;
+	return (-1);
+}
diff --git a/usr/src/lib/libc/port/rt/sem.c b/usr/src/lib/libc/port/rt/sem.c
new file mode 100644
index 0000000000..af3bdcc06a
--- /dev/null
+++ b/usr/src/lib/libc/port/rt/sem.c
@@ -0,0 +1,367 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#pragma weak sem_open = _sem_open +#pragma weak sem_close = _sem_close +#pragma weak sem_unlink = _sem_unlink +#pragma weak sem_init = _sem_init +#pragma weak sem_destroy = _sem_destroy +#pragma weak sem_wait = _sem_wait +#pragma weak sem_timedwait = _sem_timedwait +#pragma weak sem_reltimedwait_np = _sem_reltimedwait_np +#pragma weak sem_trywait = _sem_trywait +#pragma weak sem_post = _sem_post +#pragma weak sem_getvalue = _sem_getvalue + +#include "synonyms.h" +#include "mtlib.h" +#include <sys/types.h> +#include <semaphore.h> +#include <synch.h> +#include <errno.h> +#include <stdarg.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <unistd.h> +#include <thread.h> +#include "pos4obj.h" + +typedef struct semaddr { + struct semaddr *sad_next; /* next in the link */ + char sad_name[PATH_MAX + 1]; /* name of sem object */ + sem_t *sad_addr; /* mmapped address of semaphore */ + ino64_t sad_inode; /* inode # of the mmapped file */ +} semaddr_t; + +static long semvaluemax = 0; +static semaddr_t *semheadp = NULL; +static mutex_t semlock = DEFAULTMUTEX; + +sem_t * +_sem_open(const char *path, int oflag, /* mode_t mode, int value */ ...) 
+{
+	va_list		ap;
+	mode_t		crmode = 0;
+	sem_t		*sem = NULL;
+	struct stat64	statbuf;
+	semaddr_t	*next = NULL;
+	int		fd = 0;
+	int		error = 0;
+	int		cr_flag = 0;
+	uint_t		value = 0;
+
+	if (__pos4obj_check(path) == -1)
+		return (SEM_FAILED);
+
+	/* acquire semaphore lock to have atomic operation */
+	if (__pos4obj_lock(path, SEM_LOCK_TYPE) < 0)
+		return (SEM_FAILED);
+
+	/* modify oflag to have RDWR and filter CREATE mode only */
+	oflag = (oflag & (O_CREAT|O_EXCL)) | (O_RDWR);
+	if (oflag & O_CREAT) {
+		if (semvaluemax == 0 &&
+		    (semvaluemax = _sysconf(_SC_SEM_VALUE_MAX)) <= 0)
+			semvaluemax = -1;
+		va_start(ap, oflag);
+		crmode = va_arg(ap, mode_t);
+		value = va_arg(ap, uint_t);
+		va_end(ap);
+		/* check value < the max for a named semaphore */
+		if (semvaluemax < 0 ||
+		    (ulong_t)value > (ulong_t)semvaluemax) {
+			errno = EINVAL;
+			goto out;
+		}
+	}
+
+	errno = 0;
+
+	if ((fd = __pos4obj_open(path, SEM_DATA_TYPE,
+	    oflag, crmode, &cr_flag)) < 0)
+		goto out;
+
+	if (cr_flag)
+		cr_flag = DFILE_CREATE | DFILE_OPEN;
+	else
+		cr_flag = DFILE_OPEN;
+
+	/* find out inode # for the opened file */
+	if (fstat64(fd, &statbuf) < 0)
+		goto out;
+
+	/* if we created the file, extend it to its full size */
+	if ((cr_flag & DFILE_CREATE) != 0) {
+		if (ftruncate64(fd, (off64_t)sizeof (sem_t)) < 0)
+			goto out;
+	} else {
+		/*
+		 * If this semaphore has already been opened, the inode
+		 * number will match; return the already-mapped
+		 * semaphore address.
+		 */
+		lmutex_lock(&semlock);
+		for (next = semheadp; next != NULL; next = next->sad_next) {
+			if (statbuf.st_ino == next->sad_inode &&
+			    strcmp(path, next->sad_name) == 0) {
+				(void) __close_nc(fd);
+				lmutex_unlock(&semlock);
+				(void) __pos4obj_unlock(path, SEM_LOCK_TYPE);
+				return (next->sad_addr);
+			}
+		}
+		lmutex_unlock(&semlock);
+	}
+
+	/* new sem descriptor to be allocated and new address to be mapped */
+	if ((next = malloc(sizeof (semaddr_t))) == NULL) {
+		errno = ENOMEM;
+		goto out;
+	}
+	cr_flag |= ALLOC_MEM;
+
+	/* LINTED */
+	sem = (sem_t *)mmap64(NULL, sizeof (sem_t), PROT_READ|PROT_WRITE,
+	    MAP_SHARED, fd, (off64_t)0);
+	(void) __close_nc(fd);
+	cr_flag &= ~DFILE_OPEN;
+	if (sem == MAP_FAILED)
+		goto out;
+	cr_flag |= DFILE_MMAP;
+
+	/* if created, initialize */
+	if (cr_flag & DFILE_CREATE) {
+		error = sema_init((sema_t *)sem, value, USYNC_PROCESS, 0);
+		if (error) {
+			errno = error;
+			goto out;
+		}
+	}
+
+	if (__pos4obj_unlock(path, SEM_LOCK_TYPE) == 0) {
+		/* add to the list pointed to by semheadp */
+		lmutex_lock(&semlock);
+		next->sad_next = semheadp;
+		semheadp = next;
+		next->sad_addr = sem;
+		next->sad_inode = statbuf.st_ino;
+		(void) strcpy(next->sad_name, path);
+		lmutex_unlock(&semlock);
+		return (sem);
+	}
+	/* fall into the error case */
+out:
+	error = errno;
+	if ((cr_flag & DFILE_OPEN) != 0)
+		(void) __close_nc(fd);
+	if ((cr_flag & DFILE_CREATE) != 0)
+		(void) __pos4obj_unlink(path, SEM_DATA_TYPE);
+	if ((cr_flag & ALLOC_MEM) != 0)
+		free(next);
+	if ((cr_flag & DFILE_MMAP) != 0)
+		(void) munmap((caddr_t)sem, sizeof (sem_t));
+	(void) __pos4obj_unlock(path, SEM_LOCK_TYPE);
+	errno = error;
+	return (SEM_FAILED);
+}
+
+int
+_sem_close(sem_t *sem)
+{
+	semaddr_t	**next;
+	semaddr_t	*freeit;
+
+	lmutex_lock(&semlock);
+	for (next = &semheadp; (freeit = *next) != NULL;
+	    next = &(freeit->sad_next)) {
+		if (freeit->sad_addr == sem) {
+			*next = freeit->sad_next;
+			lmutex_unlock(&semlock);
+			free(freeit);
+			return (munmap((caddr_t)sem, sizeof (sem_t)));
+		}
+	}
+	lmutex_unlock(&semlock);
+	errno = EINVAL;
+	return (-1);
+}
+
+int
+_sem_unlink(const char *path)
+{
+ int error; + int oerrno; + + if (__pos4obj_check(path) < 0) + return (-1); + + if (__pos4obj_lock(path, SEM_LOCK_TYPE) < 0) + return (-1); + + error = __pos4obj_unlink(path, SEM_DATA_TYPE); + + oerrno = errno; + + (void) __pos4obj_unlock(path, SEM_LOCK_TYPE); + + errno = oerrno; + + return (error); +} + +/* + * SUSV3 requires ("shall fail") an EINVAL failure for operations + * on invalid semaphores, including uninitialized unnamed semaphores. + * The best we can do is check that the magic number is correct. + * This is not perfect, but it allows the test suite to pass. + * (Standards bodies are filled with fools and idiots.) + */ +static int +sem_invalid(sem_t *sem) +{ + if (sem->sem_magic != SEMA_MAGIC) { + errno = EINVAL; + return (-1); + } + return (0); +} + +int +_sem_init(sem_t *sem, int pshared, uint_t value) +{ + int error; + + if ((error = sema_init((sema_t *)sem, value, + pshared ? USYNC_PROCESS : USYNC_THREAD, NULL)) != 0) { + errno = error; + return (-1); + } + return (0); +} + +int +_sem_destroy(sem_t *sem) +{ + int error; + + if (sem_invalid(sem)) + return (-1); + if ((error = sema_destroy((sema_t *)sem)) != 0) { + errno = error; + return (-1); + } + return (0); +} + +int +_sem_post(sem_t *sem) +{ + int error; + + if (sem_invalid(sem)) + return (-1); + if ((error = sema_post((sema_t *)sem)) != 0) { + errno = error; + return (-1); + } + return (0); +} + +int +_sem_wait(sem_t *sem) +{ + int error; + + if (sem_invalid(sem)) + return (-1); + if ((error = sema_wait((sema_t *)sem)) != 0) { + errno = error; + return (-1); + } + return (0); +} + +int +_sem_timedwait(sem_t *sem, const timespec_t *abstime) +{ + int error; + + if (sem_invalid(sem)) + return (-1); + if ((error = sema_timedwait((sema_t *)sem, abstime)) != 0) { + if (error == ETIME) + error = ETIMEDOUT; + errno = error; + return (-1); + } + return (0); +} + +int +_sem_reltimedwait_np(sem_t *sem, const timespec_t *reltime) +{ + int error; + + if (sem_invalid(sem)) + return (-1); + if ((error = sema_reltimedwait((sema_t *)sem, reltime)) != 0) { + if (error == ETIME) + error = ETIMEDOUT; + errno = error; + return (-1); + } + return (0); +} + +int +_sem_trywait(sem_t *sem) +{ + int error; + + if (sem_invalid(sem)) + return (-1); + if ((error = sema_trywait((sema_t *)sem)) != 0) { + if (error == EBUSY) + error = EAGAIN; + errno = error; + return (-1); + } + return (0); +} + +int +_sem_getvalue(sem_t *sem, int *sval) +{ + if (sem_invalid(sem)) + return (-1); + *sval = (int)sem->sem_count; + return (0); +} diff --git a/usr/src/lib/libc/port/rt/shm.c b/usr/src/lib/libc/port/rt/shm.c new file mode 100644 index 0000000000..53c59d9424 --- /dev/null +++ b/usr/src/lib/libc/port/rt/shm.c @@ -0,0 +1,95 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "synonyms.h"
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <errno.h>
+#include "pos4obj.h"
+
+int
+shm_open(const char *path, int oflag, mode_t mode)
+{
+	int crflag;
+	int fd;
+	int flags;
+
+	if (__pos4obj_check(path) == -1)
+		return (-1);
+
+	/* acquire the shared memory lock to make the operations atomic */
+	if (__pos4obj_lock(path, SHM_LOCK_TYPE) < 0)
+		return (-1);
+
+	fd = __pos4obj_open(path, SHM_DATA_TYPE, oflag, mode, &crflag);
+
+	if (fd < 0) {
+		(void) __pos4obj_unlock(path, SHM_LOCK_TYPE);
+		return (-1);
+	}
+
+	if ((flags = fcntl(fd, F_GETFD)) < 0 ||
+	    fcntl(fd, F_SETFD, flags | FD_CLOEXEC) < 0) {
+		(void) __pos4obj_unlock(path, SHM_LOCK_TYPE);
+		(void) __close_nc(fd);
+		return (-1);
+	}
+
+	/* release the shared memory lock */
+	if (__pos4obj_unlock(path, SHM_LOCK_TYPE) < 0) {
+		(void) __close_nc(fd);
+		return (-1);
+	}
+
+	return (fd);
+}
+
+int
+shm_unlink(const char *path)
+{
+	int oerrno;
+	int err;
+
+	if (__pos4obj_check(path) < 0)
+		return (-1);
+
+	if (__pos4obj_lock(path, SHM_LOCK_TYPE) < 0)
+		return (-1);
+
+	err = __pos4obj_unlink(path, SHM_DATA_TYPE);
+
+	oerrno = errno;
+
+	(void) __pos4obj_unlock(path, SHM_LOCK_TYPE);
+
+	errno = oerrno;
+	return (err);
+}
diff --git a/usr/src/lib/libc/port/rt/sigev_thread.c b/usr/src/lib/libc/port/rt/sigev_thread.c
new file mode 100644
index 0000000000..0ab6eaccdf
--- /dev/null
+++ b/usr/src/lib/libc/port/rt/sigev_thread.c
@@ -0,0 +1,715 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "synonyms.h"
+#include "thr_uberdata.h"
+#include <sys/types.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <thread.h>
+#include <synch.h>
+#include <port.h>
+#include <signal.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <string.h>
+#include <sys/aiocb.h>
+#include <time.h>
+#include <fcntl.h>
+#include "sigev_thread.h"
+
+/*
+ * There is but one spawner for all aio operations.
+ */
+thread_communication_data_t *sigev_aio_tcd = NULL;
+
+/*
+ * Set non-zero via _RT_DEBUG to enable debugging printf's.
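+ * For example (hypothetical program name), running a client as
+ *	$ _RT_DEBUG=1 ./server
+ * makes the spawner threads report port_get() failures on stderr.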
+ */
+static int _rt_debug = 0;
+
+void
+init_sigev_thread(void)
+{
+	char *ldebug;
+
+	if ((ldebug = getenv("_RT_DEBUG")) != NULL)
+		_rt_debug = atoi(ldebug);
+}
+
+/*
+ * Routine to print debug messages:
+ * If _rt_debug is set, printf the debug message to stderr
+ * with an appropriate prefix.
+ */
+/*PRINTFLIKE1*/
+static void
+dprintf(const char *format, ...)
+{
+	if (_rt_debug) {
+		va_list alist;
+
+		va_start(alist, format);
+		flockfile(stderr);
+		(void) fputs("DEBUG: ", stderr);
+		(void) vfprintf(stderr, format, alist);
+		funlockfile(stderr);
+		va_end(alist);
+	}
+}
+
+/*
+ * The notify_thread() function can be used as the start function of a new
+ * thread but it is normally called from notifier(), below, in the context
+ * of a thread pool worker thread.  It is used as the start function of a
+ * new thread only when individual pthread attributes differ from those
+ * that are common to all workers.  This only occurs in the AIO case.
+ */
+static void *
+notify_thread(void *arg)
+{
+	sigev_thread_data_t *stdp = arg;
+	void (*function)(union sigval) = stdp->std_func;
+	union sigval argument = stdp->std_arg;
+
+	lfree(stdp, sizeof (*stdp));
+	function(argument);
+	return (NULL);
+}
+
+/*
+ * Thread pool interface to call the user-supplied notification function.
+ */
+static void
+notifier(void *arg)
+{
+	(void) notify_thread(arg);
+}
+
+/*
+ * This routine adds a new work request, described by function
+ * and argument, to the list of outstanding jobs.
+ * It returns 0 on success; a nonzero return value indicates an error.
+ */
+static int
+sigev_add_work(thread_communication_data_t *tcdp,
+	void (*function)(union sigval), union sigval argument)
+{
+	tpool_t *tpool = tcdp->tcd_poolp;
+	sigev_thread_data_t *stdp;
+
+	if (tpool == NULL)
+		return (EINVAL);
+	if ((stdp = lmalloc(sizeof (*stdp))) == NULL)
+		return (errno);
+	stdp->std_func = function;
+	stdp->std_arg = argument;
+	if (tpool_dispatch(tpool, notifier, stdp) != 0) {
+		lfree(stdp, sizeof (*stdp));
+		return (errno);
+	}
+	return (0);
+}
+
+static void
+sigev_destroy_pool(thread_communication_data_t *tcdp)
+{
+	if (tcdp->tcd_poolp != NULL)
+		tpool_abandon(tcdp->tcd_poolp);
+	tcdp->tcd_poolp = NULL;
+
+	if (tcdp->tcd_subsystem == MQ) {
+		/*
+		 * synchronize with del_sigev_mq()
+		 */
+		sig_mutex_lock(&tcdp->tcd_lock);
+		tcdp->tcd_server_id = 0;
+		if (tcdp->tcd_msg_closing) {
+			(void) cond_broadcast(&tcdp->tcd_cv);
+			sig_mutex_unlock(&tcdp->tcd_lock);
+			return;		/* del_sigev_mq() will free the tcd */
+		}
+		sig_mutex_unlock(&tcdp->tcd_lock);
+	}
+
+	/*
+	 * now delete everything
+	 */
+	free_sigev_handler(tcdp);
+}
+
+/*
+ * timer_spawner(), mqueue_spawner(), and aio_spawner() are the main
+ * functions for the daemon threads that get the event(s) for the
+ * respective SIGEV_THREAD subsystems.  There is one timer spawner for
+ * each timer_create(), one mqueue spawner for every mq_open(), and
+ * exactly one aio spawner for all aio requests.  These spawners add
+ * work requests to be done by a pool of daemon worker threads.  In case
+ * the event requires creation of a worker thread with different pthread
+ * attributes than those from the pool of workers, a new daemon thread
+ * with these attributes is spawned apart from the pool of workers.
+ * If the spawner fails to add work or fails to create an additional
+ * thread for lack of resources, it puts the event back into
+ * the kernel queue and retries some time later.
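+ * (For the aio case this retry is visible in aio_spawner() below:
+ * on EAGAIN or ENOMEM the event is pushed back with _port_dispatch()
+ * and the spawner sleeps briefly before calling port_get() again.)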
+ */ + +void * +timer_spawner(void *arg) +{ + thread_communication_data_t *tcdp = (thread_communication_data_t *)arg; + port_event_t port_event; + + /* destroy the pool if we are cancelled */ + pthread_cleanup_push(sigev_destroy_pool, tcdp); + + for (;;) { + if (port_get(tcdp->tcd_port, &port_event, NULL) != 0) { + dprintf("port_get on port %d failed with %d <%s>\n", + tcdp->tcd_port, errno, strerror(errno)); + break; + } + switch (port_event.portev_source) { + case PORT_SOURCE_TIMER: + break; + case PORT_SOURCE_ALERT: + if (port_event.portev_events != SIGEV_THREAD_TERM) + errno = EPROTO; + goto out; + default: + dprintf("port_get on port %d returned %u " + "(not PORT_SOURCE_TIMER)\n", + tcdp->tcd_port, port_event.portev_source); + errno = EPROTO; + goto out; + } + + tcdp->tcd_overruns = port_event.portev_events - 1; + if (sigev_add_work(tcdp, + tcdp->tcd_notif.sigev_notify_function, + tcdp->tcd_notif.sigev_value) != 0) + break; + /* wait until job is done before looking for another */ + tpool_wait(tcdp->tcd_poolp); + } +out: + pthread_cleanup_pop(1); + return (NULL); +} + +void * +mqueue_spawner(void *arg) +{ + thread_communication_data_t *tcdp = (thread_communication_data_t *)arg; + int ret = 0; + int ntype; + void (*function)(union sigval); + union sigval argument; + + /* destroy the pool if we are cancelled */ + pthread_cleanup_push(sigev_destroy_pool, tcdp); + + while (ret == 0) { + sig_mutex_lock(&tcdp->tcd_lock); + pthread_cleanup_push(sig_mutex_unlock, &tcdp->tcd_lock); + while ((ntype = tcdp->tcd_msg_enabled) == 0) + (void) sig_cond_wait(&tcdp->tcd_cv, &tcdp->tcd_lock); + pthread_cleanup_pop(1); + + while (sem_wait(tcdp->tcd_msg_avail) == -1) + continue; + + sig_mutex_lock(&tcdp->tcd_lock); + tcdp->tcd_msg_enabled = 0; + sig_mutex_unlock(&tcdp->tcd_lock); + + /* ASSERT(ntype == SIGEV_THREAD || ntype == SIGEV_PORT); */ + if (ntype == SIGEV_THREAD) { + function = tcdp->tcd_notif.sigev_notify_function; + argument.sival_ptr = tcdp->tcd_msg_userval; + ret = sigev_add_work(tcdp, function, argument); + } else { /* ntype == SIGEV_PORT */ + ret = _port_dispatch(tcdp->tcd_port, 0, PORT_SOURCE_MQ, + 0, (uintptr_t)tcdp->tcd_msg_object, + tcdp->tcd_msg_userval); + } + } + sig_mutex_unlock(&tcdp->tcd_lock); + + pthread_cleanup_pop(1); + return (NULL); +} + +void * +aio_spawner(void *arg) +{ + thread_communication_data_t *tcdp = (thread_communication_data_t *)arg; + int error = 0; + void (*function)(union sigval); + union sigval argument; + port_event_t port_event; + struct sigevent *sigevp; + timespec_t delta; + pthread_attr_t *attrp; + + /* destroy the pool if we are cancelled */ + pthread_cleanup_push(sigev_destroy_pool, tcdp); + + while (error == 0) { + if (port_get(tcdp->tcd_port, &port_event, NULL) != 0) { + error = errno; + dprintf("port_get on port %d failed with %d <%s>\n", + tcdp->tcd_port, error, strerror(error)); + break; + } + switch (port_event.portev_source) { + case PORT_SOURCE_AIO: + break; + case PORT_SOURCE_ALERT: + if (port_event.portev_events != SIGEV_THREAD_TERM) + errno = EPROTO; + goto out; + default: + dprintf("port_get on port %d returned %u " + "(not PORT_SOURCE_AIO)\n", + tcdp->tcd_port, port_event.portev_source); + errno = EPROTO; + goto out; + } + argument.sival_ptr = port_event.portev_user; + switch (port_event.portev_events) { + case AIOLIO: +#if !defined(_LP64) + case AIOLIO64: +#endif + sigevp = (struct sigevent *)port_event.portev_object; + function = sigevp->sigev_notify_function; + attrp = sigevp->sigev_notify_attributes; + break; + case AIOAREAD: + case 
AIOAWRITE: + case AIOFSYNC: + { + aiocb_t *aiocbp = + (aiocb_t *)port_event.portev_object; + function = aiocbp->aio_sigevent.sigev_notify_function; + attrp = aiocbp->aio_sigevent.sigev_notify_attributes; + break; + } +#if !defined(_LP64) + case AIOAREAD64: + case AIOAWRITE64: + case AIOFSYNC64: + { + aiocb64_t *aiocbp = + (aiocb64_t *)port_event.portev_object; + function = aiocbp->aio_sigevent.sigev_notify_function; + attrp = aiocbp->aio_sigevent.sigev_notify_attributes; + break; + } +#endif + default: + function = NULL; + attrp = NULL; + break; + } + + if (function == NULL) + error = EINVAL; + else if (_pthread_attr_equal(attrp, tcdp->tcd_attrp)) + error = sigev_add_work(tcdp, function, argument); + else { + /* + * The attributes don't match. + * Spawn a thread with the non-matching attributes. + */ + pthread_attr_t local_attr; + sigev_thread_data_t *stdp; + + if ((stdp = lmalloc(sizeof (*stdp))) == NULL) + error = ENOMEM; + else + error = _pthread_attr_clone(&local_attr, attrp); + + if (error == 0) { + (void) pthread_attr_setdetachstate( + &local_attr, PTHREAD_CREATE_DETACHED); + (void) _pthread_attr_setdaemonstate_np( + &local_attr, PTHREAD_CREATE_DAEMON_NP); + stdp->std_func = function; + stdp->std_arg = argument; + error = pthread_create(NULL, &local_attr, + notify_thread, stdp); + (void) pthread_attr_destroy(&local_attr); + } + if (error && stdp != NULL) + lfree(stdp, sizeof (*stdp)); + } + + if (error) { + dprintf("Cannot add work, error=%d <%s>.\n", + error, strerror(error)); + if (error == EAGAIN || error == ENOMEM) { + /* (Temporary) no resources are available. */ + if (_port_dispatch(tcdp->tcd_port, 0, + PORT_SOURCE_AIO, port_event.portev_events, + port_event.portev_object, + port_event.portev_user) != 0) + break; + error = 0; + delta.tv_sec = 0; + delta.tv_nsec = NANOSEC / 20; /* 50 msec */ + (void) nanosleep(&delta, NULL); + } + } + } +out: + pthread_cleanup_pop(1); + return (NULL); +} + +/* + * Allocate a thread_communication_data_t block. + */ +static thread_communication_data_t * +alloc_sigev_handler(subsystem_t caller) +{ + thread_communication_data_t *tcdp; + + if ((tcdp = lmalloc(sizeof (*tcdp))) != NULL) { + tcdp->tcd_subsystem = caller; + tcdp->tcd_port = -1; + (void) mutex_init(&tcdp->tcd_lock, USYNC_THREAD, NULL); + (void) cond_init(&tcdp->tcd_cv, USYNC_THREAD, NULL); + } + return (tcdp); +} + +/* + * Free a thread_communication_data_t block. + */ +void +free_sigev_handler(thread_communication_data_t *tcdp) +{ + if (tcdp->tcd_attrp) { + (void) pthread_attr_destroy(tcdp->tcd_attrp); + tcdp->tcd_attrp = NULL; + } + (void) memset(&tcdp->tcd_notif, 0, sizeof (tcdp->tcd_notif)); + + switch (tcdp->tcd_subsystem) { + case TIMER: + case AIO: + if (tcdp->tcd_port >= 0) + (void) close(tcdp->tcd_port); + break; + case MQ: + tcdp->tcd_msg_avail = NULL; + tcdp->tcd_msg_object = NULL; + tcdp->tcd_msg_userval = NULL; + tcdp->tcd_msg_enabled = 0; + break; + } + + lfree(tcdp, sizeof (*tcdp)); +} + +/* + * Initialize data structure and create the port. + */ +thread_communication_data_t * +setup_sigev_handler(const struct sigevent *sigevp, subsystem_t caller) +{ + thread_communication_data_t *tcdp; + int error; + + if (sigevp == NULL) { + errno = EINVAL; + return (NULL); + } + + if ((tcdp = alloc_sigev_handler(caller)) == NULL) { + errno = ENOMEM; + return (NULL); + } + + if (sigevp->sigev_notify_attributes == NULL) + tcdp->tcd_attrp = NULL; /* default attributes */ + else { + /* + * We cannot just copy the sigevp->sigev_notify_attributes + * pointer. 
We need to initialize a new pthread_attr_t + * structure with the values from the user-supplied + * pthread_attr_t. + */ + tcdp->tcd_attrp = &tcdp->tcd_user_attr; + error = _pthread_attr_clone(tcdp->tcd_attrp, + sigevp->sigev_notify_attributes); + if (error) { + tcdp->tcd_attrp = NULL; + free_sigev_handler(tcdp); + errno = error; + return (NULL); + } + } + tcdp->tcd_notif = *sigevp; + tcdp->tcd_notif.sigev_notify_attributes = tcdp->tcd_attrp; + + if (caller == TIMER || caller == AIO) { + if ((tcdp->tcd_port = port_create()) < 0 || + fcntl(tcdp->tcd_port, FD_CLOEXEC) == -1) { + free_sigev_handler(tcdp); + errno = EBADF; + return (NULL); + } + } + return (tcdp); +} + +/* + * Create a thread pool and launch the spawner. + */ +int +launch_spawner(thread_communication_data_t *tcdp) +{ + int ret; + int maxworkers; + void *(*spawner)(void *); + sigset_t set; + sigset_t oset; + + switch (tcdp->tcd_subsystem) { + case TIMER: + spawner = timer_spawner; + maxworkers = 1; + break; + case MQ: + spawner = mqueue_spawner; + maxworkers = 1; + break; + case AIO: + spawner = aio_spawner; + maxworkers = 100; + break; + default: + return (-1); + } + tcdp->tcd_poolp = tpool_create(1, maxworkers, 20, + tcdp->tcd_notif.sigev_notify_attributes); + if (tcdp->tcd_poolp == NULL) + return (-1); + /* create the spawner with all signals blocked */ + (void) sigfillset(&set); + (void) thr_sigsetmask(SIG_SETMASK, &set, &oset); + ret = thr_create(NULL, 0, spawner, tcdp, + THR_DETACHED | THR_DAEMON, &tcdp->tcd_server_id); + (void) thr_sigsetmask(SIG_SETMASK, &oset, NULL); + if (ret != 0) { + tpool_destroy(tcdp->tcd_poolp); + tcdp->tcd_poolp = NULL; + return (-1); + } + return (0); +} + +/* + * Delete the data associated with the sigev_thread timer, if timer is + * associated with such a notification option. + * Destroy the timer_spawner thread. + */ +int +del_sigev_timer(timer_t timer) +{ + int rc = 0; + thread_communication_data_t *tcdp; + + if ((uint_t)timer < timer_max && (tcdp = timer_tcd[timer]) != NULL) { + sig_mutex_lock(&tcdp->tcd_lock); + if (tcdp->tcd_port >= 0) { + if ((rc = port_alert(tcdp->tcd_port, + PORT_ALERT_SET, SIGEV_THREAD_TERM, NULL)) == 0) { + dprintf("del_sigev_timer(%d) OK.\n", timer); + } + } + timer_tcd[timer] = NULL; + sig_mutex_unlock(&tcdp->tcd_lock); + } + return (rc); +} + +int +sigev_timer_getoverrun(timer_t timer) +{ + thread_communication_data_t *tcdp; + + if ((uint_t)timer < timer_max && (tcdp = timer_tcd[timer]) != NULL) + return (tcdp->tcd_overruns); + return (0); +} + +static void +del_sigev_mq_cleanup(thread_communication_data_t *tcdp) +{ + sig_mutex_unlock(&tcdp->tcd_lock); + free_sigev_handler(tcdp); +} + +/* + * Delete the data associated with the sigev_thread message queue, + * if the message queue is associated with such a notification option. + * Destroy the mqueue_spawner thread. 
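+ * The tcd_msg_closing flag tells sigev_destroy_pool(), run by the
+ * cancelled spawner, to leave the tcd intact; we then wait on tcd_cv
+ * until tcd_server_id is cleared and free the tcd ourselves.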
+ */
+void
+del_sigev_mq(thread_communication_data_t *tcdp)
+{
+	pthread_t server_id;
+	int rc;
+
+	sig_mutex_lock(&tcdp->tcd_lock);
+
+	server_id = tcdp->tcd_server_id;
+	tcdp->tcd_msg_closing = 1;
+	if ((rc = pthread_cancel(server_id)) != 0) {	/* "can't happen" */
+		sig_mutex_unlock(&tcdp->tcd_lock);
+		dprintf("Failed to cancel %u with error %d <%s>.\n",
+		    server_id, rc, strerror(rc));
+		return;
+	}
+
+	/*
+	 * wait for sigev_destroy_pool() to finish
+	 */
+	pthread_cleanup_push(del_sigev_mq_cleanup, tcdp);
+	while (tcdp->tcd_server_id == server_id)
+		(void) sig_cond_wait(&tcdp->tcd_cv, &tcdp->tcd_lock);
+	pthread_cleanup_pop(1);
+}
+
+/*
+ * POSIX aio:
+ * If the notification type is SIGEV_THREAD, set up
+ * the port number for notifications.  Create the
+ * thread pool and launch the spawner if necessary.
+ * If the notification type is not SIGEV_THREAD, do nothing.
+ */
+int
+_aio_sigev_thread_init(struct sigevent *sigevp)
+{
+	static mutex_t sigev_aio_lock = DEFAULTMUTEX;
+	static cond_t sigev_aio_cv = DEFAULTCV;
+	static int sigev_aio_busy = 0;
+
+	thread_communication_data_t *tcdp;
+	int port;
+	int rc = 0;
+
+	if (sigevp == NULL ||
+	    sigevp->sigev_notify != SIGEV_THREAD ||
+	    sigevp->sigev_notify_function == NULL)
+		return (0);
+
+	lmutex_lock(&sigev_aio_lock);
+	while (sigev_aio_busy)
+		(void) _cond_wait(&sigev_aio_cv, &sigev_aio_lock);
+	if ((tcdp = sigev_aio_tcd) != NULL)
+		port = tcdp->tcd_port;
+	else {
+		sigev_aio_busy = 1;
+		lmutex_unlock(&sigev_aio_lock);
+
+		tcdp = setup_sigev_handler(sigevp, AIO);
+		if (tcdp == NULL) {
+			port = -1;
+			rc = -1;
+		} else if (launch_spawner(tcdp) != 0) {
+			free_sigev_handler(tcdp);
+			tcdp = NULL;
+			port = -1;
+			rc = -1;
+		} else {
+			port = tcdp->tcd_port;
+		}
+
+		lmutex_lock(&sigev_aio_lock);
+		sigev_aio_tcd = tcdp;
+		sigev_aio_busy = 0;
+		(void) cond_broadcast(&sigev_aio_cv);
+	}
+	lmutex_unlock(&sigev_aio_lock);
+	sigevp->sigev_signo = port;
+	return (rc);
+}
+
+int
+_aio_sigev_thread(aiocb_t *aiocbp)
+{
+	if (aiocbp == NULL)
+		return (0);
+	return (_aio_sigev_thread_init(&aiocbp->aio_sigevent));
+}
+
+#if !defined(_LP64)
+int
+_aio_sigev_thread64(aiocb64_t *aiocbp)
+{
+	if (aiocbp == NULL)
+		return (0);
+	return (_aio_sigev_thread_init(&aiocbp->aio_sigevent));
+}
+#endif
+
+/*
+ * Clean up POSIX aio after fork1() in the child process.
+ */
+void
+postfork1_child_sigev_aio(void)
+{
+	thread_communication_data_t *tcdp;
+
+	if ((tcdp = sigev_aio_tcd) != NULL) {
+		sigev_aio_tcd = NULL;
+		tcd_teardown(tcdp);
+	}
+}
+
+/*
+ * Utility function for the various postfork1_child_sigev_*() functions.
+ * Clean up the tcdp data structure and close the port.
+ */
+void
+tcd_teardown(thread_communication_data_t *tcdp)
+{
+	if (tcdp->tcd_poolp != NULL)
+		tpool_abandon(tcdp->tcd_poolp);
+	tcdp->tcd_poolp = NULL;
+	tcdp->tcd_server_id = 0;
+	free_sigev_handler(tcdp);
+}
diff --git a/usr/src/lib/libc/port/rt/sigev_thread.h b/usr/src/lib/libc/port/rt/sigev_thread.h
new file mode 100644
index 0000000000..943cb8ab23
--- /dev/null
+++ b/usr/src/lib/libc/port/rt/sigev_thread.h
@@ -0,0 +1,117 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SIGEV_THREAD_H +#define _SIGEV_THREAD_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <signal.h> +#include <port.h> +#include <mqueue.h> +#include <time.h> +#include <limits.h> +#include <semaphore.h> +#include <thread_pool.h> + +#define SIGEV_THREAD_TERM 1 + +typedef enum {TIMER = 1, MQ, AIO} subsystem_t; /* Calling sub-system */ + +typedef struct { + void (*std_func)(union sigval); /* User-defined notification function */ + union sigval std_arg; /* Parameter of user-defined notification fct */ +} sigev_thread_data_t; + +typedef struct thread_communication_data { + struct thread_communication_data *tcd_next; + struct sigevent tcd_notif; /* encapsulates usr fct and usr vals */ + pthread_attr_t tcd_user_attr; /* copy of caller's attributes */ + pthread_attr_t *tcd_attrp; /* NULL if caller passed NULL */ + int tcd_port; /* port this spawner is controlling */ + thread_t tcd_server_id; /* thread id of server thread */ + subsystem_t tcd_subsystem; /* event generating subsystem */ + tpool_t *tcd_poolp; /* worker thread pool */ + /* for creation/termination synchronization protocol */ + mutex_t tcd_lock; + cond_t tcd_cv; + /* subsystem-specific data */ + union { + struct { + int overruns; /* number of overruns */ + } timer; + struct { + int msg_enabled; /* notification enabled */ + int msg_closing; /* mq_close() is waiting */ + sem_t *msg_avail; /* wait for message available */ + void *msg_object; /* mqd_t */ + void *msg_userval; /* notification user value */ + } mqueue; + } tcd_object; +} thread_communication_data_t; + +#define tcd_overruns tcd_object.timer.overruns + +#define tcd_msg_enabled tcd_object.mqueue.msg_enabled +#define tcd_msg_closing tcd_object.mqueue.msg_closing +#define tcd_msg_avail tcd_object.mqueue.msg_avail +#define tcd_msg_object tcd_object.mqueue.msg_object +#define tcd_msg_userval tcd_object.mqueue.msg_userval + +/* Generic functions common to all entities */ +extern thread_communication_data_t *setup_sigev_handler( + const struct sigevent *, subsystem_t); +extern void free_sigev_handler(thread_communication_data_t *); +extern int launch_spawner(thread_communication_data_t *); +extern void tcd_teardown(thread_communication_data_t *); + +/* Additional functions for different entities */ +extern void *timer_spawner(void *); +extern int del_sigev_timer(timer_t); +extern int sigev_timer_getoverrun(timer_t); +extern void *mqueue_spawner(void *); +extern void del_sigev_mq(thread_communication_data_t *); +extern void *aio_spawner(void *); + +/* Private interfaces elsewhere in libc */ +extern int _pthread_attr_clone(pthread_attr_t *, const pthread_attr_t *); +extern int _pthread_attr_equal(const pthread_attr_t *, const pthread_attr_t *); +extern int _port_dispatch(int, int, int, int, uintptr_t, void *); + +extern thread_communication_data_t *sigev_aio_tcd; + +extern int timer_max; +extern thread_communication_data_t **timer_tcd; + +#ifdef __cplusplus +} +#endif + +#endif /* _SIGEV_THREAD_H */ diff --git a/usr/src/lib/libc/port/sys/fsync.c 
b/usr/src/lib/libc/port/sys/fsync.c index d6827f60f3..f727d5914f 100644 --- a/usr/src/lib/libc/port/sys/fsync.c +++ b/usr/src/lib/libc/port/sys/fsync.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,15 +31,20 @@ /* * fsync(int fd) - * + * fdatasync(int fd) */ #include "synonyms.h" -#include <sys/types.h> #include "libc.h" -#include "sys/file.h" +#include <sys/file.h> int _fsync(int fd) { return (__fdsync(fd, FSYNC)); } + +int +fdatasync(int fd) +{ + return (__fdsync(fd, FDSYNC)); +} diff --git a/usr/src/lib/libc/port/sys/sigstack.c b/usr/src/lib/libc/port/sys/sigstack.c index cf4335f2a2..9f34b2386a 100644 --- a/usr/src/lib/libc/port/sys/sigstack.c +++ b/usr/src/lib/libc/port/sys/sigstack.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -30,6 +29,8 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#pragma weak sigstack = _sigstack + #include "synonyms.h" #include <sys/types.h> #include <sys/ucontext.h> diff --git a/usr/src/lib/libc/port/threads/assfail.c b/usr/src/lib/libc/port/threads/assfail.c index e64aaa87a5..989a36923a 100644 --- a/usr/src/lib/libc/port/threads/assfail.c +++ b/usr/src/lib/libc/port/threads/assfail.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -91,8 +91,8 @@ Abort(const char *msg) * Write a panic message w/o grabbing any locks other than assert_lock. * We have no idea what locks are held at this point. 
*/ -void -thr_panic(const char *why) +static void +common_panic(const char *head, const char *why) { char msg[400]; /* no panic() message in the library is this long */ ulwp_t *self; @@ -103,7 +103,7 @@ thr_panic(const char *why) (void) _private_lwp_mutex_lock(&assert_lock); (void) _private_memset(msg, 0, sizeof (msg)); - (void) strcpy(msg, "*** libc thread failure: "); + (void) strcpy(msg, head); len1 = strlen(msg); len2 = strlen(why); if (len1 + len2 >= sizeof (msg)) @@ -116,6 +116,18 @@ thr_panic(const char *why) Abort(msg); } +void +thr_panic(const char *why) +{ + common_panic("*** libc thread failure: ", why); +} + +void +aio_panic(const char *why) +{ + common_panic("*** libc aio system failure: ", why); +} + /* * Utility function for converting a long integer to a string, avoiding stdio. * 'base' must be one of 10 or 16 @@ -370,7 +382,8 @@ thread_error(const char *msg) * We use __assfail() because the libc __assert() calls * gettext() which calls malloc() which grabs a mutex. * We do everything without calling standard i/o. - * _assfail() is an exported function, __assfail() is private to libc. + * assfail() and _assfail() are exported functions; + * __assfail() is private to libc. */ #pragma weak _assfail = __assfail void @@ -416,3 +429,17 @@ __assfail(const char *assertion, const char *filename, int line_num) */ Abort(buf); } + +/* + * We define and export this version of assfail() just because libaio + * used to define and export it, needlessly. Now that libaio is folded + * into libc, we need to continue this for ABI/version reasons. + * We don't use "#pragma weak assfail __assfail" in order to avoid + * warnings from the check_fnames utility at build time for libraries + * that define their own version of assfail(). + */ +void +assfail(const char *assertion, const char *filename, int line_num) +{ + __assfail(assertion, filename, line_num); +} diff --git a/usr/src/lib/libc/port/threads/pthr_attr.c b/usr/src/lib/libc/port/threads/pthr_attr.c index 865c573dd0..bcae664e13 100644 --- a/usr/src/lib/libc/port/threads/pthr_attr.c +++ b/usr/src/lib/libc/port/threads/pthr_attr.c @@ -88,7 +88,6 @@ _pthread_attr_destroy(pthread_attr_t *attr) /* * _pthread_attr_clone: make a copy of a pthread_attr_t. - * This is a consolidation-private interface, for librt. */ int _pthread_attr_clone(pthread_attr_t *attr, const pthread_attr_t *old_attr) @@ -231,7 +230,7 @@ _pthread_attr_getdetachstate(const pthread_attr_t *attr, int *detachstate) /* * pthread_attr_setdaemonstate_np: sets the daemon state to DAEMON or NONDAEMON. * PTHREAD_CREATE_DAEMON is equivalent to thr_create(THR_DAEMON). - * For now, this is a consolidation-private interface for librt. + * For now, this is a private interface in libc. */ int _pthread_attr_setdaemonstate_np(pthread_attr_t *attr, int daemonstate) @@ -249,7 +248,7 @@ _pthread_attr_setdaemonstate_np(pthread_attr_t *attr, int daemonstate) /* * pthread_attr_getdaemonstate_np: gets the daemon state. - * For now, this is a consolidation-private interface for librt. + * For now, this is a private interface in libc. 
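+ * (The daemon-state attribute is consumed by the thread pool code in
+ * port/tpool/thread_pool.c, which marks its worker threads as daemon
+ * threads so that they do not keep the process alive.)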
*/ int _pthread_attr_getdaemonstate_np(const pthread_attr_t *attr, int *daemonstate) diff --git a/usr/src/lib/libc/port/threads/pthread.c b/usr/src/lib/libc/port/threads/pthread.c index 5838a5aff7..2215647391 100644 --- a/usr/src/lib/libc/port/threads/pthread.c +++ b/usr/src/lib/libc/port/threads/pthread.c @@ -84,7 +84,7 @@ _pthread_create(pthread_t *thread, const pthread_attr_t *attr, return (EINVAL); mapped = 1; mappedpri = priority; - priority = _map_rtpri_to_gp(priority); + priority = map_rtpri_to_gp(priority); ASSERT(priority >= THREAD_MIN_PRIORITY && priority <= THREAD_MAX_PRIORITY); } @@ -236,7 +236,7 @@ _thread_setschedparam_main(pthread_t tid, int policy, } mapped = 1; mappedprio = prio; - prio = _map_rtpri_to_gp(prio); + prio = map_rtpri_to_gp(prio); ASSERT(prio >= THREAD_MIN_PRIORITY && prio <= THREAD_MAX_PRIORITY); } diff --git a/usr/src/lib/libc/port/threads/rtsched.c b/usr/src/lib/libc/port/threads/rtsched.c index 60d3357655..a85118dc5c 100644 --- a/usr/src/lib/libc/port/threads/rtsched.c +++ b/usr/src/lib/libc/port/threads/rtsched.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -39,9 +39,6 @@ * The following variables are used for caching information * for priocntl TS and RT scheduling classs. */ -struct pcclass ts_class, rt_class; - -static rtdpent_t *rt_dptbl; /* RT class parameter table */ static int rt_rrmin; static int rt_rrmax; static int rt_fifomin; @@ -50,87 +47,6 @@ static int rt_othermin; static int rt_othermax; /* - * Get the RT class parameter table - */ -static void -_get_rt_dptbl() -{ - struct pcclass *pccp; - pcadmin_t pcadmin; - rtadmin_t rtadmin; - size_t rtdpsize; - - pccp = &ts_class; - /* get class's info */ - (void) strcpy(pccp->pcc_info.pc_clname, "TS"); - if (priocntl(P_PID, 0, PC_GETCID, (caddr_t)&(pccp->pcc_info)) < 0) - goto out; - - pccp = &rt_class; - /* get class's info */ - (void) strcpy(pccp->pcc_info.pc_clname, "RT"); - if (priocntl(P_PID, 0, PC_GETCID, (caddr_t)&(pccp->pcc_info)) < 0) - goto out; - - /* get RT class dispatch table in rt_dptbl */ - pcadmin.pc_cid = rt_class.pcc_info.pc_cid; - pcadmin.pc_cladmin = (caddr_t)&rtadmin; - rtadmin.rt_cmd = RT_GETDPSIZE; - if (priocntl(P_PID, 0, PC_ADMIN, (caddr_t)&pcadmin) < 0) - goto out; - rtdpsize = rtadmin.rt_ndpents * sizeof (rtdpent_t); - if (rt_dptbl == NULL && (rt_dptbl = lmalloc(rtdpsize)) == NULL) - goto out; - rtadmin.rt_dpents = rt_dptbl; - rtadmin.rt_cmd = RT_GETDPTBL; - if (priocntl(P_PID, 0, PC_ADMIN, (caddr_t)&pcadmin) < 0) - goto out; - pccp->pcc_primin = 0; - pccp->pcc_primax = ((rtinfo_t *)rt_class.pcc_info.pc_clinfo)->rt_maxpri; - return; -out: - thr_panic("get_rt_dptbl failed"); -} - -/* - * Translate RT class's user priority to global scheduling priority. - * This is for priorities coming from librt. 
- */ -pri_t -_map_rtpri_to_gp(pri_t pri) -{ - static mutex_t map_lock = DEFAULTMUTEX; - static int mapped = 0; - rtdpent_t *rtdp; - pri_t gpri; - - if (!mapped) { - lmutex_lock(&map_lock); - if (!mapped) { /* do this only once */ - _get_rt_dptbl(); - mapped = 1; - } - lmutex_unlock(&map_lock); - } - - /* First case is the default case, other two are seldomly taken */ - if (pri <= rt_dptbl[rt_class.pcc_primin].rt_globpri) { - gpri = pri + rt_dptbl[rt_class.pcc_primin].rt_globpri - - rt_class.pcc_primin; - } else if (pri >= rt_dptbl[rt_class.pcc_primax].rt_globpri) { - gpri = pri + rt_dptbl[rt_class.pcc_primax].rt_globpri - - rt_class.pcc_primax; - } else { - gpri = rt_dptbl[rt_class.pcc_primin].rt_globpri + 1; - for (rtdp = rt_dptbl+1; rtdp->rt_globpri < pri; ++rtdp, ++gpri) - ; - if (rtdp->rt_globpri > pri) - --gpri; - } - return (gpri); -} - -/* * Set the RT priority/policy of a lwp/thread. */ int @@ -175,30 +91,16 @@ _thrp_setlwpprio(lwpid_t lwpid, int policy, int pri) static void _init_rt_prio_ranges() { - pcinfo_t info; - - (void) strcpy(info.pc_clname, "RT"); - if (priocntl(P_PID, 0, PC_GETCID, (caddr_t)&info) == -1L) - rt_fifomin = rt_rrmin = rt_fifomax = rt_rrmax = 0; - else { - rtinfo_t *rtinfop = (rtinfo_t *)info.pc_clinfo; - rt_fifomin = rt_rrmin = 0; - rt_fifomax = rt_rrmax = rtinfop->rt_maxpri; - } - - (void) strcpy(info.pc_clname, "TS"); - if (priocntl(P_PID, 0, PC_GETCID, (caddr_t)&info) == -1L) - rt_othermin = rt_othermax = 0; - else { - tsinfo_t *tsinfop = (tsinfo_t *)info.pc_clinfo; - pri_t pri = tsinfop->ts_maxupri / 3; - rt_othermin = -pri; - rt_othermax = pri; - } + rt_rrmin = sched_get_priority_min(SCHED_RR); + rt_rrmax = sched_get_priority_max(SCHED_RR); + rt_fifomin = sched_get_priority_min(SCHED_FIFO); + rt_fifomax = sched_get_priority_max(SCHED_FIFO); + rt_othermin = sched_get_priority_min(SCHED_OTHER); + rt_othermax = sched_get_priority_max(SCHED_OTHER); } /* - * Validate priorities from librt. + * Validate priorities. */ int _validate_rt_prio(int policy, int pri) diff --git a/usr/src/lib/libc/port/threads/scalls.c b/usr/src/lib/libc/port/threads/scalls.c index b3287040f1..67a2a6341f 100644 --- a/usr/src/lib/libc/port/threads/scalls.c +++ b/usr/src/lib/libc/port/threads/scalls.c @@ -206,7 +206,7 @@ _fork1(void) self->ul_siginfo.si_signo = 0; udp->pid = _private_getpid(); /* reset the library's data structures to reflect one thread */ - _postfork1_child(); + postfork1_child(); restore_signals(self); _postfork_child_handler(); } else { @@ -375,8 +375,8 @@ _forkall(void) } /* - * Externally-callable cancellation prologue and epilogue - * functions, for cancellation points outside of libc. + * Cancellation prologue and epilogue functions, + * for cancellation points too complex to include here. 
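+ *
+ * A caller brackets its blocking operation, for example (sketch only;
+ * __blocking_call() is a stand-in, not an actual libc function):
+ *
+ *	_cancel_prologue();
+ *	rv = __blocking_call(...);
+ *	_cancel_epilogue();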
*/ void _cancel_prologue(void) @@ -504,13 +504,14 @@ __xpg4_putpmsg(int fd, const struct strbuf *ctlptr, PERFORM(_putpmsg(fd, ctlptr, dataptr, band, flags|MSG_XPG4)) } +#pragma weak nanosleep = _nanosleep int -__nanosleep(const timespec_t *rqtp, timespec_t *rmtp) +_nanosleep(const timespec_t *rqtp, timespec_t *rmtp) { int error; PROLOGUE - error = ___nanosleep(rqtp, rmtp); + error = __nanosleep(rqtp, rmtp); EPILOGUE if (error) { errno = error; @@ -519,8 +520,9 @@ __nanosleep(const timespec_t *rqtp, timespec_t *rmtp) return (0); } +#pragma weak clock_nanosleep = _clock_nanosleep int -__clock_nanosleep(clockid_t clock_id, int flags, +_clock_nanosleep(clockid_t clock_id, int flags, const timespec_t *rqtp, timespec_t *rmtp) { timespec_t reltime; @@ -550,7 +552,7 @@ __clock_nanosleep(clockid_t clock_id, int flags, } restart: PROLOGUE - error = ___nanosleep(&reltime, rmtp); + error = __nanosleep(&reltime, rmtp); EPILOGUE if (error == 0 && clock_id == CLOCK_HIGHRES) { /* @@ -607,7 +609,7 @@ _sleep(unsigned int sec) ts.tv_sec = (time_t)sec; ts.tv_nsec = 0; PROLOGUE - error = ___nanosleep(&ts, &tsr); + error = __nanosleep(&ts, &tsr); EPILOGUE if (error == EINTR) { rem = (unsigned int)tsr.tv_sec; @@ -626,7 +628,7 @@ _usleep(useconds_t usec) ts.tv_sec = usec / MICROSEC; ts.tv_nsec = (long)(usec % MICROSEC) * 1000; PROLOGUE - (void) ___nanosleep(&ts, NULL); + (void) __nanosleep(&ts, NULL); EPILOGUE return (0); } @@ -634,9 +636,11 @@ _usleep(useconds_t usec) int close(int fildes) { + extern void _aio_close(int); extern int _close(int); int rv; + _aio_close(fildes); PERFORM(_close(fildes)) } @@ -856,17 +860,17 @@ _pollsys(struct pollfd *fds, nfds_t nfd, const timespec_t *timeout, return (rv); } +#pragma weak sigtimedwait = _sigtimedwait int -__sigtimedwait(const sigset_t *set, siginfo_t *infop, - const timespec_t *timeout) +_sigtimedwait(const sigset_t *set, siginfo_t *infop, const timespec_t *timeout) { - extern int ___sigtimedwait(const sigset_t *, siginfo_t *, + extern int __sigtimedwait(const sigset_t *, siginfo_t *, const timespec_t *); siginfo_t info; int sig; PROLOGUE - sig = ___sigtimedwait(set, &info, timeout); + sig = __sigtimedwait(set, &info, timeout); if (sig == SIGCANCEL && (SI_FROMKERNEL(&info) || info.si_code == SI_LWP)) { do_sigcancel(); @@ -883,7 +887,23 @@ __sigtimedwait(const sigset_t *set, siginfo_t *infop, int _sigwait(sigset_t *set) { - return (__sigtimedwait(set, NULL, NULL)); + return (_sigtimedwait(set, NULL, NULL)); +} + +#pragma weak sigwaitinfo = _sigwaitinfo +int +_sigwaitinfo(const sigset_t *set, siginfo_t *info) +{ + return (_sigtimedwait(set, info, NULL)); +} + +#pragma weak sigqueue = _sigqueue +int +_sigqueue(pid_t pid, int signo, const union sigval value) +{ + extern int __sigqueue(pid_t pid, int signo, + /* const union sigval */ void *value, int si_code, int block); + return (__sigqueue(pid, signo, value.sival_ptr, SI_QUEUE, 0)); } int diff --git a/usr/src/lib/libc/port/threads/sigaction.c b/usr/src/lib/libc/port/threads/sigaction.c index 670598961f..101b730af3 100644 --- a/usr/src/lib/libc/port/threads/sigaction.c +++ b/usr/src/lib/libc/port/threads/sigaction.c @@ -28,6 +28,7 @@ #include "lint.h" #include "thr_uberdata.h" +#include "asyncio.h" #include <signal.h> #include <siginfo.h> #include <ucontext.h> @@ -154,6 +155,22 @@ call_user_handler(int sig, siginfo_t *sip, ucontext_t *ucp) do_sigcancel(); goto out; } + /* SIGCANCEL is ignored by default */ + if (uact.sa_sigaction == SIG_DFL || + uact.sa_sigaction == SIG_IGN) + goto out; + } + + /* + * If this thread has 
been sent SIGAIOCANCEL (SIGLWP) and + * we are an aio worker thread, cancel the aio request. + */ + if (sig == SIGAIOCANCEL) { + aio_worker_t *aiowp = _pthread_getspecific(_aio_key); + + if (sip != NULL && sip->si_code == SI_LWP && aiowp != NULL) + _siglongjmp(aiowp->work_jmp_buf, 1); + /* SIGLWP is ignored by default */ if (uact.sa_sigaction == SIG_DFL || uact.sa_sigaction == SIG_IGN) goto out; @@ -289,10 +306,9 @@ sigacthandler(int sig, siginfo_t *sip, void *uvp) thr_panic("sigacthandler(): __setcontext() returned"); } -#pragma weak sigaction = _libc_sigaction -#pragma weak _sigaction = _libc_sigaction +#pragma weak sigaction = _sigaction int -_libc_sigaction(int sig, const struct sigaction *nact, struct sigaction *oact) +_sigaction(int sig, const struct sigaction *nact, struct sigaction *oact) { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; @@ -341,10 +357,11 @@ _libc_sigaction(int sig, const struct sigaction *nact, struct sigaction *oact) if (self->ul_vfork) { if (tact.sa_sigaction != SIG_IGN) tact.sa_sigaction = SIG_DFL; - } else if (sig == SIGCANCEL) { + } else if (sig == SIGCANCEL || sig == SIGAIOCANCEL) { /* - * Always catch SIGCANCEL. - * We need it for pthread_cancel() to work. + * Always catch these signals. + * We need SIGCANCEL for pthread_cancel() to work. + * We need SIGAIOCANCEL for aio_cancel() to work. */ udp->siguaction[sig].sig_uaction = tact; if (tact.sa_sigaction == SIG_DFL || @@ -372,6 +389,16 @@ _libc_sigaction(int sig, const struct sigaction *nact, struct sigaction *oact) oact->sa_sigaction != SIG_IGN) *oact = oaction; + /* + * We detect setting the disposition of SIGIO just to set the + * _sigio_enabled flag for the asynchronous i/o (aio) code. + */ + if (sig == SIGIO && rv == 0 && tactp != NULL) { + _sigio_enabled = + (tactp->sa_handler != SIG_DFL && + tactp->sa_handler != SIG_IGN); + } + if (!self->ul_vfork) lmutex_unlock(&udp->siguaction[sig].sig_lock); return (rv); @@ -619,18 +646,22 @@ do_sigcancel() } /* - * Set up the SIGCANCEL handler for threads cancellation - * (needed only when we have more than one thread). - * We need no locks here because we are called from - * finish_init() while still single-threaded. + * Set up the SIGCANCEL handler for threads cancellation, + * needed only when we have more than one thread, + * or the SIGAIOCANCEL handler for aio cancellation, + * called when aio is initialized, in __uaio_init(). */ void -init_sigcancel() +setup_cancelsig(int sig) { uberdata_t *udp = curthread->ul_uberdata; + mutex_t *mp = &udp->siguaction[sig].sig_lock; struct sigaction act; - act = udp->siguaction[SIGCANCEL].sig_uaction; + ASSERT(sig == SIGCANCEL || sig == SIGAIOCANCEL); + lmutex_lock(mp); + act = udp->siguaction[sig].sig_uaction; + lmutex_unlock(mp); if (act.sa_sigaction == SIG_DFL || act.sa_sigaction == SIG_IGN) act.sa_flags = SA_SIGINFO; @@ -640,5 +671,5 @@ init_sigcancel() } act.sa_sigaction = udp->sigacthandler; act.sa_mask = maskset; - (void) __sigaction(SIGCANCEL, &act, NULL); + (void) __sigaction(sig, &act, NULL); } diff --git a/usr/src/lib/libc/port/threads/spawn.c b/usr/src/lib/libc/port/threads/spawn.c index 18a6d68e0b..143db8cf49 100644 --- a/usr/src/lib/libc/port/threads/spawn.c +++ b/usr/src/lib/libc/port/threads/spawn.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. 
+ * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,6 +35,7 @@ #include <sys/ts.h> #include <alloca.h> #include <spawn.h> +#include "rtsched.h" #define ALL_POSIX_SPAWN_FLAGS \ (POSIX_SPAWN_RESETIDS | \ @@ -65,6 +66,8 @@ typedef struct file_attr { int fa_newfiledes; /* new file descriptor for dup2() */ } file_attr_t; +extern struct pcclass ts_class, rt_class; + extern pid_t _vfork(void); #pragma unknown_control_flow(_vfork) extern void *_private_memset(void *, int, size_t); @@ -631,7 +634,10 @@ _posix_spawnattr_setflags( * Populate ts_class and rt_class. * We will need them in the child of vfork(). */ - (void) _map_rtpri_to_gp(0); + if (rt_class.pcc_state == 0) + (void) get_info_by_policy(SCHED_FIFO); + if (ts_class.pcc_state == 0) + (void) get_info_by_policy(SCHED_OTHER); } sap->sa_psflags = flags; diff --git a/usr/src/lib/libc/port/threads/synch.c b/usr/src/lib/libc/port/threads/synch.c index 6856ebcc6b..9c6e918620 100644 --- a/usr/src/lib/libc/port/threads/synch.c +++ b/usr/src/lib/libc/port/threads/synch.c @@ -2184,6 +2184,77 @@ lmutex_unlock(mutex_t *mp) exit_critical(self); } +/* + * For specialized code in libc, like the asynchronous i/o code, + * the following sig_*() locking primitives are used in order + * to make the code asynchronous signal safe. Signals are + * deferred while locks acquired by these functions are held. + */ +void +sig_mutex_lock(mutex_t *mp) +{ + sigoff(curthread); + (void) _private_mutex_lock(mp); +} + +void +sig_mutex_unlock(mutex_t *mp) +{ + (void) _private_mutex_unlock(mp); + sigon(curthread); +} + +int +sig_mutex_trylock(mutex_t *mp) +{ + int error; + + sigoff(curthread); + if ((error = _private_mutex_trylock(mp)) != 0) + sigon(curthread); + return (error); +} + +/* + * sig_cond_wait() is a cancellation point. + */ +int +sig_cond_wait(cond_t *cv, mutex_t *mp) +{ + int error; + + ASSERT(curthread->ul_sigdefer != 0); + _private_testcancel(); + error = _cond_wait(cv, mp); + if (error == EINTR && curthread->ul_cursig) { + sig_mutex_unlock(mp); + /* take the deferred signal here */ + sig_mutex_lock(mp); + } + _private_testcancel(); + return (error); +} + +/* + * sig_cond_reltimedwait() is a cancellation point. + */ +int +sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts) +{ + int error; + + ASSERT(curthread->ul_sigdefer != 0); + _private_testcancel(); + error = _cond_reltimedwait(cv, mp, ts); + if (error == EINTR && curthread->ul_cursig) { + sig_mutex_unlock(mp); + /* take the deferred signal here */ + sig_mutex_lock(mp); + } + _private_testcancel(); + return (error); +} + static int shared_mutex_held(mutex_t *mparg) { diff --git a/usr/src/lib/libc/port/threads/thr.c b/usr/src/lib/libc/port/threads/thr.c index 37310cea56..2a9f9e89e1 100644 --- a/usr/src/lib/libc/port/threads/thr.c +++ b/usr/src/lib/libc/port/threads/thr.c @@ -1490,6 +1490,9 @@ libc_init(void) if (self->ul_primarymap && __tnf_probe_notify != NULL) __tnf_probe_notify(); /* PROBE_SUPPORT end */ + + init_sigev_thread(); + init_aio(); } #pragma fini(libc_fini) @@ -1562,7 +1565,7 @@ finish_init() /* * Set up the SIGCANCEL handler for threads cancellation. 
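+ * (setup_cancelsig() also registers the SIGAIOCANCEL handler when
+ * the aio subsystem initializes itself in __uaio_init(); see
+ * setup_cancelsig() in sigaction.c.)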
*/ - init_sigcancel(); + setup_cancelsig(SIGCANCEL); /* * Arrange to do special things on exit -- @@ -1596,7 +1599,7 @@ mark_dead_and_buried(ulwp_t *ulwp) * Reset our data structures to reflect one lwp. */ void -_postfork1_child() +postfork1_child() { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; @@ -1668,6 +1671,15 @@ _postfork1_child() udp->nzombies = 0; } trim_stack_cache(0); + + /* + * Do post-fork1 processing for subsystems that need it. + */ + postfork1_child_tpool(); + postfork1_child_sigev_aio(); + postfork1_child_sigev_mq(); + postfork1_child_sigev_timer(); + postfork1_child_aio(); } #pragma weak thr_setprio = _thr_setprio @@ -1761,7 +1773,7 @@ force_continue(ulwp_t *ulwp) if (ulwp->ul_stopping) { /* he is stopping himself */ ts.tv_sec = 0; /* give him a chance to run */ ts.tv_nsec = 100000; /* 100 usecs or clock tick */ - (void) ___nanosleep(&ts, NULL); + (void) __nanosleep(&ts, NULL); } if (!ulwp->ul_stopping) /* he is running now */ break; /* so we are done */ @@ -2203,10 +2215,8 @@ _ti_bind_clear(int bindflag) * Also, signals are deferred at thread startup until TLS constructors * have all been called, at which time _thr_setup() calls sigon(). * - * _sigoff() and _sigon() are external consolidation-private interfaces - * to sigoff() and sigon(), respectively, in libc. _sigdeferred() is - * a consolidation-private interface that returns the deferred signal - * number, if any. These are used in libnsl, librt, and libaio. + * _sigoff() and _sigon() are external consolidation-private interfaces to + * sigoff() and sigon(), respectively, in libc. These are used in libnsl. * Also, _sigoff() and _sigon() are called from dbx's run-time checking * (librtc.so) to defer signals during its critical sections (not to be * confused with libc critical sections [see exit_critical() above]). @@ -2223,12 +2233,6 @@ _sigon(void) sigon(curthread); } -int -_sigdeferred(void) -{ - return (curthread->ul_cursig); -} - void sigon(ulwp_t *self) { diff --git a/usr/src/lib/libc/port/tpool/thread_pool.c b/usr/src/lib/libc/port/tpool/thread_pool.c new file mode 100644 index 0000000000..5042f60301 --- /dev/null +++ b/usr/src/lib/libc/port/tpool/thread_pool.c @@ -0,0 +1,560 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include "synonyms.h"
+#include "thr_uberdata.h"
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include "thread_pool_impl.h"
+
+static mutex_t thread_pool_lock = DEFAULTMUTEX;
+static tpool_t *thread_pools = NULL;
+
+static void
+delete_pool(tpool_t *tpool)
+{
+	tpool_job_t *job;
+
+	ASSERT(tpool->tp_current == 0 && tpool->tp_active == NULL);
+
+	/*
+	 * Unlink the pool from the global list of all pools.
+	 */
+	lmutex_lock(&thread_pool_lock);
+	if (thread_pools == tpool)
+		thread_pools = tpool->tp_forw;
+	if (thread_pools == tpool)
+		thread_pools = NULL;
+	else {
+		tpool->tp_back->tp_forw = tpool->tp_forw;
+		tpool->tp_forw->tp_back = tpool->tp_back;
+	}
+	lmutex_unlock(&thread_pool_lock);
+
+	/*
+	 * There should be no pending jobs, but just in case...
+	 */
+	for (job = tpool->tp_head; job != NULL; job = tpool->tp_head) {
+		tpool->tp_head = job->tpj_next;
+		lfree(job, sizeof (*job));
+	}
+	(void) pthread_attr_destroy(&tpool->tp_attr);
+	lfree(tpool, sizeof (*tpool));
+}
+
+/*
+ * Worker thread is terminating.
+ */
+static void
+worker_cleanup(tpool_t *tpool)
+{
+	ASSERT(MUTEX_HELD(&tpool->tp_mutex));
+
+	if (--tpool->tp_current == 0 &&
+	    (tpool->tp_flags & (TP_DESTROY | TP_ABANDON))) {
+		if (tpool->tp_flags & TP_ABANDON) {
+			sig_mutex_unlock(&tpool->tp_mutex);
+			delete_pool(tpool);
+			return;
+		}
+		if (tpool->tp_flags & TP_DESTROY)
+			(void) cond_broadcast(&tpool->tp_busycv);
+	}
+	sig_mutex_unlock(&tpool->tp_mutex);
+}
+
+static void
+notify_waiters(tpool_t *tpool)
+{
+	if (tpool->tp_head == NULL && tpool->tp_active == NULL) {
+		tpool->tp_flags &= ~TP_WAIT;
+		(void) cond_broadcast(&tpool->tp_waitcv);
+	}
+}
+
+/*
+ * Called by a worker thread on return from a tpool_dispatch()d job.
+ */
+static void
+job_cleanup(tpool_t *tpool)
+{
+	pthread_t my_tid = pthread_self();
+	tpool_active_t *activep;
+	tpool_active_t **activepp;
+
+	sig_mutex_lock(&tpool->tp_mutex);
+	/* CSTYLED */
+	for (activepp = &tpool->tp_active;; activepp = &activep->tpa_next) {
+		activep = *activepp;
+		if (activep->tpa_tid == my_tid) {
+			*activepp = activep->tpa_next;
+			break;
+		}
+	}
+	if (tpool->tp_flags & TP_WAIT)
+		notify_waiters(tpool);
+}
+
+static void *
+tpool_worker(void *arg)
+{
+	tpool_t *tpool = (tpool_t *)arg;
+	int elapsed;
+	tpool_job_t *job;
+	void (*func)(void *);
+	tpool_active_t active;
+
+	sig_mutex_lock(&tpool->tp_mutex);
+	pthread_cleanup_push(worker_cleanup, tpool);
+
+	/*
+	 * This is the worker's main loop.
+	 * It will be left only if a timeout or an error has occurred.
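+	 * A worker leaves when the pool is being destroyed or abandoned,
+	 * or when its timed wait for work lasts longer than tp_linger
+	 * seconds while the pool still holds more than tp_minimum threads.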
+ */ + active.tpa_tid = pthread_self(); + for (;;) { + elapsed = 0; + tpool->tp_idle++; + if (tpool->tp_flags & TP_WAIT) + notify_waiters(tpool); + while ((tpool->tp_head == NULL || + (tpool->tp_flags & TP_SUSPEND)) && + !(tpool->tp_flags & (TP_DESTROY | TP_ABANDON))) { + if (tpool->tp_current <= tpool->tp_minimum || + tpool->tp_linger == 0) { + (void) sig_cond_wait(&tpool->tp_workcv, + &tpool->tp_mutex); + } else { + timestruc_t timeout; + + timeout.tv_sec = tpool->tp_linger; + timeout.tv_nsec = 0; + if (sig_cond_reltimedwait(&tpool->tp_workcv, + &tpool->tp_mutex, &timeout) != 0) { + elapsed = 1; + break; + } + } + } + tpool->tp_idle--; + if (tpool->tp_flags & TP_DESTROY) + break; + if (tpool->tp_flags & TP_ABANDON) { + /* can't abandon a suspended pool */ + if (tpool->tp_flags & TP_SUSPEND) { + tpool->tp_flags &= ~TP_SUSPEND; + (void) cond_broadcast(&tpool->tp_workcv); + } + if (tpool->tp_head == NULL) + break; + } + if ((job = tpool->tp_head) != NULL && + !(tpool->tp_flags & TP_SUSPEND)) { + elapsed = 0; + func = job->tpj_func; + arg = job->tpj_arg; + tpool->tp_head = job->tpj_next; + if (job == tpool->tp_tail) + tpool->tp_tail = NULL; + tpool->tp_njobs--; + active.tpa_next = tpool->tp_active; + tpool->tp_active = &active; + sig_mutex_unlock(&tpool->tp_mutex); + pthread_cleanup_push(job_cleanup, tpool); + lfree(job, sizeof (*job)); + /* + * Call the specified function. + */ + func(arg); + /* + * We don't know what this thread has been doing, + * so we reset its signal mask and cancellation + * state back to the initial values. + */ + (void) pthread_sigmask(SIG_SETMASK, &maskset, NULL); + (void) pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, + NULL); + (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, + NULL); + pthread_cleanup_pop(1); + } + if (elapsed && tpool->tp_current > tpool->tp_minimum) { + /* + * We timed out and there is no work to be done + * and the number of workers exceeds the minimum. + * Exit now to reduce the size of the pool. + */ + break; + } + } + pthread_cleanup_pop(1); + return (arg); +} + +/* + * Create a worker thread, with all signals blocked. + */ +static int +create_worker(tpool_t *tpool) +{ + sigset_t oset; + int error; + + (void) pthread_sigmask(SIG_SETMASK, &maskset, &oset); + error = pthread_create(NULL, &tpool->tp_attr, tpool_worker, tpool); + (void) pthread_sigmask(SIG_SETMASK, &oset, NULL); + return (error); +} + +tpool_t * +tpool_create(uint_t min_threads, uint_t max_threads, uint_t linger, + pthread_attr_t *attr) +{ + tpool_t *tpool; + void *stackaddr; + size_t stacksize; + size_t minstack; + int error; + + if (min_threads > max_threads || max_threads < 1) { + errno = EINVAL; + return (NULL); + } + if (attr != NULL) { + if (pthread_attr_getstack(attr, &stackaddr, &stacksize) != 0) { + errno = EINVAL; + return (NULL); + } + /* + * Allow only one thread in the pool with a specified stack. + * Require threads to have at least the minimum stack size. 
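+		 * For example (hypothetical caller), an attribute prepared
+		 * with pthread_attr_setstack(&attr, stk, stksize), stksize
+		 * being at least thr_min_stack(), can only be used as
+		 * tpool_create(1, 1, 0, &attr).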
+ */ + minstack = thr_min_stack(); + if (stackaddr != NULL) { + if (stacksize < minstack || max_threads != 1) { + errno = EINVAL; + return (NULL); + } + } else if (stacksize != 0 && stacksize < minstack) { + errno = EINVAL; + return (NULL); + } + } + + tpool = lmalloc(sizeof (*tpool)); + if (tpool == NULL) { + errno = ENOMEM; + return (NULL); + } + (void) mutex_init(&tpool->tp_mutex, USYNC_THREAD, NULL); + (void) cond_init(&tpool->tp_busycv, USYNC_THREAD, NULL); + (void) cond_init(&tpool->tp_workcv, USYNC_THREAD, NULL); + (void) cond_init(&tpool->tp_waitcv, USYNC_THREAD, NULL); + tpool->tp_minimum = min_threads; + tpool->tp_maximum = max_threads; + tpool->tp_linger = linger; + + /* + * We cannot just copy the attribute pointer. + * We need to initialize a new pthread_attr_t structure + * with the values from the user-supplied pthread_attr_t. + * If the attribute pointer is NULL, we need to initialize + * the new pthread_attr_t structure with default values. + */ + error = _pthread_attr_clone(&tpool->tp_attr, attr); + if (error) { + lfree(tpool, sizeof (*tpool)); + errno = error; + return (NULL); + } + + /* make all pool threads be detached daemon threads */ + (void) pthread_attr_setdetachstate(&tpool->tp_attr, + PTHREAD_CREATE_DETACHED); + (void) _pthread_attr_setdaemonstate_np(&tpool->tp_attr, + PTHREAD_CREATE_DAEMON_NP); + + /* insert into the global list of all thread pools */ + lmutex_lock(&thread_pool_lock); + if (thread_pools == NULL) { + tpool->tp_forw = tpool; + tpool->tp_back = tpool; + thread_pools = tpool; + } else { + thread_pools->tp_back->tp_forw = tpool; + tpool->tp_forw = thread_pools; + tpool->tp_back = thread_pools->tp_back; + thread_pools->tp_back = tpool; + } + lmutex_unlock(&thread_pool_lock); + + return (tpool); +} + +/* + * Dispatch a work request to the thread pool. + * If there are idle workers, awaken one. + * Else, if the maximum number of workers has + * not been reached, spawn a new worker thread. + * Else just return with the job added to the queue. + */ +int +tpool_dispatch(tpool_t *tpool, void (*func)(void *), void *arg) +{ + tpool_job_t *job; + + ASSERT(!(tpool->tp_flags & (TP_DESTROY | TP_ABANDON))); + + if ((job = lmalloc(sizeof (*job))) == NULL) + return (-1); + job->tpj_next = NULL; + job->tpj_func = func; + job->tpj_arg = arg; + + sig_mutex_lock(&tpool->tp_mutex); + + if (tpool->tp_head == NULL) + tpool->tp_head = job; + else + tpool->tp_tail->tpj_next = job; + tpool->tp_tail = job; + tpool->tp_njobs++; + + if (!(tpool->tp_flags & TP_SUSPEND)) { + if (tpool->tp_idle > 0) + (void) cond_signal(&tpool->tp_workcv); + else if (tpool->tp_current < tpool->tp_maximum && + create_worker(tpool) == 0) + tpool->tp_current++; + } + + sig_mutex_unlock(&tpool->tp_mutex); + return (0); +} + +/* + * Assumes: by the time tpool_destroy() is called no one will use this + * thread pool in any way and no one will try to dispatch entries to it. + * Calling tpool_destroy() from a job in the pool will cause deadlock. 
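+ * (The ASSERT of !tpool_member(tpool) below catches such misuse in
+ * debug builds: a worker calling tpool_destroy() would cancel itself
+ * and then wait forever for its own termination.)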
+ */
+void
+tpool_destroy(tpool_t *tpool)
+{
+	tpool_active_t *activep;
+
+	ASSERT(!tpool_member(tpool));
+	ASSERT(!(tpool->tp_flags & (TP_DESTROY | TP_ABANDON)));
+
+	sig_mutex_lock(&tpool->tp_mutex);
+	pthread_cleanup_push(sig_mutex_unlock, &tpool->tp_mutex);
+
+	/* mark the pool as being destroyed; wakeup idle workers */
+	tpool->tp_flags |= TP_DESTROY;
+	tpool->tp_flags &= ~TP_SUSPEND;
+	(void) cond_broadcast(&tpool->tp_workcv);
+
+	/* cancel all active workers */
+	for (activep = tpool->tp_active; activep; activep = activep->tpa_next)
+		(void) pthread_cancel(activep->tpa_tid);
+
+	/* wait for all active workers to finish */
+	while (tpool->tp_active != NULL) {
+		tpool->tp_flags |= TP_WAIT;
+		(void) sig_cond_wait(&tpool->tp_waitcv, &tpool->tp_mutex);
+	}
+
+	/* the last worker to terminate will wake us up */
+	while (tpool->tp_current != 0)
+		(void) sig_cond_wait(&tpool->tp_busycv, &tpool->tp_mutex);
+
+	pthread_cleanup_pop(1);	/* sig_mutex_unlock(&tpool->tp_mutex); */
+	delete_pool(tpool);
+}
+
+/*
+ * Like tpool_destroy(), but don't cancel workers or wait for them to finish.
+ * The last worker to terminate will delete the pool.
+ */
+void
+tpool_abandon(tpool_t *tpool)
+{
+	ASSERT(!(tpool->tp_flags & (TP_DESTROY | TP_ABANDON)));
+
+	sig_mutex_lock(&tpool->tp_mutex);
+	if (tpool->tp_current == 0) {
+		/* no workers, just delete the pool */
+		sig_mutex_unlock(&tpool->tp_mutex);
+		delete_pool(tpool);
+	} else {
+		/* wake up all workers, last one will delete the pool */
+		tpool->tp_flags |= TP_ABANDON;
+		tpool->tp_flags &= ~TP_SUSPEND;
+		(void) cond_broadcast(&tpool->tp_workcv);
+		sig_mutex_unlock(&tpool->tp_mutex);
+	}
+}
+
+/*
+ * Wait for all jobs to complete.
+ * Calling tpool_wait() from a job in the pool will cause deadlock.
+ */
+void
+tpool_wait(tpool_t *tpool)
+{
+	ASSERT(!tpool_member(tpool));
+	ASSERT(!(tpool->tp_flags & (TP_DESTROY | TP_ABANDON)));
+
+	sig_mutex_lock(&tpool->tp_mutex);
+	pthread_cleanup_push(sig_mutex_unlock, &tpool->tp_mutex);
+	while (tpool->tp_head != NULL || tpool->tp_active != NULL) {
+		tpool->tp_flags |= TP_WAIT;
+		(void) sig_cond_wait(&tpool->tp_waitcv, &tpool->tp_mutex);
+		ASSERT(!(tpool->tp_flags & (TP_DESTROY | TP_ABANDON)));
+	}
+	pthread_cleanup_pop(1);	/* sig_mutex_unlock(&tpool->tp_mutex); */
+}
+
+void
+tpool_suspend(tpool_t *tpool)
+{
+	ASSERT(!(tpool->tp_flags & (TP_DESTROY | TP_ABANDON)));
+
+	sig_mutex_lock(&tpool->tp_mutex);
+	tpool->tp_flags |= TP_SUSPEND;
+	sig_mutex_unlock(&tpool->tp_mutex);
+}
+
+int
+tpool_suspended(tpool_t *tpool)
+{
+	int suspended;
+
+	ASSERT(!(tpool->tp_flags & (TP_DESTROY | TP_ABANDON)));
+
+	sig_mutex_lock(&tpool->tp_mutex);
+	suspended = (tpool->tp_flags & TP_SUSPEND) != 0;
+	sig_mutex_unlock(&tpool->tp_mutex);
+
+	return (suspended);
+}
+
+void
+tpool_resume(tpool_t *tpool)
+{
+	int excess;
+
+	ASSERT(!(tpool->tp_flags & (TP_DESTROY | TP_ABANDON)));
+
+	sig_mutex_lock(&tpool->tp_mutex);
+	if (!(tpool->tp_flags & TP_SUSPEND)) {
+		sig_mutex_unlock(&tpool->tp_mutex);
+		return;
+	}
+	tpool->tp_flags &= ~TP_SUSPEND;
+	(void) cond_broadcast(&tpool->tp_workcv);
+	excess = tpool->tp_njobs - tpool->tp_idle;
+	while (excess-- > 0 && tpool->tp_current < tpool->tp_maximum) {
+		if (create_worker(tpool) != 0)
+			break;		/* pthread_create() failed */
+		tpool->tp_current++;
+	}
+	sig_mutex_unlock(&tpool->tp_mutex);
+}
+
+int
+tpool_member(tpool_t *tpool)
+{
+	pthread_t my_tid = pthread_self();
+	tpool_active_t *activep;
+
+	ASSERT(!(tpool->tp_flags & (TP_DESTROY | TP_ABANDON)));
+
+	sig_mutex_lock(&tpool->tp_mutex);
+	for (activep = tpool->tp_active; activep; activep = activep->tpa_next) {
+		if (activep->tpa_tid == my_tid) {
+			sig_mutex_unlock(&tpool->tp_mutex);
+			return (1);
+		}
+	}
+	sig_mutex_unlock(&tpool->tp_mutex);
+	return (0);
+}
+
+void
+postfork1_child_tpool(void)
+{
+	pthread_t my_tid = pthread_self();
+	tpool_t *tpool;
+	tpool_job_t *job;
+
+	/*
+	 * All of the thread pool workers are gone, except possibly
+	 * for the current thread, if it is a thread pool worker thread.
+	 * Retain the thread pools, but make them all empty.  Whatever
+	 * jobs were queued or running belong to the parent process.
+	 */
+top:
+	if ((tpool = thread_pools) == NULL)
+		return;
+
+	do {
+		tpool_active_t *activep;
+
+		(void) mutex_init(&tpool->tp_mutex, USYNC_THREAD, NULL);
+		(void) cond_init(&tpool->tp_busycv, USYNC_THREAD, NULL);
+		(void) cond_init(&tpool->tp_workcv, USYNC_THREAD, NULL);
+		(void) cond_init(&tpool->tp_waitcv, USYNC_THREAD, NULL);
+		for (job = tpool->tp_head; job; job = tpool->tp_head) {
+			tpool->tp_head = job->tpj_next;
+			lfree(job, sizeof (*job));
+		}
+		tpool->tp_tail = NULL;
+		tpool->tp_njobs = 0;
+		for (activep = tpool->tp_active; activep;
+		    activep = activep->tpa_next) {
+			if (activep->tpa_tid == my_tid) {
+				activep->tpa_next = NULL;
+				break;
+			}
+		}
+		tpool->tp_idle = 0;
+		tpool->tp_current = 0;
+		if ((tpool->tp_active = activep) != NULL)
+			tpool->tp_current = 1;
+		tpool->tp_flags &= ~TP_WAIT;
+		if (tpool->tp_flags & (TP_DESTROY | TP_ABANDON)) {
+			tpool->tp_flags &= ~TP_DESTROY;
+			tpool->tp_flags |= TP_ABANDON;
+			if (tpool->tp_current == 0) {
+				delete_pool(tpool);
+				goto top;	/* start over */
+			}
+		}
+	} while ((tpool = tpool->tp_forw) != thread_pools);
+}
diff --git a/usr/src/lib/libc/port/tpool/thread_pool_impl.h b/usr/src/lib/libc/port/tpool/thread_pool_impl.h
new file mode 100644
index 0000000000..66611778a0
--- /dev/null
+++ b/usr/src/lib/libc/port/tpool/thread_pool_impl.h
@@ -0,0 +1,99 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _THREAD_POOL_IMPL_H
+#define	_THREAD_POOL_IMPL_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <thread_pool.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Thread pool implementation definitions.
+ * See <thread_pool.h> for interface declarations.
+ */
+
+/*
+ * FIFO queued job
+ */
+typedef struct tpool_job tpool_job_t;
+struct tpool_job {
+	tpool_job_t	*tpj_next;		/* list of jobs */
+	void		(*tpj_func)(void *);	/* function to call */
+	void		*tpj_arg;		/* its argument */
+};
+
+/*
+ * List of active threads, linked through their stacks.
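+ *
+ * That is, each worker is expected to declare its tpool_active_t as a
+ * local variable on its own stack and link it onto tp_active for the
+ * duration of one job; an illustrative sketch of that linkage:
+ *
+ *	tpool_active_t active;
+ *
+ *	active.tpa_tid = pthread_self();
+ *	active.tpa_next = tpool->tp_active;
+ *	tpool->tp_active = &active;
+ *	(run the job, then unlink the entry before going idle)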
+ */
+typedef struct tpool_active tpool_active_t;
+struct tpool_active {
+	tpool_active_t	*tpa_next;	/* list of active threads */
+	pthread_t	tpa_tid;	/* active thread id */
+};
+
+/*
+ * The thread pool.
+ */
+struct tpool {
+	tpool_t		*tp_forw;	/* circular list of all thread pools */
+	tpool_t		*tp_back;
+	mutex_t		tp_mutex;	/* protects the pool data */
+	cond_t		tp_busycv;	/* synchronization in tpool_dispatch */
+	cond_t		tp_workcv;	/* synchronization with workers */
+	cond_t		tp_waitcv;	/* synchronization in tpool_wait() */
+	tpool_active_t	*tp_active;	/* threads performing work */
+	tpool_job_t	*tp_head;	/* FIFO job queue */
+	tpool_job_t	*tp_tail;
+	pthread_attr_t	tp_attr;	/* attributes of the workers */
+	int		tp_flags;	/* see below */
+	uint_t		tp_linger;	/* seconds before idle workers exit */
+	int		tp_njobs;	/* number of jobs in job queue */
+	int		tp_minimum;	/* minimum number of worker threads */
+	int		tp_maximum;	/* maximum number of worker threads */
+	int		tp_current;	/* current number of worker threads */
+	int		tp_idle;	/* number of idle workers */
+};
+
+/* tp_flags */
+#define	TP_WAIT		0x01	/* waiting in tpool_wait() */
+#define	TP_SUSPEND	0x02	/* pool is being suspended */
+#define	TP_DESTROY	0x04	/* pool is being destroyed */
+#define	TP_ABANDON	0x08	/* pool is abandoned (auto-destroy) */
+
+extern int _pthread_attr_clone(pthread_attr_t *, const pthread_attr_t *);
+
+extern const sigset_t maskset;	/* set of all maskable signals */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _THREAD_POOL_IMPL_H */
diff --git a/usr/src/lib/libc/sparc/Makefile b/usr/src/lib/libc/sparc/Makefile
index 7ce71b3756..50fa5f8c45 100644
--- a/usr/src/lib/libc/sparc/Makefile
+++ b/usr/src/lib/libc/sparc/Makefile
@@ -777,6 +777,24 @@ PORTI18N_COND= \
 	wcstol_longlong.o \
 	wcstoul_longlong.o
 
+AIOOBJS= \
+	aio.o \
+	aio_alloc.o \
+	posix_aio.o \
+
+RTOBJS= \
+	clock_timer.o \
+	fallocate.o \
+	mqueue.o \
+	pos4obj.o \
+	sched.o \
+	sem.o \
+	shm.o \
+	sigev_thread.o
+
+TPOOLOBJS= \
+	thread_pool.o
+
 THREADSOBJS= \
 	alloc.o \
 	assfail.o \
@@ -899,6 +917,9 @@ MOSTOBJS= \
 	$(PORTSTDIO_W) \
 	$(PORTSYS) \
 	$(PORTSYS64) \
+	$(AIOOBJS) \
+	$(RTOBJS) \
+	$(TPOOLOBJS) \
 	$(THREADSOBJS) \
 	$(THREADSMACHOBJS) \
 	$(THREADSASMOBJS) \
@@ -1004,6 +1025,9 @@ SRCS= \
 	$(PORTREGEX:%.o=../port/regex/%.c) \
 	$(PORTSTDIO:%.o=../port/stdio/%.c) \
 	$(PORTSYS:%.o=../port/sys/%.c) \
+	$(AIOOBJS:%.o=../port/aio/%.c) \
+	$(RTOBJS:%.o=../port/rt/%.c) \
+	$(TPOOLOBJS:%.o=../port/tpool/%.c) \
 	$(THREADSOBJS:%.o=../port/threads/%.c) \
 	$(THREADSMACHOBJS:%.o=../$(MACH)/threads/%.c) \
 	$(UNWINDMACHOBJS:%.o=../port/unwind/%.c) \
@@ -1033,6 +1057,7 @@ $(MAPFILE):
 
 # Files which need the threads .il inline template
 TIL= \
+	aio.o \
 	alloc.o \
 	assfail.o \
 	atexit.o \
@@ -1042,7 +1067,9 @@ TIL= \
 	errno.o \
 	getctxt.o \
 	lwp.o \
+	ma.o \
 	machdep.o \
+	posix_aio.o \
 	pthr_attr.o \
 	pthr_barrier.o \
 	pthr_cond.o \
@@ -1055,6 +1082,7 @@ TIL= \
 	scalls.o \
 	sema.o \
 	sigaction.o \
+	sigev_thread.o \
 	spawn.o \
 	stack.o \
 	swapctxt.o \
@@ -1062,6 +1090,7 @@ TIL= \
 	tdb_agent.o \
 	thr.o \
 	thread_interface.o \
+	thread_pool.o \
 	tls.o \
 	tsd.o \
 	unwind.o
diff --git a/usr/src/lib/libc/sparcv9/Makefile b/usr/src/lib/libc/sparcv9/Makefile
index e5810b8bd2..3918386307 100644
--- a/usr/src/lib/libc/sparcv9/Makefile
+++ b/usr/src/lib/libc/sparcv9/Makefile
@@ -725,6 +725,24 @@ PORTI18N_COND= \
 	wcstol_longlong.o \
 	wcstoul_longlong.o
 
+AIOOBJS= \
+	aio.o \
+	aio_alloc.o \
+	posix_aio.o \
+
+RTOBJS= \
+	clock_timer.o \
+	fallocate.o \
+	mqueue.o \
+	pos4obj.o \
+	sched.o \
+	sem.o \
+	shm.o \
+	sigev_thread.o
+
+TPOOLOBJS= \
+	thread_pool.o
+
 THREADSOBJS= \
 	alloc.o \
 	assfail.o \
@@ -844,6 +862,9 @@ MOSTOBJS= \
 	$(PORTSTDIO_W) \
 	$(PORTSYS) \
 	$(PORTSYS64) \
+	$(AIOOBJS) \
+	$(RTOBJS) \
+	$(TPOOLOBJS) \
 	$(THREADSOBJS) \
 	$(THREADSMACHOBJS) \
 	$(THREADSASMOBJS) \
@@ -949,6 +970,9 @@ SRCS= \
 	$(PORTREGEX:%.o=../port/regex/%.c) \
 	$(PORTSTDIO:%.o=../port/stdio/%.c) \
 	$(PORTSYS:%.o=../port/sys/%.c) \
+	$(AIOOBJS:%.o=../port/aio/%.c) \
+	$(RTOBJS:%.o=../port/rt/%.c) \
+	$(TPOOLOBJS:%.o=../port/tpool/%.c) \
 	$(THREADSOBJS:%.o=../port/threads/%.c) \
 	$(THREADSMACHOBJS:%.o=../$(MACH)/threads/%.c) \
 	$(UNWINDMACHOBJS:%.o=../port/unwind/%.c) \
@@ -977,6 +1001,7 @@ $(MAPFILE):
 
 # Files which need the threads .il inline template
 TIL= \
+	aio.o \
 	alloc.o \
 	assfail.o \
 	atexit.o \
@@ -986,7 +1011,9 @@ TIL= \
 	errno.o \
 	getctxt.o \
 	lwp.o \
+	ma.o \
 	machdep.o \
+	posix_aio.o \
 	pthr_attr.o \
 	pthr_barrier.o \
 	pthr_cond.o \
@@ -999,6 +1026,7 @@ TIL= \
 	scalls.o \
 	sema.o \
 	sigaction.o \
+	sigev_thread.o \
 	spawn.o \
 	stack.o \
 	swapctxt.o \
@@ -1006,6 +1034,7 @@ TIL= \
 	tdb_agent.o \
 	thr.o \
 	thread_interface.o \
+	thread_pool.o \
 	tls.o \
 	tsd.o \
 	unwind.o
diff --git a/usr/src/lib/libc/spec/Makefile.targ b/usr/src/lib/libc/spec/Makefile.targ
index 7b8d73ce11..4243823247 100644
--- a/usr/src/lib/libc/spec/Makefile.targ
+++ b/usr/src/lib/libc/spec/Makefile.targ
@@ -2,9 +2,8 @@
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License").  You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 # ident	"%Z%%M%	%I%	%E% SMI"
@@ -30,7 +29,8 @@
 LIBRARY = libc.a
 VERS = .1
 
-OBJECTS = atomic.o \
+OBJECTS = aio.o \
+	atomic.o \
 	data.o \
 	door.o \
 	fmtmsg.o \
@@ -43,6 +43,7 @@ OBJECTS = atomic.o \
 	private.o \
 	privatedata.o \
 	regex.o \
+	rt.o \
 	stdio.o \
 	sys.o \
 	threads.o \
diff --git a/usr/src/lib/libc/spec/aio.spec b/usr/src/lib/libc/spec/aio.spec
new file mode 100644
index 0000000000..6b2612210e
--- /dev/null
+++ b/usr/src/lib/libc/spec/aio.spec
@@ -0,0 +1,83 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
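+#
+# An illustrative sketch of how the Solaris-native asynchronous I/O
+# entries below fit together ("fd" and "buf" are assumed here, not
+# taken from this file); aiowait() returns the completed aio_result_t:
+#
+#	aio_result_t res;
+#
+#	(void) aioread(fd, buf, sizeof (buf), 0, SEEK_SET, &res);
+#	while (aiowait(NULL) != &res)
+#		continue;
+#	if (res.aio_return == -1)
+#		(consult res.aio_errno)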
+# +# ident "%Z%%M% %I% %E% SMI" +# + +function aiocancel +include <sys/asynch.h>, <aio.h> +declaration int aiocancel(aio_result_t *resultp) +version SUNW_1.23 +errno EACCES EFAULT EINVAL +exception $return == -1 +end + +function aioread +include <sys/types.h>, <sys/asynch.h>, <aio.h> +declaration int aioread(int fildes, char *bufp, int bufs, \ + off_t offset, int whence, aio_result_t *resultp) +version SUNW_1.23 +errno EAGAIN EBADF EFAULT EINVAL ENOMEM +exception $return == -1 +end + +function aioread64 +declaration int aioread64(int fd, caddr_t buf, int bufsz, off64_t offset, \ + int whence, aio_result_t *resultp) +arch i386 sparc +version SUNW_1.23 +end + +function aiowait +include <sys/asynch.h>, <aio.h>, <sys/time.h> +declaration aio_result_t *aiowait(struct timeval *timeout) +version SUNW_1.23 +errno EFAULT EINTR EINVAL +exception $return == (aio_result_t *)-1 +end + +function aiowrite +include <sys/types.h>, <sys/asynch.h>, <aio.h> +declaration int aiowrite(int fildes, char *bufp, int bufs, \ + off_t offset, int whence, aio_result_t *resultp) +version SUNW_1.23 +errno EAGAIN EBADF EFAULT EINVAL ENOMEM +exception $return == -1 +end + +function aiowrite64 +include <sys/types.h>, <sys/asynch.h>, <aio.h> +declaration int aiowrite64(int fildes, char *bufp, int bufs, \ + off64_t offset, int whence, aio_result_t *resultp) +arch sparc i386 +version SUNW_1.23 +errno EAGAIN EBADF EFAULT EINVAL ENOMEM +exception $return == -1 +end + +function assfail +declaration int assfail(char *a, char *f, int l) +version SUNW_1.23 +end + diff --git a/usr/src/lib/libc/spec/gen.spec b/usr/src/lib/libc/spec/gen.spec index 9c547e1a37..2b14689e7a 100644 --- a/usr/src/lib/libc/spec/gen.spec +++ b/usr/src/lib/libc/spec/gen.spec @@ -1,7 +1,4 @@ # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# # CDDL HEADER START # # The contents of this file are subject to the terms of the @@ -21,6 +18,10 @@ # # CDDL HEADER END # +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# # ident "%Z%%M% %I% %E% SMI" # @@ -3281,6 +3282,11 @@ errno EPERM exception $return == -1 end +function _sigstack +weak sigstack +version SUNWprivate_1.1 +end + function sleep include <unistd.h> declaration unsigned sleep(unsigned seconds) @@ -4842,19 +4848,6 @@ weak port_alert version SUNWprivate_1.1 end -function port_dispatch -include <port.h> -declaration int port_dispatch(int port, int flags, int source, int events, \ - uintptr_t object, void *user) -version SUNWprivate_1.1 -errno EBADF EBADFD EINTR -end - -function _port_dispatch -weak port_dispatch -version SUNWprivate_1.1 -end - function ucred_size include <ucred.h> declaration size_t ucred_size(void) diff --git a/usr/src/lib/libc/spec/private.spec b/usr/src/lib/libc/spec/private.spec index 2e26e10c8b..9868be02cf 100644 --- a/usr/src/lib/libc/spec/private.spec +++ b/usr/src/lib/libc/spec/private.spec @@ -41,26 +41,6 @@ function __class_quadruple # used by Sun's old Fortran 77 runtime libraries version SUNWprivate_1.1 end -function __clock_getres -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - -function __clock_gettime -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - -function __clock_nanosleep -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - -function __clock_settime -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - function __collate_init #Declaration /* Unknown. 
*/ version SUNWprivate_1.1 @@ -82,11 +62,6 @@ function __eucpctowc_gen version SUNWprivate_1.1 end -function __fdsync -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - function __fgetwc_dense #Declaration /* Unknown. */ version SUNWprivate_1.1 @@ -319,11 +294,6 @@ function __multi_innetgr version SUNWprivate_1.1 end -function __nanosleep -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - function __nl_langinfo_std #Declaration /* Unknown. */ version SUNWprivate_1.1 @@ -364,21 +334,6 @@ function __regfree_std version SUNWprivate_1.1 end -function __signotify -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - -function __sigqueue -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - -function __sigtimedwait -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - function __strcoll_C #Declaration /* Unknown. */ version SUNWprivate_1.1 @@ -436,31 +391,6 @@ function __time_init version SUNWprivate_1.1 end -function __timer_create -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - -function __timer_delete -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - -function __timer_getoverrun -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - -function __timer_gettime -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - -function __timer_settime -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - function __towctrans_bc #Declaration /* Unknown. */ version SUNWprivate_1.1 @@ -1376,11 +1306,6 @@ weak jrand48 version SUNWprivate_1.1 end -function _kaio -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - function _l64a # extends libc/spec/gen.spec l64a weak l64a #Declaration /* Unknown. */ @@ -1744,16 +1669,6 @@ weak pthread_atfork version SUNWprivate_1.1 end -function _pthread_attr_clone -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - -function _pthread_attr_equal -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - function _pthread_attr_destroy #Declaration /* Unknown. */ version SUNWprivate_1.1 @@ -1764,11 +1679,6 @@ function _pthread_attr_getdetachstate version SUNWprivate_1.1 end -function _pthread_attr_getdaemonstate_np -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - function _pthread_attr_getinheritsched #Declaration /* Unknown. */ version SUNWprivate_1.1 @@ -1814,11 +1724,6 @@ function _pthread_attr_setdetachstate version SUNWprivate_1.1 end -function _pthread_attr_setdaemonstate_np -#Declaration /* Unknown. */ -version SUNWprivate_1.1 -end - function _pthread_attr_setinheritsched #Declaration /* Unknown. */ version SUNWprivate_1.1 @@ -2992,10 +2897,6 @@ arch sparc sparcv9 version SUNWprivate_1.1 end -function kaio -version SUNWprivate_1.1 -end - function makeut version SUNWprivate_1.1 end diff --git a/usr/src/lib/libc/spec/rt.spec b/usr/src/lib/libc/spec/rt.spec new file mode 100644 index 0000000000..52de0469bd --- /dev/null +++ b/usr/src/lib/libc/spec/rt.spec @@ -0,0 +1,641 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +function aio_cancel +include <aio.h> +declaration int aio_cancel(int fildes, struct aiocb *aiocbp) +version SUNW_1.23 +errno EBADF ENOSYS +end + +function aio_fsync +include <aio.h> +declaration int aio_fsync(int op, aiocb_t *aiocbp) +version SUNW_1.23 +errno EAGAIN EBADF EINVAL ENOSYS +end + +function aio_read +include <aio.h> +declaration int aio_read(struct aiocb *aiocbp) +version SUNW_1.23 +errno EAGAIN ENOSYS EBADF EINVAL ECANCELED EFBIG +end + +function aio_write +include <aio.h> +declaration int aio_write(struct aiocb *aiocbp) +version SUNW_1.23 +errno EAGAIN ENOSYS EBADF EINVAL ECANCELED EFBIG +end + +function aio_return +include <aio.h> +declaration ssize_t aio_return(struct aiocb * aiocbp) +version SUNW_1.23 +errno EINVAL ENOSYS +end + +function aio_error +include <aio.h> +declaration int aio_error(const struct aiocb *aiocbp) +version SUNW_1.23 +errno EINVAL ENOSYS +end + +function aio_suspend +include <aio.h> +declaration int aio_suspend(const struct aiocb *const list[], int nent, \ + const struct timespec *timeout) +version SUNW_1.23 +errno EAGAIN EINTR ENOSYS +end + +function posix_fallocate +include <fcntl.h> +declaration int posix_fallocate(int fd, off_t offset, off_t len) +version SUNW_1.23 +errno EBADF EFBIG EINTR EINVAL EIO ENODEV ENOSPC ESPIPE +end + +function fdatasync +include <unistd.h> +declaration int fdatasync(int fildes) +version SUNW_1.23 +errno EBADF EINVAL ENOSYS +end + +function lio_listio +include <aio.h> +declaration int lio_listio(int mode, struct aiocb *const list[], int nent, \ + struct sigevent *sig) +version SUNW_1.23 +errno EAGAIN EINVAL EINTR EIO ENOSYS ECANCELED \ + EINPROGRESS EOVERFLOW EFBIG +end + +function aio_waitn +include <aio.h> +declaration int aio_waitn(struct aiocb *list[], uint_t nent, \ + uint_t *nwait, const struct timespec *timeout) +version SUNW_1.23 +errno EAGAIN EINTR ETIME ENOMEM EFAULT EINVAL +end + +function aio_cancel64 extends libc/spec/rt.spec aio_cancel +declaration int aio_cancel64(int fildes, struct aiocb64 *aiocbp) +arch i386 sparc +version SUNW_1.23 +end + +function aio_error64 extends libc/spec/rt.spec aio_error +declaration int aio_error64(const struct aiocb64 *aiocbp) +arch i386 sparc +version SUNW_1.23 +end + +function aio_fsync64 extends libc/spec/rt.spec aio_fsync +declaration int aio_fsync64(int op, struct aiocb64 *aiocbp) +arch i386 sparc +version SUNW_1.23 +end + +function aio_read64 extends libc/spec/rt.spec aio_read +declaration int aio_read64(struct aiocb64 *aiocbp) +arch i386 sparc +version SUNW_1.23 +end + +function aio_return64 extends libc/spec/rt.spec aio_return +declaration ssize_t aio_return64(struct aiocb64 * aiocbp) +arch i386 sparc +version SUNW_1.23 +end + +function aio_suspend64 extends libc/spec/rt.spec aio_suspend +declaration int aio_suspend64(const struct aiocb64 *const list[], \ + int nent, const struct timespec *timeout) +arch i386 sparc +version SUNW_1.23 +end + +function aio_write64 extends libc/spec/rt.spec aio_write +declaration int aio_write64(struct aiocb64 *aiocbp) +arch i386 sparc +version SUNW_1.23 +end + +function lio_listio64 extends libc/spec/rt.spec lio_listio +declaration int lio_listio64(int mode, struct aiocb64 
*const list[], \ + int nent, struct sigevent *sig) +arch i386 sparc +version SUNW_1.23 +end + +function aio_waitn64 extends libc/spec/rt.spec aio_waitn +declaration int aio_waitn64(struct aiocb64 *list[], uint_t nent, \ + uint_t *nwait, const struct timespec *timeout) +arch i386 sparc +version SUNW_1.23 +end + +function posix_fallocate64 extends libc/spec/rt.spec posix_fallocate +declaration int posix_fallocate64(int fd, off64_t offset, off64_t len) +arch i386 sparc +version SUNW_1.23 +end + +function mq_close +include <mqueue.h> +declaration int mq_close(mqd_t mqdes) +version SUNW_1.23 +errno EBADF ENOSYS +exception $return == -1 +end + +function mq_notify +include <mqueue.h> +declaration int mq_notify(mqd_t mqdes, const struct sigevent *notification) +version SUNW_1.23 +errno EBADF EBUSY ENOSYS +exception $return == -1 +end + +function mq_open +include <mqueue.h> +declaration mqd_t mq_open(const char *name, int oflag, ...) +version SUNW_1.23 +errno EACCESS EEXIST EINTR EINVAL EMFILE ENAMETOOLONG ENFILE \ + ENOENT ENOSPC ENOSYS +exception $return == (mqd_t)(-1) +end + +function mq_receive +include <mqueue.h> +declaration ssize_t mq_receive(mqd_t mqdes, char *msg_ptr, \ + size_t msg_len, unsigned int *msg_prio) +version SUNW_1.23 +errno EAGAIN EBADF EMSGSIZE EINTR +exception $return == (ssize_t)(-1) +end + +function mq_timedreceive +include <mqueue.h>, <time.h> +declaration ssize_t mq_timedreceive(mqd_t mqdes, char *msg_ptr, \ + size_t msg_len, unsigned int *msg_prio, \ + const struct timespec *abs_timeout) +version SUNW_1.23 +errno EAGAIN EBADF EMSGSIZE EINTR ETIMEDOUT +exception $return == (ssize_t)(-1) +end + +function mq_reltimedreceive_np +include <mqueue.h>, <time.h> +declaration ssize_t mq_reltimedreceive_np(mqd_t mqdes, char *msg_ptr, \ + size_t msg_len, unsigned int *msg_prio, \ + const struct timespec *rel_timeout) +version SUNW_1.23 +errno EAGAIN EBADF EMSGSIZE EINTR ETIMEDOUT +exception $return == (ssize_t)(-1) +end + +function mq_send +include <mqueue.h> +declaration int mq_send(mqd_t mqdes, const char *msg_ptr, \ + size_t msg_len, unsigned int msg_prio) +version SUNW_1.23 +errno EAGAIN EBADF EINTR EMSGSIZE +exception $return == -1 +end + +function mq_timedsend +include <mqueue.h>, <time.h> +declaration int mq_timedsend(mqd_t mqdes, const char *msg_ptr, \ + size_t msg_len, unsigned int msg_prio, \ + const struct timespec *abs_timeout) +version SUNW_1.23 +errno EAGAIN EBADF EINTR EMSGSIZE ETIMEDOUT +exception $return == -1 +end + +function mq_reltimedsend_np +include <mqueue.h>, <time.h> +declaration int mq_reltimedsend_np(mqd_t mqdes, const char *msg_ptr, \ + size_t msg_len, unsigned int msg_prio, \ + const struct timespec *rel_timeout) +version SUNW_1.23 +errno EAGAIN EBADF EINTR EMSGSIZE ETIMEDOUT +exception $return == -1 +end + +function mq_setattr +include <mqueue.h> +declaration int mq_setattr(mqd_t mqdes, \ + const struct mq_attr *_RESTRICT_KYWD mqstat, \ + struct mq_attr *_RESTRICT_KYWD omqstat) +version SUNW_1.23 +errno EBADF ENOSYS +exception $return == -1 +end + +function mq_getattr +include <mqueue.h> +declaration int mq_getattr(mqd_t mqdes, struct mq_attr *mqstat) +version SUNW_1.23 +errno EBADF ENOSYS +exception $return == -1 +end + +function mq_unlink +include <mqueue.h> +declaration int mq_unlink(const char *name) +version SUNW_1.23 +errno EACCESS ENAMETOOLONG ENOENT ENOSYS +exception $return == -1 +end + +function nanosleep +include <time.h> +declaration int nanosleep(const struct timespec *rqtp, \ + struct timespec *rmtp) +version SUNW_1.23 +errno EINTR EINVAL 
+end + +function clock_nanosleep +include <time.h> +declaration int clock_nanosleep(clockid_t clock_id, int flags, \ + const struct timespec *rqtp, struct timespec *rmtp) +version SUNW_1.23 +errno EINTR EINVAL +end + +function sched_get_priority_max +include <sched.h> +declaration int sched_get_priority_max(int policy) +version SUNW_1.23 +errno EINVAL ENOSYS ESRCH +end + +function sched_get_priority_min +include <sched.h> +declaration int sched_get_priority_min(int policy) +version SUNW_1.23 +errno EINVAL ENOSYS ESRCH +end + +function sched_rr_get_interval +include <sched.h> +declaration int sched_rr_get_interval(pid_t pid, struct timespec *interval) +version SUNW_1.23 +errno EINVAL ENOSYS ESRCH +end + +function sched_setparam +include <sched.h> +declaration int sched_setparam(pid_t pid, const struct sched_param *param) +version SUNW_1.23 +errno EINVAL ENOSYS EPERM ESRCH +end + +function sched_getparam +include <sched.h> +declaration int sched_getparam(pid_t pid, struct sched_param *param) +version SUNW_1.23 +errno EINVAL ENOSYS EPERM ESRCH +end + +function sched_setscheduler +include <sched.h> +declaration int sched_setscheduler(pid_t pid, int policy, \ + const struct sched_param *param) +version SUNW_1.23 +errno EINVAL ENOSYS EPERM ESRCH +end + +function sched_getscheduler +include <sched.h> +declaration int sched_getscheduler(pid_t pid) +version SUNW_1.23 +errno EINVAL ENOSYS EPERM ESRCH +end + +function sched_yield +include <sched.h> +declaration int sched_yield(void) +version SUNW_1.23 +errno ENOSYS +end + +function sem_close +include <semaphore.h> +declaration int sem_close(sem_t *sem) +version SUNW_1.23 +errno EINVAL ENOSYS +end + +function sem_destroy +include <semaphore.h> +declaration int sem_destroy(sem_t *sem) +version SUNW_1.23 +errno EINVAL ENOSYS EBUSY +end + +function sem_getvalue +include <semaphore.h> +declaration int sem_getvalue(sem_t *sem, int *sval) +version SUNW_1.23 +errno EINVAL ENOSYS +end + +function sem_init +include <semaphore.h>, <unistd.h> +declaration int sem_init(sem_t *sem, int pshared, unsigned int value) +version SUNW_1.23 +errno EINVAL ENOSPC ENOSYS EPERM +end + +function sem_open +include <semaphore.h>, <unistd.h>, <sys/stat.h> +declaration sem_t *sem_open(const char *name, int oflag, ...) 
+version SUNW_1.23 +errno EACCES EEXIST EINTR EINVAL EMFILE ENAMETOOLONG ENFILE \ + ENOENT ENOSPC ENOSYS +end + +function sem_post +include <semaphore.h> +declaration int sem_post(sem_t *sem) +version SUNW_1.23 +errno EINVAL ENOSYS +end + +function sem_unlink +include <semaphore.h> +declaration int sem_unlink(const char *name) +version SUNW_1.23 +errno EACCES ENAMETOOLONG ENOENT ENOSYS +end + +function sem_wait +include <semaphore.h> +declaration int sem_wait(sem_t *sem) +version SUNW_1.23 +errno EAGAIN EINVAL EINTR ENOSYS EDEADLK +end + +function sem_timedwait +include <semaphore.h> <time.h> +declaration int sem_timedwait(sem_t *sem, const timespec_t *abstime) +version SUNW_1.23 +errno EAGAIN EINVAL EINTR ETIMEDOUT EDEADLK +end + +function sem_reltimedwait_np +include <semaphore.h> <time.h> +declaration int sem_reltimedwait_np(sem_t *sem, const timespec_t *reltime) +version SUNW_1.23 +errno EAGAIN EINVAL EINTR ETIMEDOUT EDEADLK +end + +function sem_trywait +include <semaphore.h> +declaration int sem_trywait(sem_t *sem) +version SUNW_1.23 +errno EAGAIN EINVAL EINTR ENOSYS EDEADLK +end + +function shm_open +include <sys/mman.h>, <sys/types.h>, <sys/stat.h>, <fcntl.h> +declaration int shm_open(const char *name, int oflag, mode_t mode) +version SUNW_1.23 +errno EACCES EEXIST EINTR EINVAL EMFILE ENAMETOOLONG ENFILE \ + ENOENT ENOSPC ENOSYS +end + +function shm_unlink +declaration int shm_unlink(const char *name) +version SUNW_1.23 +errno EACCES ENAMETOOLONG ENOENT ENOSYS +end + +function sigqueue +include <signal.h> +declaration int sigqueue(pid_t pid, int signo, const union sigval value) +version SUNW_1.23 +errno EAGAIN EINVAL ENOSYS EPERM ESRCH +end + +function sigwaitinfo +include <signal.h> +declaration int sigwaitinfo(const sigset_t *_RESTRICT_KYWD set, \ + siginfo_t *_RESTRICT_KYWD info) +version SUNW_1.23 +errno EINTR ENOSYS EAGAIN EINVAL +end + +function sigtimedwait +include <signal.h> +declaration int sigtimedwait(const sigset_t *_RESTRICT_KYWD set, \ + siginfo_t *_RESTRICT_KYWD info, \ + const struct timespec *_RESTRICT_KYWD timeout) +version SUNW_1.23 +errno EINTR ENOSYS EAGAIN EINVAL +end + +function timer_create +include <signal.h>, <time.h> +declaration int timer_create(clockid_t clock_id, struct sigevent *evp, \ + timer_t *timerid) +version SUNW_1.23 +errno EAGAIN EINVAL ENOSYS +end + +function timer_delete +include <time.h> +declaration int timer_delete(timer_t timerid) +version SUNW_1.23 +errno EINVAL ENOSYS +end + +function timer_settime +include <time.h> +declaration int timer_settime(timer_t timerid, int flags, \ + const struct itimerspec *value, \ + struct itimerspec *ovalue) +version SUNW_1.23 +errno EINVAL ENOSYS +end + +function timer_gettime +include <time.h> +declaration int timer_gettime(timer_t timerid, struct itimerspec *value) +version SUNW_1.23 +errno EINVAL ENOSYS +end + +function timer_getoverrun +include <time.h> +declaration int timer_getoverrun(timer_t timerid) +version SUNW_1.23 +errno EINVAL ENOSYS +end + +function clock_settime +include <time.h> +declaration int clock_settime(clockid_t clock_id, const struct timespec *tp) +version SUNW_1.23 +errno EINVAL ENOSYS EPERM +end + +function clock_gettime +include <time.h> +declaration int clock_gettime(clockid_t clock_id, struct timespec *tp) +version SUNW_1.23 +errno EINVAL ENOSYS EPERM +end + +function clock_getres +include <time.h> +declaration int clock_getres(clockid_t clock_id, struct timespec *res) +version SUNW_1.23 +errno EINVAL ENOSYS EPERM +end + +function _clock_getres +version SUNWprivate_1.1 +end 
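+#
+# An illustrative sketch of the message-queue entries above
+# ("/example_q" is an assumed name, not taken from this file):
+#
+#	mqd_t q = mq_open("/example_q", O_CREAT | O_RDWR, 0600, NULL);
+#
+#	(void) mq_send(q, "ping", 4, 0);
+#	(a cooperating process calls mq_receive(q, buf, len, NULL))
+#	(void) mq_close(q);
+#	(void) mq_unlink("/example_q");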
+ +function _clock_gettime +version SUNWprivate_1.1 +end + +function _clock_settime +version SUNWprivate_1.1 +end + +function _nanosleep +version SUNWprivate_1.1 +end + +function _clock_nanosleep +version SUNWprivate_1.1 +end + +function _timer_create +version SUNWprivate_1.1 +end + +function _timer_delete +version SUNWprivate_1.1 +end + +function _timer_getoverrun +version SUNWprivate_1.1 +end + +function _timer_gettime +version SUNWprivate_1.1 +end + +function _timer_settime +version SUNWprivate_1.1 +end + +# +# Weak Specs +# +function _sem_open +weak sem_open +version SUNWprivate_1.1 +end + +function _sem_close +weak sem_close +version SUNWprivate_1.1 +end + +function _sem_unlink +weak sem_unlink +version SUNWprivate_1.1 +end + +function _sem_init +weak sem_init +version SUNWprivate_1.1 +end + +function _sem_destroy +weak sem_destroy +version SUNWprivate_1.1 +end + +function _sem_wait +weak sem_wait +version SUNWprivate_1.1 +end + +function _sem_timedwait +weak sem_timedwait +version SUNWprivate_1.1 +end + +function _sem_reltimedwait_np +weak sem_reltimedwait_np +version SUNWprivate_1.1 +end + +function _sem_trywait +weak sem_trywait +version SUNWprivate_1.1 +end + +function _sem_post +weak sem_post +version SUNWprivate_1.1 +end + +function _sem_getvalue +weak sem_getvalue +version SUNWprivate_1.1 +end + +function _sigwaitinfo +weak sigwaitinfo +version SUNWprivate_1.1 +end + +function _sigtimedwait +weak sigtimedwait +version SUNWprivate_1.1 +end + +function _sigqueue +weak sigqueue +version SUNWprivate_1.1 +end + diff --git a/usr/src/lib/libc/spec/sys.spec b/usr/src/lib/libc/spec/sys.spec index e780453a1d..89aa86beb9 100644 --- a/usr/src/lib/libc/spec/sys.spec +++ b/usr/src/lib/libc/spec/sys.spec @@ -1,3 +1,4 @@ +# # CDDL HEADER START # # The contents of this file are subject to the terms of the @@ -17,6 +18,7 @@ # # CDDL HEADER END # +# # Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -2377,11 +2379,6 @@ version sparc=SYSVABI_1.3 i386=SYSVABI_1.3 sparcv9=SUNW_0.7 \ binding nodirect end -function _libc_sigaction -weak sigaction -version SUNWprivate_1.1 -end - function sigaltstack include <signal.h> declaration int sigaltstack(const stack_t *_RESTRICT_KYWD ss, \ diff --git a/usr/src/lib/libc/spec/threads.spec b/usr/src/lib/libc/spec/threads.spec index 1bd84cfbeb..21e22d308a 100644 --- a/usr/src/lib/libc/spec/threads.spec +++ b/usr/src/lib/libc/spec/threads.spec @@ -1,4 +1,6 @@ # +# CDDL HEADER START +# # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. @@ -951,14 +953,6 @@ arch i386 version i386=SUNWprivate_1.1 end -function _cancel_prologue -version SUNWprivate_1.1 -end - -function _cancel_epilogue -version SUNWprivate_1.1 -end - function _sigoff version SUNWprivate_1.1 end @@ -967,10 +961,6 @@ function _sigon version SUNWprivate_1.1 end -function _sigdeferred -version SUNWprivate_1.1 -end - function _thr_detach version SUNWprivate_1.1 end |
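
-----

With this change the POSIX realtime interfaces listed in rt.spec and the
asynchronous I/O interfaces listed in aio.spec are exported directly from
libc, so a program that uses them needs no library beyond libc.  A minimal
sketch (illustrative; error checking elided) exercising two of the
relocated interfaces:

	#include <stdio.h>
	#include <time.h>

	int
	main(void)
	{
		struct timespec beg, end;
		struct timespec nap = { 0, 50000000 };	/* 50 milliseconds */

		(void) clock_gettime(CLOCK_REALTIME, &beg);
		(void) nanosleep(&nap, NULL);
		(void) clock_gettime(CLOCK_REALTIME, &end);
		(void) printf("slept about %ld ms\n",
		    (long)((end.tv_sec - beg.tv_sec) * 1000 +
		    (end.tv_nsec - beg.tv_nsec) / 1000000));
		return (0);
	}

Such a program should now build with cc alone; linking with -lrt or -laio
should no longer be necessary, although it remains harmless for
compatibility with existing makefiles.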