PSARC 2007/661 delete sched_nice

PSARC 2008/039 POSIX scheduling interfaces
6647542 POSIX scheduling should be compatible with Solaris scheduling classes
--HG--
rename : usr/src/lib/libc/inc/rtsched.h => deleted_files/usr/src/lib/libc/inc/rtsched.h
rename : usr/src/lib/libc/port/threads/rtsched.c => deleted_files/usr/src/lib/libc/port/threads/rtsched.c
author: raf <none@none> 2008-03-20 14:44:26 -0700
committer: raf <none@none> 2008-03-20 14:44:26 -0700
commit: d4204c85a44d2589b9afff2c81db7044e97f2d1d (patch)
tree: c46b62d4b195c18f308a9612c919ac3000e2db40
parent: 32fd284719e6d05c88b17f3b407c96e4aef0c1ee (diff)
download: illumos-gate-d4204c85a44d2589b9afff2c81db7044e97f2d1d.tar.gz
47 files changed, 2046 insertions, 1609 deletions
diff --git a/usr/src/lib/libc/inc/rtsched.h b/deleted_files/usr/src/lib/libc/inc/rtsched.h
index 90ae11c3b2..90ae11c3b2 100644
--- a/usr/src/lib/libc/inc/rtsched.h
+++ b/deleted_files/usr/src/lib/libc/inc/rtsched.h
diff --git a/usr/src/lib/libc/port/threads/rtsched.c b/deleted_files/usr/src/lib/libc/port/threads/rtsched.c
index c76e0b722a..c76e0b722a 100644
--- a/usr/src/lib/libc/port/threads/rtsched.c
+++ b/deleted_files/usr/src/lib/libc/port/threads/rtsched.c
diff --git a/usr/src/cmd/mdb/common/modules/libc/libc.c b/usr/src/cmd/mdb/common/modules/libc/libc.c
index 584aea7ad4..18114b0021 100644
--- a/usr/src/cmd/mdb/common/modules/libc/libc.c
+++ b/usr/src/cmd/mdb/common/modules/libc/libc.c
@@ -477,15 +477,15 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 		ulwp.ul_ustack.ss_size,
 		stack_flags(&ulwp.ul_ustack));
 
-	HD("ix         lwpid      pri        mappedpri  policy     pri_mapped");
+	HD("ix         lwpid      pri        epri       policy     cid");
 	mdb_printf(OFFSTR "%-10d %-10d %-10d %-10d %-10d %d\n",
 		OFFSET(ul_ix),
 		ulwp.ul_ix,
 		ulwp.ul_lwpid,
 		ulwp.ul_pri,
-		ulwp.ul_mappedpri,
+		ulwp.ul_epri,
 		ulwp.ul_policy,
-		ulwp.ul_pri_mapped);
+		ulwp.ul_cid);
 
 	HD("cursig     pleasestop stop       signalled  dead       unwind");
 	mdb_printf(OFFSTR "%-10d ",
@@ -520,15 +520,15 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 		ulwp.ul_max_spinners,
 		ulwp.ul_door_noreserve);
 
-	HD("queue_fifo c'w'defer  e'detect'  async_safe pad1       save_state");
+	HD("queue_fifo c'w'defer  e'detect'  async_safe rt         rtqueued");
 	mdb_printf(OFFSTR "%-10d %-10d %-10d %-10d %-10d %d\n",
 		OFFSET(ul_queue_fifo),
 		ulwp.ul_queue_fifo,
 		ulwp.ul_cond_wait_defer,
 		ulwp.ul_error_detection,
 		ulwp.ul_async_safe,
-		ulwp.ul_pad1,
-		ulwp.ul_save_state);
+		ulwp.ul_rt,
+		ulwp.ul_rtqueued);
 
 	HD("adapt'spin queue_spin critical   sigdefer   vfork");
 	mdb_printf(OFFSTR "%-10d %-10d %-10d %-10d %d\n",
@@ -607,12 +607,11 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 		prt_addr(ulwp.ul_sleepq, 1),
 		prt_addr(ulwp.ul_cvmutex, 0));
 
-	HD("mxchain               epri       emappedpri");
-	mdb_printf(OFFSTR "%s %-10d %d\n",
+	HD("mxchain               save_state");
+	mdb_printf(OFFSTR "%s %d\n",
 		OFFSET(ul_mxchain),
 		prt_addr(ulwp.ul_mxchain, 1),
-		ulwp.ul_epri,
-		ulwp.ul_emappedpri);
+		ulwp.ul_save_state);
 
 	HD("rdlockcnt             rd_rwlock             rd_count");
 	mdb_printf(OFFSTR "%-21d %s %d\n",
@@ -629,7 +628,7 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 		prt_addr(ulwp.ul_tpdp, 0));
 
 	HD("siglink               s'l'spin   s'l'spin2  s'l'sleep  s'l'wakeup");
-	mdb_printf(OFFSTR "%s %-10d %-10d %-10d %-10d\n",
+	mdb_printf(OFFSTR "%s %-10d %-10d %-10d %d\n",
 		OFFSET(ul_siglink),
 		prt_addr(ulwp.ul_siglink, 1),
 		ulwp.ul_spin_lock_spin,
@@ -637,6 +636,13 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 		ulwp.ul_spin_lock_sleep,
 		ulwp.ul_spin_lock_wakeup);
 
+	HD("&queue_root           rtclassid  pilocks");
+	mdb_printf(OFFSTR "%s %-10d %d\n",
+		OFFSET(ul_queue_root),
+		prt_addr((void *)(addr + OFFSET(ul_queue_root)), 1),
+		ulwp.ul_rtclassid,
+		ulwp.ul_pilocks);
+
 	/*
 	 * The remainder of the ulwp_t structure
 	 * is invalid if this is a replacement.
diff --git a/usr/src/cmd/priocntl/rtpriocntl.c b/usr/src/cmd/priocntl/rtpriocntl.c
index 8a7340589f..12efb5dc83 100644
--- a/usr/src/cmd/priocntl/rtpriocntl.c
+++ b/usr/src/cmd/priocntl/rtpriocntl.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -88,8 +88,8 @@ int
 main(int argc, char *argv[])
 {
 	int		c;
-	int		lflag, dflag, sflag, pflag, tflag, rflag, eflag, iflag;
-	int		qflag;
+	int		lflag, dflag, sflag, pflag;
+	int		tflag, rflag, eflag, iflag, qflag;
 	pri_t		rtpri;
 	long		tqntm;
 	long		res;
@@ -101,8 +101,8 @@ main(int argc, char *argv[])
 
 	(void) strlcpy(cmdpath, argv[0], MAXPATHLEN);
 	(void) strlcpy(basenm, basename(argv[0]), BASENMSZ);
-	qflag =
-	lflag = dflag = sflag = pflag = tflag = rflag = eflag = iflag = 0;
+	lflag = dflag = sflag = pflag = 0;
+	tflag = rflag = eflag = iflag = qflag = 0;
 	while ((c = getopt(argc, argv, "ldsp:t:r:q:ec:i:")) != -1) {
 		switch (c) {
 
@@ -246,7 +246,7 @@ main(int argc, char *argv[])
 
 
 /*
- * Print our class name and the maximum configured real-time priority.
+ * Print our class name and the configured user priority range.
  */
 static void
 print_rtinfo(void)
@@ -260,7 +260,7 @@ print_rtinfo(void)
 	if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) == -1)
 		fatalerr("\tCan't get maximum configured RT priority\n");
 
-	(void) printf("\tMaximum Configured RT Priority: %d\n",
+	(void) printf("\tConfigured RT User Priority Range: 0 through %d\n",
 	    ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri);
 }
 
diff --git a/usr/src/cmd/smserverd/smediad.c b/usr/src/cmd/smserverd/smediad.c
index d9512604e0..b69be16694 100644
--- a/usr/src/cmd/smserverd/smediad.c
+++ b/usr/src/cmd/smserverd/smediad.c
@@ -18,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -2969,7 +2970,9 @@ main(int argc, char **argv)
 		if (pmclose) {
 			(void) pthread_attr_init(&attr);
 			(void) pthread_attr_setscope(&attr,
-				PTHREAD_SCOPE_SYSTEM | PTHREAD_CREATE_DETACHED);
+			    PTHREAD_SCOPE_SYSTEM);
+			(void) pthread_attr_setdetachstate(&attr,
+			    PTHREAD_CREATE_DETACHED);
 			if (pthread_create(NULL, &attr, closedown, NULL) != 0) {
 				syslog(LOG_ERR, gettext(
 					"cannot create closedown thread"));
@@ -2998,7 +3001,9 @@ main(int argc, char **argv)
 		svcstart_level = get_run_level();
 		(void) pthread_attr_init(&attr);
 		(void) pthread_attr_setscope(&attr,
-			PTHREAD_SCOPE_SYSTEM | PTHREAD_CREATE_DETACHED);
+		    PTHREAD_SCOPE_SYSTEM);
+		(void) pthread_attr_setdetachstate(&attr,
+		    PTHREAD_CREATE_DETACHED);
 		if (pthread_create(NULL, &attr, closedown, NULL) != 0) {
 			syslog(LOG_ERR, gettext(
 				"cannot create closedown thread"));
diff --git a/usr/src/head/sched.h b/usr/src/head/sched.h
index 5be792bbd3..7460ff82c3 100644
--- a/usr/src/head/sched.h
+++ b/usr/src/head/sched.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 1993-2003 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -37,24 +37,21 @@ extern "C" {
 #endif
 
 struct sched_param {
-	int	sched_priority;	/* process execution scheduling priority */
-	int	sched_nicelim;	/* nice value limit for SCHED_OTHER policy */
-	int	sched_nice;	/* nice value for SCHED_OTHER policy */
-	int	sched_pad[6];	/* pad to the same size as pcparms_t of */
-				/* sys/priocntl.h */
-				/*	sizeof(sched_priority) +	*/
-				/*	sizeof(pcparms_t.pc_clparms)	*/
+	int	sched_priority;	/* scheduling priority */
+	int	sched_pad[8];
 };
 
 /*
- *	POSIX scheduling policies
+ * POSIX scheduling policies
  */
-#define	SCHED_OTHER	0
-#define	SCHED_FIFO	1	/* run to completion */
-#define	SCHED_RR	2	/* round-robin */
-#define	SCHED_SYS	3	/* sys scheduling class */
-#define	SCHED_IA	4	/* interactive class */
-#define	_SCHED_NEXT	5	/* first unassigned policy number */
+#define	SCHED_OTHER	0	/* traditional time-sharing scheduling class */
+#define	SCHED_FIFO	1	/* real-time class: run to completion */
+#define	SCHED_RR	2	/* real-time class: round-robin */
+#define	SCHED_SYS	3	/* system scheduling class */
+#define	SCHED_IA	4	/* interactive time-sharing class */
+#define	SCHED_FSS	5	/* fair-share scheduling class */
+#define	SCHED_FX	6	/* fixed-priority scheduling class */
+#define	_SCHED_NEXT	7	/* first unassigned policy number */
 
 /*
  * function prototypes
diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile
index e1d52f7c61..9946d95e00 100644
--- a/usr/src/lib/libc/amd64/Makefile
+++ b/usr/src/lib/libc/amd64/Makefile
@@ -743,7 +743,6 @@ THREADSOBJS=			\
 	pthr_mutex.o		\
 	pthr_rwlock.o		\
 	pthread.o		\
-	rtsched.o		\
 	rwlock.o		\
 	scalls.o		\
 	sema.o			\
@@ -917,8 +916,6 @@ CFLAGS64 += -xinline=
 THREAD_DEBUG =
 $(NOT_RELEASE_BUILD)THREAD_DEBUG = -DTHREAD_DEBUG
 
-CFLAGS64 += $(THREAD_DEBUG)
-
 ALTPICS= $(TRACEOBJS:%=pics/%)
 
 $(DYNLIB) := PICS += $(ROOTFS_LIBDIR64)/libc_i18n.a
@@ -926,7 +923,7 @@ $(DYNLIB) := BUILD.SO = $(LD) -o $@ -G $(DYNFLAGS) $(PICS) $(ALTPICS)
 
 MAPFILES =	../port/mapfile-vers mapfile-vers
 
-CPPFLAGS=	-D_REENTRANT -D$(MACH64) -D__$(MACH64) \
+CPPFLAGS=	-D_REENTRANT -D$(MACH64) -D__$(MACH64) $(THREAD_DEBUG) \
 		-I. -I$(LIBCBASE)/inc -I../inc $(CPPFLAGS.master) -I/usr/include
 ASFLAGS=	$(AS_PICFLAGS) -P -D__STDC__ -D_ASM $(CPPFLAGS) \
 		$(amd64_AS_XARCH)
@@ -1021,9 +1018,9 @@ TIL=				\
 	pthr_rwlock.o		\
 	pthread.o		\
 	rand.o			\
-	rtsched.o		\
 	rwlock.o		\
 	scalls.o		\
+	sched.o			\
 	sema.o			\
 	sigaction.o		\
 	sigev_thread.o		\
diff --git a/usr/src/lib/libc/amd64/threads/machdep.c b/usr/src/lib/libc/amd64/threads/machdep.c
index d600fb4cd5..cd96dfb1d9 100644
--- a/usr/src/lib/libc/amd64/threads/machdep.c
+++ b/usr/src/lib/libc/amd64/threads/machdep.c
@@ -18,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -78,11 +79,15 @@ _thr_setup(ulwp_t *self)
 	self->ul_ustack.ss_size = self->ul_stksiz;
 	self->ul_ustack.ss_flags = 0;
 	(void) _private_setustack(&self->ul_ustack);
+
+	update_sched(self);
 	tls_setup();
 
 	/* signals have been deferred until now */
 	sigon(self);
 
+	if (self->ul_cancel_pending == 2 && !self->ul_cancel_disabled)
+		return (NULL);	/* cancelled by pthread_create() */
 	return (self->ul_startpc(self->ul_startarg));
 }
 
@@ -148,7 +153,7 @@ __csigsetjmp(sigjmp_buf env, int savemask, gregset_t rs)
 		ucp->uc_stack = self->ul_ustack;
 	else {
 		ucp->uc_stack.ss_sp =
-			(void *)(self->ul_stktop - self->ul_stksiz);
+		    (void *)(self->ul_stktop - self->ul_stksiz);
 		ucp->uc_stack.ss_size = self->ul_stksiz;
 		ucp->uc_stack.ss_flags = 0;
 	}
diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com
index c773e4f8a0..93e3a5cb69 100644
--- a/usr/src/lib/libc/i386/Makefile.com
+++ b/usr/src/lib/libc/i386/Makefile.com
@@ -788,7 +788,6 @@ THREADSOBJS=			\
 	pthr_mutex.o		\
 	pthr_rwlock.o		\
 	pthread.o		\
-	rtsched.o		\
 	rwlock.o		\
 	scalls.o		\
 	sema.o			\
@@ -964,8 +963,6 @@ CFLAGS += $(XINLINE)
 THREAD_DEBUG =
 $(NOT_RELEASE_BUILD)THREAD_DEBUG = -DTHREAD_DEBUG
 
-CFLAGS += $(THREAD_DEBUG)
-
 ALTPICS= $(TRACEOBJS:%=pics/%)
 
 $(DYNLIB) := PICS += $(ROOTFS_LIBDIR)/libc_i18n.a
@@ -977,7 +974,7 @@ MAPFILES =	../port/mapfile-vers ../i386/mapfile-vers
 # EXTN_CPPFLAGS and EXTN_CFLAGS set in enclosing Makefile
 #
 CFLAGS +=	$(EXTN_CFLAGS)
-CPPFLAGS=	-D_REENTRANT -Di386 $(EXTN_CPPFLAGS) \
+CPPFLAGS=	-D_REENTRANT -Di386 $(EXTN_CPPFLAGS) $(THREAD_DEBUG) \
 		-I$(LIBCBASE)/inc -I../inc $(CPPFLAGS.master)
 ASFLAGS=	$(AS_PICFLAGS) -P -D__STDC__ -D_ASM $(CPPFLAGS) $(i386_AS_XARCH)
 
@@ -1085,9 +1082,9 @@ TIL=				\
 	pthr_rwlock.o		\
 	pthread.o		\
 	rand.o			\
-	rtsched.o		\
 	rwlock.o		\
 	scalls.o		\
+	sched.o			\
 	sema.o			\
 	sigaction.o		\
 	sigev_thread.o		\
diff --git a/usr/src/lib/libc/i386/threads/machdep.c b/usr/src/lib/libc/i386/threads/machdep.c
index 56e7446924..e28e2cd480 100644
--- a/usr/src/lib/libc/i386/threads/machdep.c
+++ b/usr/src/lib/libc/i386/threads/machdep.c
@@ -18,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -104,11 +105,14 @@ _thr_setup(ulwp_t *self)
 	self->ul_ustack.ss_flags = 0;
 	(void) _private_setustack(&self->ul_ustack);
 
+	update_sched(self);
 	tls_setup();
 
 	/* signals have been deferred until now */
 	sigon(self);
 
+	if (self->ul_cancel_pending == 2 && !self->ul_cancel_disabled)
+		return (NULL);	/* cancelled by pthread_create() */
 	return (self->ul_startpc(self->ul_startarg));
 }
 
@@ -171,7 +175,7 @@ __csigsetjmp(greg_t cs, greg_t ss, greg_t gs,
 		ucp->uc_stack = self->ul_ustack;
 	else {
 		ucp->uc_stack.ss_sp =
-			(void *)(self->ul_stktop - self->ul_stksiz);
+		    (void *)(self->ul_stktop - self->ul_stksiz);
 		ucp->uc_stack.ss_size = self->ul_stksiz;
 		ucp->uc_stack.ss_flags = 0;
 	}
diff --git a/usr/src/lib/libc/inc/thr_uberdata.h b/usr/src/lib/libc/inc/thr_uberdata.h
index 0fc0b652d0..4456b3d3df 100644
--- a/usr/src/lib/libc/inc/thr_uberdata.h
+++ b/usr/src/lib/libc/inc/thr_uberdata.h
@@ -207,7 +207,6 @@ typedef union {
  * to be optimized for speed.
  */
 
-
 /* double the default stack size for 64-bit processes */
 #ifdef _LP64
 #define	MINSTACK	(8 * 1024)
@@ -216,18 +215,10 @@ typedef union {
 #define	MINSTACK	(4 * 1024)
 #define	DEFAULTSTACK	(1024 * 1024)
 #endif
-#define	TSD_NKEYS	_POSIX_THREAD_KEYS_MAX
-
-#define	THREAD_MIN_PRIORITY	0
-#define	THREAD_MAX_PRIORITY	127
-
-#define	PRIO_SET	0	/* set priority and policy */
-#define	PRIO_SET_PRIO	1	/* set priority only */
-#define	PRIO_INHERIT	2
-#define	PRIO_DISINHERIT	3
 
 #define	MUTEX_TRY	0
 #define	MUTEX_LOCK	1
+#define	MUTEX_NOCEIL	0x40
 
 #if defined(__x86)
 
@@ -359,35 +350,83 @@ typedef struct {
 
 
 /*
- * Sleep queues for USYNC_THREAD condvars and mutexes.
- * The size and alignment is 64 bytes to reduce cache conflicts.
+ * Sleep queue root for USYNC_THREAD condvars and mutexes.
+ * There is a default queue root for each queue head (see below).
+ * Also, each ulwp_t contains a queue root that can be used
+ * when the thread is enqueued on the queue, if necessary
+ * (when more than one wchan hashes to the same queue head).
+ */
+typedef struct queue_root {
+	struct queue_root	*qr_next;
+	struct queue_root	*qr_prev;
+	struct ulwp		*qr_head;
+	struct ulwp		*qr_tail;
+	void			*qr_wchan;
+	uint32_t		qr_rtcount;
+	uint32_t		qr_qlen;
+	uint32_t		qr_qmax;
+} queue_root_t;
+
+#ifdef _SYSCALL32
+typedef struct queue_root32 {
+	caddr32_t		qr_next;
+	caddr32_t		qr_prev;
+	caddr32_t		qr_head;
+	caddr32_t		qr_tail;
+	caddr32_t		qr_wchan;
+	uint32_t		qr_rtcount;
+	uint32_t		qr_qlen;
+	uint32_t		qr_qmax;
+} queue_root32_t;
+#endif
+
+/*
+ * Sleep queue heads for USYNC_THREAD condvars and mutexes.
+ * The size and alignment is 128 bytes to reduce cache conflicts.
+ * Each queue head points to a list of queue roots, defined above.
+ * Each queue head contains a default queue root for use when only one
+ * is needed.  It is always at the tail of the queue root hash chain.
  */
 typedef union {
-	uint64_t	qh_64[8];
+	uint64_t		qh_64[16];
 	struct {
 		mutex_t		q_lock;
 		uint8_t		q_qcnt;
-		uint8_t		q_pad[7];
-		uint64_t	q_lockcount;
+		uint8_t		q_type;		/* MX or CV */
+		uint8_t		q_pad1[2];
+		uint32_t	q_lockcount;
 		uint32_t	q_qlen;
 		uint32_t	q_qmax;
-		struct ulwp	*q_head;
-		struct ulwp	*q_tail;
+		void		*q_wchan;	/* valid only while locked */
+		struct queue_root *q_root;	/* valid only while locked */
+		struct queue_root *q_hlist;
+#if !defined(_LP64)
+		caddr_t		q_pad2[3];
+#endif
+		queue_root_t	q_def_root;
+		uint32_t	q_hlen;
+		uint32_t	q_hmax;
 	} qh_qh;
 } queue_head_t;
 
 #define	qh_lock		qh_qh.q_lock
 #define	qh_qcnt		qh_qh.q_qcnt
+#define	qh_type		qh_qh.q_type
+#if defined(THREAD_DEBUG)
 #define	qh_lockcount	qh_qh.q_lockcount
 #define	qh_qlen		qh_qh.q_qlen
 #define	qh_qmax		qh_qh.q_qmax
-#define	qh_head		qh_qh.q_head
-#define	qh_tail		qh_qh.q_tail
-
-/* queue types passed to queue_lock() and enqueue() */
+#endif
+#define	qh_wchan	qh_qh.q_wchan
+#define	qh_root		qh_qh.q_root
+#define	qh_hlist	qh_qh.q_hlist
+#define	qh_def_root	qh_qh.q_def_root
+#define	qh_hlen		qh_qh.q_hlen
+#define	qh_hmax		qh_qh.q_hmax
+
+/* queue types passed to queue_lock() */
 #define	MX	0
 #define	CV	1
-#define	FIFOQ	0x10	/* or'ing with FIFOQ asks for FIFO queueing */
 #define	QHASHSHIFT	9			/* number of hashing bits */
 #define	QHASHSIZE	(1 << QHASHSHIFT)	/* power of 2 (1<<9 == 512) */
 #define	QUEUE_HASH(wchan, type)	((uint_t)			\
@@ -397,17 +436,29 @@ typedef union {
 
 extern	queue_head_t	*queue_lock(void *, int);
 extern	void		queue_unlock(queue_head_t *);
-extern	void		enqueue(queue_head_t *, struct ulwp *, void *, int);
-extern	struct ulwp	*dequeue(queue_head_t *, void *, int *);
-extern	struct ulwp	*queue_waiter(queue_head_t *, void *);
-extern	struct ulwp	*queue_unlink(queue_head_t *,
+extern	void		enqueue(queue_head_t *, struct ulwp *, int);
+extern	struct ulwp	*dequeue(queue_head_t *, int *);
+extern	struct ulwp	**queue_slot(queue_head_t *, struct ulwp **, int *);
+extern	struct ulwp	*queue_waiter(queue_head_t *);
+extern	int		dequeue_self(queue_head_t *);
+extern	void		queue_unlink(queue_head_t *,
 				struct ulwp **, struct ulwp *);
-extern	uint8_t		dequeue_self(queue_head_t *, void *);
 extern	void		unsleep_self(void);
 extern	void		spin_lock_set(mutex_t *);
 extern	void		spin_lock_clear(mutex_t *);
 
 /*
+ * Scheduling class information structure.
+ */
+typedef struct {
+	short		pcc_state;
+	short		pcc_policy;
+	pri_t		pcc_primin;
+	pri_t		pcc_primax;
+	pcinfo_t	pcc_info;
+} pcclass_t;
+
+/*
  * Memory block for chain of owned ceiling mutexes.
  */
 typedef struct mxchain {
@@ -491,10 +542,10 @@ typedef struct ulwp {
 	stack_t		ul_ustack;	/* current stack boundaries */
 	int		ul_ix;		/* hash index */
 	lwpid_t		ul_lwpid;	/* thread id, aka the lwp id */
-	pri_t		ul_pri;		/* priority known to the library */
-	pri_t		ul_mappedpri;	/* priority known to the application */
+	pri_t		ul_pri;		/* scheduling priority */
+	pri_t		ul_epri;	/* real-time ceiling priority */
 	char		ul_policy;	/* scheduling policy */
-	char		ul_pri_mapped;	/* != 0 means ul_mappedpri is valid */
+	char		ul_cid;		/* scheduling class id */
 	union {
 		struct {
 			char	cursig;	/* deferred signal number */
@@ -524,8 +575,8 @@ typedef struct ulwp {
 	char		ul_cond_wait_defer;	/* thread_cond_wait_defer */
 	char		ul_error_detection;	/* thread_error_detection */
 	char		ul_async_safe;		/* thread_async_safe */
-	char		ul_pad1;
-	char		ul_save_state;	/* bind_guard() interface to ld.so.1 */
+	char		ul_rt;			/* found on an RT queue */
+	char		ul_rtqueued;		/* was RT when queued */
 	int		ul_adaptive_spin;	/* thread_adaptive_spin */
 	int		ul_queue_spin;		/* thread_queue_spin */
 	volatile int	ul_critical;	/* non-zero == in a critical region */
@@ -543,8 +594,8 @@ typedef struct ulwp {
 	int		ul_errno;	/* per-thread errno */
 	int		*ul_errnop;	/* pointer to errno or self->ul_errno */
 	__cleanup_t	*ul_clnup_hdr;	/* head of cleanup handlers list */
-	uberflags_t *volatile ul_schedctl_called; /* ul_schedctl is set up */
-	volatile sc_shared_t *volatile ul_schedctl;	/* schedctl data */
+	uberflags_t	*ul_schedctl_called;	/* ul_schedctl is set up */
+	volatile sc_shared_t *ul_schedctl;	/* schedctl data */
 	int		ul_bindflags;	/* bind_guard() interface to ld.so.1 */
 	uint_t		ul_libc_locks;	/* count of cancel_safe_mutex_lock()s */
 	tsd_t		*ul_stsd;	/* slow TLS for keys >= TSD_NFAST */
@@ -562,8 +613,7 @@ typedef struct ulwp {
 	queue_head_t	*ul_sleepq;	/* sleep queue thread is waiting on */
 	mutex_t		*ul_cvmutex;	/* mutex dropped when waiting on a cv */
 	mxchain_t	*ul_mxchain;	/* chain of owned ceiling mutexes */
-	pri_t		ul_epri;	/* effective scheduling priority */
-	pri_t		ul_emappedpri;	/* effective mapped priority */
+	int		ul_save_state;	/* bind_guard() interface to ld.so.1 */
 	uint_t		ul_rdlockcnt;	/* # entries in ul_readlock array */
 				/* 0 means there is but a single entry */
 	union {				/* single entry or pointer to array */
@@ -584,6 +634,9 @@ typedef struct ulwp {
 	uint_t		ul_spin_lock_spin2;
 	uint_t		ul_spin_lock_sleep;
 	uint_t		ul_spin_lock_wakeup;
+	queue_root_t	ul_queue_root;	/* root of a sleep queue */
+	id_t		ul_rtclassid;	/* real-time class id */
+	uint_t		ul_pilocks;	/* count of PI locks held */
 		/* the following members *must* be last in the structure */
 		/* they are discarded when ulwp is replaced on thr_exit() */
 	sigset_t	ul_sigmask;	/* thread's current signal mask */
@@ -889,10 +942,10 @@ typedef struct ulwp32 {
 	stack32_t	ul_ustack;	/* current stack boundaries */
 	int		ul_ix;		/* hash index */
 	lwpid_t		ul_lwpid;	/* thread id, aka the lwp id */
-	pri_t		ul_pri;		/* priority known to the library */
-	pri_t		ul_mappedpri;	/* priority known to the application */
+	pri_t		ul_pri;		/* scheduling priority */
+	pri_t		ul_epri;	/* real-time ceiling priority */
 	char		ul_policy;	/* scheduling policy */
-	char		ul_pri_mapped;	/* != 0 means ul_mappedpri is valid */
+	char		ul_cid;		/* scheduling class id */
 	union {
 		struct {
 			char	cursig;	/* deferred signal number */
@@ -922,8 +975,8 @@ typedef struct ulwp32 {
 	char		ul_cond_wait_defer;	/* thread_cond_wait_defer */
 	char		ul_error_detection;	/* thread_error_detection */
 	char		ul_async_safe;		/* thread_async_safe */
-	char		ul_pad1;
-	char		ul_save_state;	/* bind_guard() interface to ld.so.1 */
+	char		ul_rt;			/* found on an RT queue */
+	char		ul_rtqueued;		/* was RT when queued */
 	int		ul_adaptive_spin;	/* thread_adaptive_spin */
 	int		ul_queue_spin;		/* thread_queue_spin */
 	int		ul_critical;	/* non-zero == in a critical region */
@@ -960,8 +1013,7 @@ typedef struct ulwp32 {
 	caddr32_t	ul_sleepq;	/* sleep queue thread is waiting on */
 	caddr32_t	ul_cvmutex;	/* mutex dropped when waiting on a cv */
 	caddr32_t	ul_mxchain;	/* chain of owned ceiling mutexes */
-	pri_t		ul_epri;	/* effective scheduling priority */
-	pri_t		ul_emappedpri;	/* effective mapped priority */
+	int		ul_save_state;	/* bind_guard() interface to ld.so.1 */
 	uint_t		ul_rdlockcnt;	/* # entries in ul_readlock array */
 				/* 0 means there is but a single entry */
 	union {				/* single entry or pointer to array */
@@ -982,6 +1034,9 @@ typedef struct ulwp32 {
 	uint_t		ul_spin_lock_spin2;
 	uint_t		ul_spin_lock_sleep;
 	uint_t		ul_spin_lock_wakeup;
+	queue_root32_t	ul_queue_root;	/* root of a sleep queue */
+	id_t		ul_rtclassid;	/* real-time class id */
+	uint_t		ul_pilocks;	/* count of PI locks held */
 		/* the following members *must* be last in the structure */
 		/* they are discarded when ulwp is replaced on thr_exit() */
 	sigset32_t	ul_sigmask;	/* thread's current signal mask */
@@ -1096,6 +1151,10 @@ extern	greg_t		stkptr(void);
 #define	__attribute__(string)
 #endif
 
+/* Fetch the dispatch (kernel) priority of a thread */
+#define	real_priority(ulwp)	\
+	((ulwp)->ul_schedctl? (ulwp)->ul_schedctl->sc_priority : 0)
+
 /*
  * Implementation functions.  Not visible outside of the library itself.
  */
@@ -1105,8 +1164,8 @@ extern	void	setgregs(ulwp_t *, gregset_t);
 extern	void	thr_panic(const char *);
 #pragma rarely_called(thr_panic)
 extern	ulwp_t	*find_lwp(thread_t);
-extern	int	real_priority(ulwp_t *);
 extern	void	finish_init(void);
+extern	void	update_sched(ulwp_t *);
 extern	void	queue_alloc(void);
 extern	void	tsd_exit(void);
 extern	void	tsd_free(ulwp_t *);
@@ -1356,6 +1415,8 @@ extern	int	__mutex_lock(mutex_t *);
 extern	int	__mutex_trylock(mutex_t *);
 extern	int	__mutex_unlock(mutex_t *);
 extern	int	mutex_is_held(mutex_t *);
+extern	int	mutex_lock_internal(mutex_t *, timespec_t *, int);
+extern	int	mutex_unlock_internal(mutex_t *, int);
 
 extern	int	_cond_init(cond_t *, int, void *);
 extern	int	_cond_signal(cond_t *);
@@ -1385,8 +1446,7 @@ extern	int	_thr_continue(thread_t);
 extern	int	_thr_create(void *, size_t, void *(*)(void *), void *, long,
 			thread_t *);
 extern	int	_thrp_create(void *, size_t, void *(*)(void *), void *, long,
-			thread_t *, pri_t, int, size_t);
-extern	int	_thr_getprio(thread_t, int *);
+			thread_t *, size_t);
 extern	int	_thr_getspecific(thread_key_t, void **);
 extern	int	_thr_join(thread_t, thread_t *, void **);
 extern	int	_thr_keycreate(thread_key_t *, PFrV);
@@ -1407,13 +1467,15 @@ extern	void	_thr_terminate(void *);
 extern	void	_thr_exit(void *);
 extern	void	_thrp_exit(void);
 
+extern	const pcclass_t *get_info_by_class(id_t);
+extern	const pcclass_t *get_info_by_policy(int);
+extern	void	_membar_producer(void);
+extern	void	_membar_consumer(void);
 extern	const thrattr_t *def_thrattr(void);
-extern	int	_thread_setschedparam_main(pthread_t, int,
-			const struct sched_param *, int);
-extern	int	_validate_rt_prio(int, int);
-extern	int	_thrp_setlwpprio(lwpid_t, int, int);
-extern	pri_t	map_rtpri_to_gp(pri_t);
-extern	int	get_info_by_policy(int);
+extern	id_t	setparam(idtype_t, id_t, int, int);
+extern	id_t	setprio(idtype_t, id_t, int, int *);
+extern	id_t	getparam(idtype_t, id_t, int *, struct sched_param *);
+extern	long	_private_priocntl(idtype_t, id_t, int, void *);
 
 /*
  * System call wrappers (direct interfaces to the kernel)
diff --git a/usr/src/lib/libc/port/gen/priocntl.c b/usr/src/lib/libc/port/gen/priocntl.c
index 36bd4ddb38..71aae38c63 100644
--- a/usr/src/lib/libc/port/gen/priocntl.c
+++ b/usr/src/lib/libc/port/gen/priocntl.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -29,20 +29,19 @@
 /*	Copyright (c) 1988 AT&T	*/
 /*	  All Rights Reserved  	*/
 
-
-#include	"synonyms.h"
-#include	<sys/types.h>
-#include	<sys/procset.h>
-#include	<sys/priocntl.h>
-#include	<stdarg.h>
-#include	<errno.h>
+#include "synonyms.h"
+#include <sys/types.h>
+#include <sys/procset.h>
+#include <sys/priocntl.h>
+#include <stdarg.h>
+#include <errno.h>
 
 /*
- * The declaration of __priocntlset() and __priocntl() was in prior releases
- * in <sys/priocntl.h>. They are used to define PC_VERSION at compile time,
- * based on the contents of the header file. This behavior is now changed.
- * Old binaries call __priocntl() and __priocntlset() instead priocntl() and
- * priocntlset(). New binaries call priocntl() and priocntlset().
+ * The declarations of __priocntlset() and __priocntl() were in prior releases
+ * in <sys/priocntl.h>.  They are used to define PC_VERSION at compile time,
+ * based on the contents of the header file.  This behavior is now changed.
+ * Old binaries call __priocntl() and __priocntlset() instead of priocntl()
+ * and priocntlset().  New binaries call priocntl() and priocntlset().
  */
 
 /*
@@ -50,12 +49,6 @@
  */
 extern long __priocntlset(int, procset_t *, int, caddr_t, ...);
 
-/*
- * prototype declaration
- */
-long __priocntl(int, idtype_t, id_t, int, caddr_t);
-
-
 static int pc_vaargs2parms(va_list valist, pc_vaparms_t *vp);
 
 long
@@ -68,6 +61,23 @@ __priocntl(int pc_version, idtype_t idtype, id_t id, int cmd, caddr_t arg)
 	return (__priocntlset(pc_version, &procset, cmd, arg, 0));
 }
 
+/*
+ * Internally to libc, we call this function rather than priocntl()
+ * when the cmd is not PC_GETXPARMS or PC_SETXPARMS.  We do this
+ * for the sake of calling common code in various places.  One of
+ * these places is in spawn() and spawnp(), where we must not call
+ * any function that is exported from libc while in the child of vfork().
+ */
+long
+_private_priocntl(idtype_t idtype, id_t id, int cmd, void *arg)
+{
+	extern long _private__priocntlset(int, procset_t *, int, caddr_t, ...);
+	procset_t procset;
+
+	setprocset(&procset, POP_AND, idtype, id, P_ALL, 0);
+	return (_private__priocntlset(PC_VERSION, &procset, cmd, arg, 0));
+}
+
 
 /*VARARGS3*/
 long
diff --git a/usr/src/lib/libc/port/gen/setpriority.c b/usr/src/lib/libc/port/gen/setpriority.c
index bb5f2848ee..d737c2ca19 100644
--- a/usr/src/lib/libc/port/gen/setpriority.c
+++ b/usr/src/lib/libc/port/gen/setpriority.c
@@ -18,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -41,6 +42,7 @@
 #include "synonyms.h"
 
 #include <string.h>
+#include <limits.h>
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/resource.h>
@@ -138,7 +140,7 @@ getpriority(int which, id_t who)
 	pcnice.pc_val = 0;
 	pcnice.pc_op = PC_GETNICE;
 
-	if (priocntl(idtype, id, PC_DONICE, (caddr_t)&pcnice) == -1)
+	if (priocntl(idtype, id, PC_DONICE, &pcnice) == -1)
 		return (-1);
 	else
 		return (pcnice.pc_val);
@@ -172,50 +174,41 @@ setpriority(int which, id_t who, int prio)
 	else
 		id = who;
 
-	if (prio > 19)
-		prio = 19;
-	else if (prio < -20)
-		prio = -20;
+	if (prio > NZERO - 1)
+		prio = NZERO - 1;
+	else if (prio < -NZERO)
+		prio = -NZERO;
 
 	pcnice.pc_val = prio;
 	pcnice.pc_op = PC_SETNICE;
 
-	ret = priocntl(idtype, id, PC_DONICE, (caddr_t)&pcnice);
+	ret = priocntl(idtype, id, PC_DONICE, &pcnice);
 
 	if (ret != 0 && errno == EPERM) {
-		int		incr;
-		int		tmp;
 		pcnice_t	gpcnice = { 0, PC_GETNICE };
-		priv_set_t	*pset;
+		priv_set_t	*pset = NULL;
 
 		/*
 		 * The priocntl PC_DONICE subcommand returns EPERM if we lack
 		 * sufficient privileges to carry out the operation, but
-		 * setpriority(3C) needs to return EACCES. We can't just change
-		 * EPERM to EACCES, because there are other conditions which
-		 * legitimately cause EPERM (such as an euid/ruid mismatch
+		 * setpriority(3C) may need to return EACCES.  We can't just
+		 * change EPERM to EACCES, because there are other conditions
+		 * which legitimately cause EPERM (such as an euid/ruid mismatch
 		 * between the current process and the target.).
-		 */
-		if ((tmp = priocntl(idtype, id, PC_DONICE,
-		    (caddr_t)&gpcnice)) != 0)
-			return (tmp);
-
-		incr = prio - gpcnice.pc_val;
-
-		if ((pset = priv_allocset()) == NULL ||
-		    getppriv(PRIV_EFFECTIVE, pset) != 0)
-			return (-1);
-
-		/*
+		 *
 		 * setpriority(3C) must return EACCES if we lack the privilege
 		 * checked for below and we are trying to increase the process
 		 * priority (by lowering the numeric value of its priority).
 		 */
-		if ((incr < 0 || incr > 2 * NZERO) &&
-		    !priv_ismember(pset, "proc_priocntl"))
-			errno = EACCES;
-
-		priv_freeset(pset);
+		if (priocntl(idtype, id, PC_DONICE, &gpcnice) == 0 &&
+		    prio < gpcnice.pc_val) {
+			if ((pset = priv_allocset()) != NULL &&
+			    getppriv(PRIV_EFFECTIVE, pset) == 0 &&
+			    !priv_ismember(pset, "proc_priocntl"))
+				errno = EACCES;
+			if (pset != NULL)
+				priv_freeset(pset);
+		}
 	}
 
 	return (ret);
diff --git a/usr/src/lib/libc/port/rt/sched.c b/usr/src/lib/libc/port/rt/sched.c
index cfa7259a96..701e07f894 100644
--- a/usr/src/lib/libc/port/rt/sched.c
+++ b/usr/src/lib/libc/port/rt/sched.c
@@ -20,245 +20,314 @@
  */
 
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 
 #include "synonyms.h"
-#include "mtlib.h"
-#include <sys/types.h>
+#include "thr_uberdata.h"
 #include <sched.h>
-#include <errno.h>
-#include <limits.h>
-#include <unistd.h>
-#include <sys/priocntl.h>
-#include <sys/rtpriocntl.h>
 #include <sys/tspriocntl.h>
-#include <sys/rt.h>
-#include <sys/ts.h>
-#include <thread.h>
-#include <string.h>
-#include <stdlib.h>
-#include "rtsched.h"
+#include <sys/rtpriocntl.h>
+#include <sys/fxpriocntl.h>
 
 /*
- * The following variables are used for caching information
+ * The following array is used for caching information
  * for priocntl scheduling classes.
  */
-struct pcclass ts_class;
-struct pcclass rt_class;
-struct pcclass ia_class;
-struct pcclass sys_class;
+static pcclass_t sched_class[] = {
+	{0, SCHED_OTHER, 0, 0, {-1, "TS",  0}},
+	{0, SCHED_FIFO,	 0, 0, {-1, "RT",  0}},
+	{0, SCHED_RR,	 0, 0, {-1, "RT",  0}},
+	{0, SCHED_SYS,	 0, 0, {0,  "SYS", 0}},
+	{0, SCHED_IA,	 0, 0, {-1, "IA",  0}},
+	{0, SCHED_FSS,	 0, 0, {-1, "FSS", 0}},
+	{0, SCHED_FX,	 0, 0, {-1, "FX",  0}},
+	/*
+	 * Allow unknown (to us) scheduling classes.
+	 * The kernel allows space for exactly 10 scheduling classes
+	 * (see the definitions of 'sclass' and 'nclass' in the kernel).
+	 * We need that number of available slots here.
+	 * If the kernel space is changed, this has to change too.
+	 */
+	{0, -1,		 0, 0, {-1, "",	   0}},
+	{0, -1,		 0, 0, {-1, "",	   0}},
+	{0, -1,		 0, 0, {-1, "",	   0}},
+	{0, -1,		 0, 0, {-1, "",	   0}},
+	{0, -1,		 0, 0, {-1, "",	   0}},
+	{0, -1,		 0, 0, {-1, "",	   0}},
+	{0, -1,		 0, 0, {-1, "",	   0}},
+	{0, -1,		 0, 0, {-1, "",	   0}},
+	{0, -1,		 0, 0, {-1, "",	   0}},
+	{0, -1,		 0, 0, {-1, "",	   0}},
+};
+
+#define	NPOLICY	(sizeof (sched_class) / sizeof (pcclass_t))
+
+#if _SCHED_NEXT != SCHED_FX + 1
+#error "fatal: _SCHED_NEXT != SCHED_FX + 1"
+#endif
+
+static mutex_t class_lock = DEFAULTMUTEX;	/* protects sched_class[] */
 
-static rtdpent_t	*rt_dptbl;	/* RT class parameter table */
-
-typedef struct { /* type definition for generic class-specific parameters */
-	int	pc_clparms[PC_CLINFOSZ];
-} pc_clparms_t;
+/*
+ * Helper function for get_info_by_policy(), below.
+ * Don't let a manufactured policy number duplicate
+ * the class of one of our base policy numbers.
+ */
+static int
+is_base_class(const char *clname)
+{
+	const pcclass_t	*pccp;
+	int		policy;
 
-static int	map_gp_to_rtpri(pri_t);
+	for (policy = 0, pccp = sched_class;
+	    policy < _SCHED_NEXT;
+	    policy++, pccp++) {
+		if (strcmp(clname, pccp->pcc_info.pc_clname) == 0)
+			return (1);
+	}
+	return (0);
+}
 
 /*
- * cache priocntl information on scheduling classes by policy
+ * Cache priocntl information on scheduling class by policy.
  */
-int
+const pcclass_t *
 get_info_by_policy(int policy)
 {
-	char		*pccname;
-	struct pcclass	*pccp;
+	pcclass_t *pccp = &sched_class[policy];
+	pcpri_t pcpri;
+	pri_t prio;
+	int base = 0;
 
-	if (policy < 0) {
+	if ((uint_t)policy >= NPOLICY || pccp->pcc_state < 0) {
 		errno = EINVAL;
-		return (-1);
+		return (NULL);
+	}
+
+	if (pccp->pcc_state > 0)
+		return (pccp);
+
+	lmutex_lock(&class_lock);
+
+	/* get class info (the system class is known to have class-id == 0) */
+	if (pccp->pcc_policy == -1) {
+		/* policy number not defined in <sched.h> */
+		ASSERT(policy >= _SCHED_NEXT);
+		pccp->pcc_info.pc_cid = policy - _SCHED_NEXT;
+		if (_private_priocntl(0, 0, PC_GETCLINFO, &pccp->pcc_info)
+		    == -1 ||
+		    (base = is_base_class(pccp->pcc_info.pc_clname)) != 0) {
+			pccp->pcc_info.pc_clname[0] = '\0';
+			pccp->pcc_info.pc_cid = -1;
+			/*
+			 * If we duplicated a base class, permanently
+			 * disable this policy entry.  Else allow for
+			 * dynamic loading of scheduling classes.
+			 */
+			if (base) {
+				_membar_producer();
+				pccp->pcc_state = -1;
+			}
+			errno = EINVAL;
+			lmutex_unlock(&class_lock);
+			return (NULL);
+		}
+		pccp->pcc_policy = policy;
+	} else if (policy != SCHED_SYS &&
+	    _private_priocntl(0, 0, PC_GETCID, &pccp->pcc_info) == -1) {
+		_membar_producer();
+		pccp->pcc_state = -1;
+		errno = EINVAL;
+		lmutex_unlock(&class_lock);
+		return (NULL);
 	}
 
 	switch (policy) {
-	case SCHED_FIFO:
-	case SCHED_RR:
-		pccp = &rt_class;
-		pccname = "RT";
-		break;
 	case SCHED_OTHER:
-		pccp = &ts_class;
-		pccname = "TS";
-		break;
-	case SCHED_SYS:
-		pccp = &sys_class;
-		pccname = "sys";
-		break;
-	case SCHED_IA:
-		pccp = &ia_class;
-		pccname = "IA";
+		prio = ((tsinfo_t *)pccp->pcc_info.pc_clinfo)->ts_maxupri;
+		pccp->pcc_primin = -prio;
+		pccp->pcc_primax = prio;
 		break;
-	default:
-		return (policy);
-	}
-	if (pccp->pcc_state != 0) {
-		if (pccp->pcc_state < 0)
-			errno = ENOSYS;
-		return (pccp->pcc_state);
-	}
-
-	/* get class's info */
-	(void) strcpy(pccp->pcc_info.pc_clname, pccname);
-	if (policy == SCHED_SYS)
-		pccp->pcc_info.pc_cid = 0;
-	else if (priocntl(P_PID, 0, PC_GETCID, (caddr_t)&(pccp->pcc_info)) < 0)
-		return (-1);
-
-	if (policy == SCHED_FIFO || policy == SCHED_RR) {
-		pcadmin_t	pcadmin;
-		rtadmin_t	rtadmin;
-		size_t		rtdpsize;
-
-		/* get RT class dispatch table in rt_dptbl */
-		pcadmin.pc_cid = rt_class.pcc_info.pc_cid;
-		pcadmin.pc_cladmin = (caddr_t)&rtadmin;
-		rtadmin.rt_cmd = RT_GETDPSIZE;
-		if (priocntl(P_PID, 0, PC_ADMIN, (caddr_t)&pcadmin) < 0)
-			return (-1);
-		rtdpsize = (size_t)(rtadmin.rt_ndpents * sizeof (rtdpent_t));
-		if (rt_dptbl == NULL &&
-		    (rt_dptbl = lmalloc(rtdpsize)) == NULL) {
-			errno = EAGAIN;
-			return (-1);
-		}
-		rtadmin.rt_dpents = rt_dptbl;
-		rtadmin.rt_cmd = RT_GETDPTBL;
-		if (priocntl(P_PID, 0, PC_ADMIN, (caddr_t)&pcadmin) < 0)
-			return (-1);
+	case SCHED_FIFO:
+	case SCHED_RR:
+		prio = ((rtinfo_t *)pccp->pcc_info.pc_clinfo)->rt_maxpri;
 		pccp->pcc_primin = 0;
-		pccp->pcc_primax = ((rtinfo_t *)rt_class.pcc_info.pc_clinfo)->
-		    rt_maxpri;
-	} else if (policy == SCHED_OTHER) {
-		pri_t		prio;
-
-		prio = ((tsinfo_t *)ts_class.pcc_info.pc_clinfo)->ts_maxupri/3;
-		pccp->pcc_primin = -prio;
 		pccp->pcc_primax = prio;
-	} else {
-		/* non-RT scheduling class */
-		pcpri_t		pcpri;
-
+		break;
+	default:
 		/*
-		 * get class's global priority's min, max, and
-		 * translate them into RT priority level (index) via rt_dptbl.
+		 * All other policy numbers, including policy numbers
+		 * not defined in <sched.h>.
 		 */
 		pcpri.pc_cid = pccp->pcc_info.pc_cid;
-		if (priocntl(0, 0, PC_GETPRIRANGE, (caddr_t)&pcpri) < 0)
-			return (-1);
-		pccp->pcc_primax = map_gp_to_rtpri(pcpri.pc_clpmax);
-		pccp->pcc_primin = map_gp_to_rtpri(pcpri.pc_clpmin);
+		if (_private_priocntl(0, 0, PC_GETPRIRANGE, &pcpri) == 0) {
+			pccp->pcc_primin = pcpri.pc_clpmin;
+			pccp->pcc_primax = pcpri.pc_clpmax;
+		}
+		break;
 	}
 
+	_membar_producer();
 	pccp->pcc_state = 1;
-	return (1);
+	lmutex_unlock(&class_lock);
+	return (pccp);
 }
 
-/*
- * Translate global scheduling priority to RT class's user priority.
- * Use the gp values in the rt_dptbl to do a reverse mapping
- * of a given gpri value relative to the index range of rt_dptbl.
- */
-static int
-map_gp_to_rtpri(pri_t gpri)
+const pcclass_t *
+get_info_by_class(id_t classid)
 {
-	rtdpent_t	*rtdp;
-	pri_t		pri;
+	pcinfo_t	pcinfo;
+	pcclass_t	*pccp;
+	int		policy;
 
-	/* need RT class info before we can translate priorities */
-	if (rt_dptbl == NULL && get_info_by_policy(SCHED_FIFO) < 0)
-		return (-1);
+	if (classid < 0) {
+		errno = EINVAL;
+		return (NULL);
+	}
 
-	if (gpri <= rt_dptbl[rt_class.pcc_primin].rt_globpri) {
-		pri = gpri - rt_dptbl[rt_class.pcc_primin].rt_globpri + \
-		    rt_class.pcc_primin;
-	} else if (gpri >= rt_dptbl[rt_class.pcc_primax].rt_globpri) {
-		pri = gpri - rt_dptbl[rt_class.pcc_primax].rt_globpri + \
-		    rt_class.pcc_primax;
-	} else {
-		pri = rt_class.pcc_primin + 1;
-		for (rtdp = rt_dptbl+1; rtdp->rt_globpri < gpri; ++rtdp, ++pri)
-			;
-		if (rtdp->rt_globpri > gpri)
-			--pri;
+	/* determine if we already know this classid */
+	for (policy = 0, pccp = sched_class;
+	    policy < NPOLICY;
+	    policy++, pccp++) {
+		if (pccp->pcc_state > 0 && pccp->pcc_info.pc_cid == classid)
+			return (pccp);
 	}
 
-	return (pri);
+	pcinfo.pc_cid = classid;
+	if (_private_priocntl(0, 0, PC_GETCLINFO, &pcinfo) == -1) {
+		if (classid == 0)	/* no kernel info for sys class */
+			return (get_info_by_policy(SCHED_SYS));
+		return (NULL);
+	}
+
+	for (policy = 0, pccp = sched_class;
+	    policy < NPOLICY;
+	    policy++, pccp++) {
+		if (pccp->pcc_state == 0 &&
+		    strcmp(pcinfo.pc_clname, pccp->pcc_info.pc_clname) == 0)
+			return (get_info_by_policy(pccp->pcc_policy));
+	}
+
+	/*
+	 * We have encountered an unknown (to us) scheduling class.
+	 * Manufacture a policy number for it.  Hopefully we still
+	 * have room in the sched_class[] table.
+	 */
+	policy = _SCHED_NEXT + classid;
+	if (policy >= NPOLICY) {
+		errno = EINVAL;
+		return (NULL);
+	}
+	lmutex_lock(&class_lock);
+	pccp = &sched_class[policy];
+	pccp->pcc_policy = policy;
+	(void) strlcpy(pccp->pcc_info.pc_clname, pcinfo.pc_clname, PC_CLNMSZ);
+	lmutex_unlock(&class_lock);
+	return (get_info_by_policy(pccp->pcc_policy));
 }
 
 /*
- * Translate RT class's user priority to global scheduling priority.
+ * Helper function: get process or lwp current scheduling policy.
  */
-pri_t
-map_rtpri_to_gp(pri_t pri)
+static const pcclass_t *
+get_parms(idtype_t idtype, id_t id, pcparms_t *pcparmp)
 {
-	rtdpent_t	*rtdp;
-	pri_t		gpri;
-
-	if (rt_class.pcc_state == 0)
-		(void) get_info_by_policy(SCHED_FIFO);
-
-	/* First case is the default case, other two are seldomly taken */
-	if (pri <= rt_dptbl[rt_class.pcc_primin].rt_globpri) {
-		gpri = pri + rt_dptbl[rt_class.pcc_primin].rt_globpri -
-		    rt_class.pcc_primin;
-	} else if (pri >= rt_dptbl[rt_class.pcc_primax].rt_globpri) {
-		gpri = pri + rt_dptbl[rt_class.pcc_primax].rt_globpri -
-		    rt_class.pcc_primax;
-	} else {
-		gpri =  rt_dptbl[rt_class.pcc_primin].rt_globpri + 1;
-		for (rtdp = rt_dptbl+1; rtdp->rt_globpri < pri; ++rtdp, ++gpri)
-			;
-		if (rtdp->rt_globpri > pri)
-			--gpri;
-	}
-	return (gpri);
+	pcparmp->pc_cid = PC_CLNULL;
+	if (_private_priocntl(idtype, id, PC_GETPARMS, pcparmp) == -1)
+		return (NULL);
+	return (get_info_by_class(pcparmp->pc_cid));
 }
 
+/*
+ * Helper function for setprio() and setparam(), below.
+ */
 static int
-get_info_by_class(id_t classid)
+set_priority(idtype_t idtype, id_t id, int policy, int prio,
+    pcparms_t *pcparmp, int settq)
 {
-	pcinfo_t	pcinfo;
+	int rv;
 
-	/* determine if we already know this classid */
-	if (rt_class.pcc_state > 0 && rt_class.pcc_info.pc_cid == classid)
-		return (1);
-	if (ts_class.pcc_state > 0 && ts_class.pcc_info.pc_cid == classid)
-		return (1);
-	if (sys_class.pcc_state > 0 && sys_class.pcc_info.pc_cid == classid)
-		return (1);
-	if (ia_class.pcc_state > 0 && ia_class.pcc_info.pc_cid == classid)
-		return (1);
+	switch (policy) {
+	case SCHED_OTHER:
+	{
+		tsparms_t *tsp = (tsparms_t *)pcparmp->pc_clparms;
+		tsp->ts_uprilim = prio;
+		tsp->ts_upri = prio;
+		break;
+	}
+	case SCHED_FIFO:
+	case SCHED_RR:
+	{
+		rtparms_t *rtp = (rtparms_t *)pcparmp->pc_clparms;
+		rtp->rt_tqnsecs = settq?
+		    (policy == SCHED_FIFO? RT_TQINF : RT_TQDEF) :
+		    RT_NOCHANGE;
+		rtp->rt_pri = prio;
+		break;
+	}
+	default:
+	{
+		/*
+		 * Class-independent method for setting the priority.
+		 */
+		pcprio_t pcprio;
+
+		pcprio.pc_op = PC_SETPRIO;
+		pcprio.pc_cid = pcparmp->pc_cid;
+		pcprio.pc_val = prio;
+		do {
+			rv = _private_priocntl(idtype, id, PC_DOPRIO, &pcprio);
+		} while (rv == -1 && errno == ENOMEM);
+		return (rv);
+	}
+	}
 
-	pcinfo.pc_cid = classid;
-	if (priocntl(0, 0, PC_GETCLINFO, (caddr_t)&pcinfo) < 0) {
-		if (classid == 0)	/* no kernel info for sys class */
-			return (get_info_by_policy(SCHED_SYS));
+	do {
+		rv = _private_priocntl(idtype, id, PC_SETPARMS, pcparmp);
+	} while (rv == -1 && errno == ENOMEM);
+	return (rv);
+}
+
+/*
+ * Utility function, private to libc, used by sched_setparam()
+ * and posix_spawn().  Because it is called by the vfork() child of
+ * posix_spawn(), we must not call any functions exported from libc.
+ */
+id_t
+setprio(idtype_t idtype, id_t id, int prio, int *policyp)
+{
+	pcparms_t	pcparm;
+	int		policy;
+	const pcclass_t	*pccp;
+
+	if ((pccp = get_parms(idtype, id, &pcparm)) == NULL)
+		return (-1);
+	if (prio < pccp->pcc_primin || prio > pccp->pcc_primax) {
+		errno = EINVAL;
 		return (-1);
 	}
 
-	if (rt_class.pcc_state == 0 && strcmp(pcinfo.pc_clname, "RT") == 0)
-		return (get_info_by_policy(SCHED_FIFO));
-	if (ts_class.pcc_state == 0 && strcmp(pcinfo.pc_clname, "TS") == 0)
-		return (get_info_by_policy(SCHED_OTHER));
-	if (ia_class.pcc_state == 0 && strcmp(pcinfo.pc_clname, "IA") == 0)
-		return (get_info_by_policy(SCHED_IA));
+	policy = pccp->pcc_policy;
+	if (policyp != NULL &&
+	    (policy == SCHED_FIFO || policy == SCHED_RR)) {
+		rtparms_t *rtp = (rtparms_t *)pcparm.pc_clparms;
+		policy = (rtp->rt_tqnsecs == RT_TQINF? SCHED_FIFO : SCHED_RR);
+	}
 
-	return (1);
+	if (set_priority(idtype, id, policy, prio, &pcparm, 0) == -1)
+		return (-1);
+	if (policyp != NULL)
+		*policyp = policy;
+	return (pccp->pcc_info.pc_cid);
 }
 
 int
 sched_setparam(pid_t pid, const struct sched_param *param)
 {
-	pri_t		prio = param->sched_priority;
-	pcparms_t	pcparm;
-	tsparms_t	*tsp;
-	tsinfo_t	*tsi;
-	int		scale;
-
 	if (pid < 0) {
 		errno = ESRCH;
 		return (-1);
@@ -266,48 +335,66 @@ sched_setparam(pid_t pid, const struct sched_param *param)
 	if (pid == 0)
 		pid = P_MYID;
 
-	/* get process's current scheduling policy */
-	pcparm.pc_cid = PC_CLNULL;
-	if (priocntl(P_PID, pid, PC_GETPARMS, (caddr_t)&pcparm) == -1)
+	if (setprio(P_PID, pid, param->sched_priority, NULL) == -1)
 		return (-1);
-	if (get_info_by_class(pcparm.pc_cid) < 0)
+	return (0);
+}
+
+id_t
+getparam(idtype_t idtype, id_t id, int *policyp, struct sched_param *param)
+{
+	pcparms_t pcparm;
+	const pcclass_t *pccp;
+	int policy;
+	int priority;
+
+	if ((pccp = get_parms(idtype, id, &pcparm)) == NULL)
 		return (-1);
 
-	if (pcparm.pc_cid == rt_class.pcc_info.pc_cid) {
-		/* SCHED_FIFO or SCHED_RR policy */
-		if (prio < rt_class.pcc_primin || prio > rt_class.pcc_primax) {
-			errno = EINVAL;
-			return (-1);
-		}
-		((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE;
-		((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
-	} else if (pcparm.pc_cid == ts_class.pcc_info.pc_cid) {
-		/* SCHED_OTHER policy */
-		tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo;
-		scale = tsi->ts_maxupri;
-		tsp = (tsparms_t *)pcparm.pc_clparms;
-		tsp->ts_uprilim = tsp->ts_upri = -(scale * prio) / 20;
-	} else {
+	switch (policy = pccp->pcc_policy) {
+	case SCHED_OTHER:
+	{
+		tsparms_t *tsp = (tsparms_t *)pcparm.pc_clparms;
+		priority = tsp->ts_upri;
+		break;
+	}
+	case SCHED_FIFO:
+	case SCHED_RR:
+	{
+		rtparms_t *rtp = (rtparms_t *)pcparm.pc_clparms;
+		priority = rtp->rt_pri;
+		policy = (rtp->rt_tqnsecs == RT_TQINF? SCHED_FIFO : SCHED_RR);
+		break;
+	}
+	default:
+	{
 		/*
-		 * policy is not defined by POSIX.4.
-		 * just pass parameter data through to priocntl.
-		 * param should contain an image of class-specific parameters
-		 * (after the sched_priority member).
+		 * Class-independent method for getting the priority.
 		 */
-		*((pc_clparms_t *)pcparm.pc_clparms) =
-		    *((pc_clparms_t *)(&(param->sched_priority)+1));
+		pcprio_t pcprio;
+
+		pcprio.pc_op = PC_GETPRIO;
+		pcprio.pc_cid = 0;
+		pcprio.pc_val = 0;
+		if (_private_priocntl(idtype, id, PC_DOPRIO, &pcprio) == 0)
+			priority = pcprio.pc_val;
+		else
+			priority = 0;
+		break;
+	}
 	}
 
-	return ((int)priocntl(P_PID, pid, PC_SETPARMS, (caddr_t)&pcparm));
+	*policyp = policy;
+	(void) memset(param, 0, sizeof (*param));
+	param->sched_priority = priority;
+
+	return (pcparm.pc_cid);
 }
 
 int
 sched_getparam(pid_t pid, struct sched_param *param)
 {
-	pcparms_t	pcparm;
-	pri_t		prio;
-	int		scale;
-	tsinfo_t	*tsi;
+	int policy;
 
 	if (pid < 0) {
 		errno = ESRCH;
@@ -316,49 +403,40 @@ sched_getparam(pid_t pid, struct sched_param *param)
 	if (pid == 0)
 		pid = P_MYID;
 
-	pcparm.pc_cid = PC_CLNULL;
-	if (priocntl(P_PID, pid, PC_GETPARMS, (caddr_t)&pcparm) == -1)
-		return (-1);
-	if (get_info_by_class(pcparm.pc_cid) < 0)
+	if (getparam(P_PID, pid, &policy, param) == -1)
 		return (-1);
+	return (0);
+}
 
-	if (pcparm.pc_cid == rt_class.pcc_info.pc_cid) {
-		param->sched_priority =
-			((rtparms_t *)pcparm.pc_clparms)->rt_pri;
-	} else if (pcparm.pc_cid == ts_class.pcc_info.pc_cid) {
-		param->sched_nicelim =
-			((tsparms_t *)pcparm.pc_clparms)->ts_uprilim;
-		prio = param->sched_nice =
-			((tsparms_t *)pcparm.pc_clparms)->ts_upri;
-		tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo;
-		scale = tsi->ts_maxupri;
-		if (scale == 0)
-			param->sched_priority = 0;
-		else
-			param->sched_priority = -(prio * 20) / scale;
-	} else {
-		/*
-		 * policy is not defined by POSIX.4
-		 * just return a copy of pcparams_t image in param.
-		 */
-		*((pc_clparms_t *)(&(param->sched_priority)+1)) =
-		    *((pc_clparms_t *)pcparm.pc_clparms);
-		param->sched_priority =
-		    sched_get_priority_min((int)(pcparm.pc_cid + _SCHED_NEXT));
+/*
+ * Utility function, private to libc, used by sched_setscheduler()
+ * and posix_spawn().  Because it is called by the vfork() child of
+ * posix_spawn(), we must not call any functions exported from libc.
+ */
+id_t
+setparam(idtype_t idtype, id_t id, int policy, int prio)
+{
+	pcparms_t	pcparm;
+	const pcclass_t	*pccp;
+
+	if (policy == SCHED_SYS ||
+	    (pccp = get_info_by_policy(policy)) == NULL ||
+	    prio < pccp->pcc_primin || prio > pccp->pcc_primax) {
+		errno = EINVAL;
+		return (-1);
 	}
 
-	return (0);
+	pcparm.pc_cid = pccp->pcc_info.pc_cid;
+	if (set_priority(idtype, id, policy, prio, &pcparm, 1) == -1)
+		return (-1);
+	return (pccp->pcc_info.pc_cid);
 }
 
 int
 sched_setscheduler(pid_t pid, int policy, const struct sched_param *param)
 {
 	pri_t		prio = param->sched_priority;
-	pcparms_t	pcparm;
 	int		oldpolicy;
-	tsinfo_t	*tsi;
-	tsparms_t	*tsp;
-	int		scale;
 
 	if ((oldpolicy = sched_getscheduler(pid)) < 0)
 		return (-1);
@@ -366,56 +444,7 @@ sched_setscheduler(pid_t pid, int policy, const struct sched_param *param)
 	if (pid == 0)
 		pid = P_MYID;
 
-	if (get_info_by_policy(policy) < 0) {
-		errno = EINVAL;
-		return (-1);
-	}
-
-	switch (policy) {
-	case SCHED_FIFO:
-	case SCHED_RR:
-		if (prio < rt_class.pcc_primin || prio > rt_class.pcc_primax) {
-			errno = EINVAL;
-			return (-1);
-		}
-		pcparm.pc_cid = rt_class.pcc_info.pc_cid;
-		((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
-		((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs =
-		    (policy == SCHED_RR ? RT_TQDEF : RT_TQINF);
-		break;
-
-	case SCHED_OTHER:
-		pcparm.pc_cid = ts_class.pcc_info.pc_cid;
-		tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo;
-		scale = tsi->ts_maxupri;
-		tsp = (tsparms_t *)pcparm.pc_clparms;
-		tsp->ts_uprilim = tsp->ts_upri = -(scale * prio) / 20;
-		break;
-
-	default:
-		switch (policy) {
-		case SCHED_SYS:
-			pcparm.pc_cid = sys_class.pcc_info.pc_cid;
-			break;
-		case SCHED_IA:
-			pcparm.pc_cid = ia_class.pcc_info.pc_cid;
-			break;
-		default:
-			pcparm.pc_cid = policy - _SCHED_NEXT;
-			break;
-		}
-		/*
-		 * policy is not defined by POSIX.4.
-		 * just pass parameter data through to priocntl.
-		 * param should contain an image of class-specific parameters
-		 * (after the sched_priority member).
-		 */
-		*((pc_clparms_t *)pcparm.pc_clparms) =
-		    *((pc_clparms_t *)&(param->sched_priority)+1);
-	}
-
-	/* setting scheduling policy & parameters for the process */
-	if (priocntl(P_PID, pid, PC_SETPARMS, (caddr_t)&pcparm) == -1)
+	if (setparam(P_PID, pid, policy, prio) == -1)
 		return (-1);
 
 	return (oldpolicy);
@@ -425,6 +454,7 @@ int
 sched_getscheduler(pid_t pid)
 {
 	pcparms_t	pcparm;
+	const pcclass_t	*pccp;
 	int		policy;
 
 	if (pid < 0) {
@@ -434,28 +464,13 @@ sched_getscheduler(pid_t pid)
 	if (pid == 0)
 		pid = P_MYID;
 
-	/* get scheduling policy & parameters for the process */
-	pcparm.pc_cid = PC_CLNULL;
-	if (priocntl(P_PID, pid, PC_GETPARMS, (caddr_t)&pcparm) == -1)
-		return (-1);
-	if (get_info_by_class(pcparm.pc_cid) < 0)
+	if ((pccp = get_parms(P_PID, pid, &pcparm)) == NULL)
 		return (-1);
 
-	if (pcparm.pc_cid == rt_class.pcc_info.pc_cid)
-		policy = ((((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
-		    RT_TQINF ? SCHED_FIFO : SCHED_RR));
-	else if (pcparm.pc_cid == ts_class.pcc_info.pc_cid)
-		policy = SCHED_OTHER;
-	else if (pcparm.pc_cid == sys_class.pcc_info.pc_cid)
-		policy = SCHED_SYS;
-	else if (pcparm.pc_cid == ia_class.pcc_info.pc_cid)
-		policy = SCHED_IA;
-	else {
-		/*
-		 * policy is not defined by POSIX.4
-		 * return a unique dot4 policy id.
-		 */
-		policy = (int)(_SCHED_NEXT + pcparm.pc_cid);
+	if ((policy = pccp->pcc_policy) == SCHED_FIFO || policy == SCHED_RR) {
+		policy =
+		    (((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs == RT_TQINF?
+		    SCHED_FIFO : SCHED_RR);
 	}
 
 	return (policy);
@@ -471,25 +486,10 @@ sched_yield(void)
 int
 sched_get_priority_max(int policy)
 {
-	pcpri_t	pcpri;
-
-	if (get_info_by_policy(policy) < 0)
-		return (-1);
-
-	if (policy == SCHED_FIFO || policy == SCHED_RR)
-		return (rt_class.pcc_primax);
-	else if (policy == SCHED_OTHER)
-		return (ts_class.pcc_primax);
-	else if (policy == SCHED_SYS)
-		return (sys_class.pcc_primax);
-	else if (policy == SCHED_IA)
-		return (ia_class.pcc_primax);
-	else { /* policy not in POSIX.4 */
-		pcpri.pc_cid = policy - _SCHED_NEXT;
-		if (priocntl(0, 0, PC_GETPRIRANGE, (caddr_t)&pcpri) == 0)
-			return (map_gp_to_rtpri(pcpri.pc_clpmax));
-	}
+	const pcclass_t *pccp;
 
+	if ((pccp = get_info_by_policy(policy)) != NULL)
+		return (pccp->pcc_primax);
 	errno = EINVAL;
 	return (-1);
 }
@@ -497,25 +497,10 @@ sched_get_priority_max(int policy)
 int
 sched_get_priority_min(int policy)
 {
-	pcpri_t pcpri;
-
-	if (get_info_by_policy(policy) < 0)
-		return (-1);
-
-	if (policy == SCHED_FIFO || policy == SCHED_RR)
-		return (rt_class.pcc_primin);
-	else if (policy == SCHED_OTHER)
-		return (ts_class.pcc_primin);
-	else if (policy == SCHED_SYS)
-		return (sys_class.pcc_primin);
-	else if (policy == SCHED_IA)
-		return (ia_class.pcc_primin);
-	else { /* policy not in POSIX.4 */
-		pcpri.pc_cid = policy - _SCHED_NEXT;
-		if (priocntl(0, 0, PC_GETPRIRANGE, (caddr_t)&pcpri) == 0)
-			return (map_gp_to_rtpri(pcpri.pc_clpmin));
-	}
+	const pcclass_t *pccp;
 
+	if ((pccp = get_info_by_policy(policy)) != NULL)
+		return (pccp->pcc_primin);
 	errno = EINVAL;
 	return (-1);
 }
@@ -524,6 +509,7 @@ int
 sched_rr_get_interval(pid_t pid, timespec_t *interval)
 {
 	pcparms_t pcparm;
+	const pcclass_t *pccp;
 
 	if (pid < 0) {
 		errno = ESRCH;
@@ -532,22 +518,119 @@ sched_rr_get_interval(pid_t pid, timespec_t *interval)
 	if (pid == 0)
 		pid = P_MYID;
 
-	if (get_info_by_policy(SCHED_RR) < 0)
+	if ((pccp = get_parms(P_PID, pid, &pcparm)) == NULL)
 		return (-1);
 
-	pcparm.pc_cid = PC_CLNULL;
-	if (priocntl(P_PID, pid, PC_GETPARMS, (caddr_t)&pcparm) == -1)
-		return (-1);
+	/*
+	 * At the moment, we have no class-independent method to fetch
+	 * the process/lwp time quantum.  Since SUSv3 does not restrict
+	 * this operation to the real-time class, we return an indefinite
+	 * quantum (tv_sec == 0 and tv_nsec == 0) for scheduling policies
+	 * for which this information isn't available.
+	 */
+	interval->tv_sec = 0;
+	interval->tv_nsec = 0;
+
+	switch (pccp->pcc_policy) {
+	case SCHED_FIFO:
+	case SCHED_RR:
+		{
+			rtparms_t *rtp = (rtparms_t *)pcparm.pc_clparms;
+			if (rtp->rt_tqnsecs != RT_TQINF) {
+				interval->tv_sec = rtp->rt_tqsecs;
+				interval->tv_nsec = rtp->rt_tqnsecs;
+			}
+		}
+		break;
+	case SCHED_FX:
+		{
+			fxparms_t *fxp = (fxparms_t *)pcparm.pc_clparms;
+			if (fxp->fx_tqnsecs != FX_TQINF) {
+				interval->tv_sec = fxp->fx_tqsecs;
+				interval->tv_nsec = fxp->fx_tqnsecs;
+			}
+		}
+		break;
+	}
+
+	return (0);
+}
 
-	if (pcparm.pc_cid == rt_class.pcc_info.pc_cid &&
-	    (((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs != RT_TQINF)) {
-		/* SCHED_RR */
-		interval->tv_sec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqsecs;
-		interval->tv_nsec =
-		    ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs;
-		return (0);
+/*
+ * Initialize or update ul_policy, ul_cid, and ul_pri.
+ */
+void
+update_sched(ulwp_t *self)
+{
+	volatile sc_shared_t *scp;
+	pcparms_t pcparm;
+	pcprio_t pcprio;
+	const pcclass_t *pccp;
+	int priority;
+	int policy;
+
+	ASSERT(self == curthread);
+
+	enter_critical(self);
+
+	if ((scp = self->ul_schedctl) == NULL &&
+	    (scp = setup_schedctl()) == NULL) {		/* can't happen? */
+		if (self->ul_policy < 0) {
+			self->ul_cid = 0;
+			self->ul_pri = 0;
+			_membar_producer();
+			self->ul_policy = SCHED_OTHER;
+		}
+		exit_critical(self);
+		return;
 	}
 
-	errno = EINVAL;
-	return (-1);
+	if (self->ul_policy >= 0 &&
+	    self->ul_cid == scp->sc_cid &&
+	    (self->ul_pri == scp->sc_cpri ||
+	    (self->ul_epri > 0 && self->ul_epri == scp->sc_cpri))) {
+		exit_critical(self);
+		return;
+	}
+
+	pccp = get_parms(P_LWPID, P_MYID, &pcparm);
+	if (pccp == NULL) {		/* can't happen? */
+		self->ul_cid = scp->sc_cid;
+		self->ul_pri = scp->sc_cpri;
+		_membar_producer();
+		self->ul_policy = SCHED_OTHER;
+		exit_critical(self);
+		return;
+	}
+
+	switch (policy = pccp->pcc_policy) {
+	case SCHED_OTHER:
+		priority = ((tsparms_t *)pcparm.pc_clparms)->ts_upri;
+		break;
+	case SCHED_FIFO:
+	case SCHED_RR:
+		priority = ((rtparms_t *)pcparm.pc_clparms)->rt_pri;
+		policy =
+		    ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs == RT_TQINF?
+		    SCHED_FIFO : SCHED_RR;
+		break;
+	default:
+		/*
+		 * Class-independent method for getting the priority.
+		 */
+		pcprio.pc_op = PC_GETPRIO;
+		pcprio.pc_cid = 0;
+		pcprio.pc_val = 0;
+		if (_private_priocntl(P_LWPID, P_MYID, PC_DOPRIO, &pcprio) == 0)
+			priority = pcprio.pc_val;
+		else
+			priority = 0;
+	}
+
+	self->ul_cid = pcparm.pc_cid;
+	self->ul_pri = priority;
+	_membar_producer();
+	self->ul_policy = policy;
+
+	exit_critical(self);
 }
diff --git a/usr/src/lib/libc/port/threads/pthr_attr.c b/usr/src/lib/libc/port/threads/pthr_attr.c
index bcae664e13..dc7056c067 100644
--- a/usr/src/lib/libc/port/threads/pthr_attr.c
+++ b/usr/src/lib/libc/port/threads/pthr_attr.c
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -45,7 +45,7 @@ def_thrattr(void)
 		PTHREAD_SCOPE_PROCESS,		/* scope */
 		0,				/* prio */
 		SCHED_OTHER,			/* policy */
-		PTHREAD_EXPLICIT_SCHED,		/* inherit */
+		PTHREAD_INHERIT_SCHED,		/* inherit */
 		0				/* guardsize */
 	};
 	if (thrattr.guardsize == 0)
@@ -94,7 +94,7 @@ _pthread_attr_clone(pthread_attr_t *attr, const pthread_attr_t *old_attr)
 {
 	thrattr_t *ap;
 	const thrattr_t *old_ap =
-		old_attr? old_attr->__pthread_attrp : def_thrattr();
+	    old_attr? old_attr->__pthread_attrp : def_thrattr();
 
 	if (old_ap == NULL)
 		return (EINVAL);
@@ -336,8 +336,7 @@ _pthread_attr_getinheritsched(const pthread_attr_t *attr, int *inherit)
 }
 
 /*
- * pthread_attr_setschedpolicy: sets the scheduling policy to SCHED_RR,
- * SCHED_FIFO or SCHED_OTHER.
+ * pthread_attr_setschedpolicy: sets the scheduling policy.
  */
 #pragma weak pthread_attr_setschedpolicy = _pthread_attr_setschedpolicy
 int
@@ -346,9 +345,7 @@ _pthread_attr_setschedpolicy(pthread_attr_t *attr, int policy)
 	thrattr_t *ap;
 
 	if (attr != NULL && (ap = attr->__pthread_attrp) != NULL &&
-	    (policy == SCHED_OTHER ||
-	    policy == SCHED_FIFO ||
-	    policy == SCHED_RR)) {
+	    policy != SCHED_SYS && get_info_by_policy(policy) != NULL) {
 		ap->policy = policy;
 		return (0);
 	}
@@ -382,23 +379,13 @@ _pthread_attr_setschedparam(pthread_attr_t *attr,
 	const struct sched_param *param)
 {
 	thrattr_t *ap;
-	int	policy;
-	int	pri;
 
-	if (attr == NULL || (ap = attr->__pthread_attrp) == NULL)
-		return (EINVAL);
-
-	policy = ap->policy;
-	pri = param->sched_priority;
-	if (policy == SCHED_OTHER) {
-		if ((pri < THREAD_MIN_PRIORITY || pri > THREAD_MAX_PRIORITY) &&
-		    _validate_rt_prio(policy, pri))
-			return (EINVAL);
-	} else if (_validate_rt_prio(policy, pri)) {
-		return (EINVAL);
+	if (attr != NULL && (ap = attr->__pthread_attrp) != NULL &&
+	    param != NULL) {
+		ap->prio = param->sched_priority;
+		return (0);
 	}
-	ap->prio = pri;
-	return (0);
+	return (EINVAL);
 }
 
 /*
diff --git a/usr/src/lib/libc/port/threads/pthr_mutex.c b/usr/src/lib/libc/port/threads/pthr_mutex.c
index 3eabd3de61..b49d5fb3d0 100644
--- a/usr/src/lib/libc/port/threads/pthr_mutex.c
+++ b/usr/src/lib/libc/port/threads/pthr_mutex.c
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -109,10 +109,13 @@ _pthread_mutexattr_getpshared(const pthread_mutexattr_t *attr, int *pshared)
 int
 _pthread_mutexattr_setprioceiling(pthread_mutexattr_t *attr, int prioceiling)
 {
+	const pcclass_t *pccp = get_info_by_policy(SCHED_FIFO);
 	mattr_t	*ap;
 
+	/* pccp is NULL when the SCHED_FIFO class info is unavailable */
 	if (attr == NULL || (ap = attr->__pthread_mutexattrp) == NULL ||
-	    _validate_rt_prio(SCHED_FIFO, prioceiling))
+	    pccp == NULL ||
+	    prioceiling < pccp->pcc_primin || prioceiling > pccp->pcc_primax)
 		return (EINVAL);
 	ap->prioceiling = prioceiling;
 	return (0);
@@ -238,23 +239,29 @@ _pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr)
 
 /*
  * pthread_mutex_setprioceiling: sets the prioceiling.
+ * From the SUSv3 (POSIX) specification for pthread_mutex_setprioceiling():
+ *	The process of locking the mutex need not
+ *	adhere to the priority protect protocol.
+ * We pass the MUTEX_NOCEIL flag to mutex_lock_internal() so that
+ * a non-realtime thread can successfully execute this operation.
  */
 #pragma weak pthread_mutex_setprioceiling =  _pthread_mutex_setprioceiling
 int
 _pthread_mutex_setprioceiling(pthread_mutex_t *mutex, int ceil, int *oceil)
 {
 	mutex_t *mp = (mutex_t *)mutex;
+	const pcclass_t *pccp = get_info_by_policy(SCHED_FIFO);
 	int error;
 
 	if (!(mp->mutex_type & PTHREAD_PRIO_PROTECT) ||
-	    _validate_rt_prio(SCHED_FIFO, ceil) != 0)
+	    ceil < pccp->pcc_primin || ceil > pccp->pcc_primax)
 		return (EINVAL);
-	error = _private_mutex_lock(mp);
+	error = mutex_lock_internal(mp, NULL, MUTEX_LOCK | MUTEX_NOCEIL);
 	if (error == 0) {
 		if (oceil)
 			*oceil = mp->mutex_ceiling;
-		mp->mutex_ceiling = (uint8_t)ceil;
-		error = _private_mutex_unlock(mp);
+		mp->mutex_ceiling = ceil;
+		error = mutex_unlock_internal(mp, 0);
 	}
 	return (error);
 }
diff --git a/usr/src/lib/libc/port/threads/pthread.c b/usr/src/lib/libc/port/threads/pthread.c
index baaadae7bd..10a4330faa 100644
--- a/usr/src/lib/libc/port/threads/pthread.c
+++ b/usr/src/lib/libc/port/threads/pthread.c
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -45,6 +45,41 @@ typedef struct  __once {
 
 #define	once_flag	oflag.pad32_flag[1]
 
+static int
+_thr_setparam(pthread_t tid, int policy, int prio)
+{
+	ulwp_t *target = find_lwp(tid);
+	int is_rt = (policy == SCHED_FIFO || policy == SCHED_RR);
+	id_t classid;
+	int rc = 0;
+
+	if (target == NULL)
+		return (ESRCH);
+
+	if (is_rt && policy == target->ul_policy &&
+	    target->ul_cid == target->ul_rtclassid && target->ul_epri != 0) {
+		/*
+		 * An effective priority (ul_epri) is in force for this
+		 * real-time thread.  Leave it alone and record only the
+		 * new base priority, which may not exceed it.
+		 */
+		if (prio <= target->ul_epri)
+			target->ul_pri = prio;
+		else
+			rc = EPERM;
+	} else if ((classid = setparam(P_LWPID, tid, policy, prio)) == -1) {
+		rc = errno;
+	} else {
+		target->ul_cid = classid;
+		target->ul_pri = prio;
+		/* order the two stores above ahead of the ul_policy store */
+		_membar_producer();
+		target->ul_policy = policy;
+	}
+	ulwp_unlock(target, curthread->ul_uberdata);
+	return (rc);
+}
+
 /*
  * pthread_create: creates a thread in the current process.
  * calls common _thrp_create() after copying the attributes.
@@ -55,67 +90,56 @@ _pthread_create(pthread_t *thread, const pthread_attr_t *attr,
 	void * (*start_routine)(void *), void *arg)
 {
 	ulwp_t		*self = curthread;
-	uberdata_t	*udp = self->ul_uberdata;
 	const thrattr_t	*ap = attr? attr->__pthread_attrp : def_thrattr();
+	const pcclass_t	*pccp;
 	long		flag;
 	pthread_t	tid;
-	int		policy;
-	pri_t		priority;
 	int		error;
-	int		mapped = 0;
-	int		mappedpri;
-	int		rt = 0;
+
+	update_sched(self);
 
 	if (ap == NULL)
 		return (EINVAL);
 
-	if (ap->inherit == PTHREAD_INHERIT_SCHED) {
-		policy = self->ul_policy;
-		priority = self->ul_pri;
-		mapped = self->ul_pri_mapped;
-		mappedpri = self->ul_mappedpri;
-	} else {
-		policy = ap->policy;
-		priority = ap->prio;
-		if (policy == SCHED_OTHER) {
-			if (priority < THREAD_MIN_PRIORITY ||
-			    priority > THREAD_MAX_PRIORITY) {
-				if (_validate_rt_prio(policy, priority))
-					return (EINVAL);
-				mapped = 1;
-				mappedpri = priority;
-				priority = map_rtpri_to_gp(priority);
-				ASSERT(priority >= THREAD_MIN_PRIORITY &&
-				    priority <= THREAD_MAX_PRIORITY);
-			}
-		} else if (policy == SCHED_FIFO || policy == SCHED_RR) {
-			if (_validate_rt_prio(policy, priority))
-				return (EINVAL);
-			if (_private_geteuid() == 0)
-				rt = 1;
-		} else {
-			return (EINVAL);
-		}
-	}
+	/* validate explicit scheduling attributes */
+	if (ap->inherit == PTHREAD_EXPLICIT_SCHED &&
+	    (ap->policy == SCHED_SYS ||
+	    (pccp = get_info_by_policy(ap->policy)) == NULL ||
+	    ap->prio < pccp->pcc_primin || ap->prio > pccp->pcc_primax))
+		return (EINVAL);
 
 	flag = ap->scope | ap->detachstate | ap->daemonstate | THR_SUSPENDED;
 	error = _thrp_create(ap->stkaddr, ap->stksize, start_routine, arg,
-		flag, &tid, priority, policy, ap->guardsize);
+	    flag, &tid, ap->guardsize);
 	if (error == 0) {
-		int prio_err;
-
-		if (mapped) {
+		if (ap->inherit == PTHREAD_EXPLICIT_SCHED &&
+		    (ap->policy != self->ul_policy ||
+		    ap->prio != (self->ul_epri? self->ul_epri : self->ul_pri)))
+			/*
+			 * The SUSv3 specification requires pthread_create()
+			 * to fail with EPERM if it cannot set the scheduling
+			 * policy and parameters on the new thread.
+			 */
+			error = _thr_setparam(tid, ap->policy, ap->prio);
+		if (error) {
+			/*
+			 * We couldn't determine this error before
+			 * actually creating the thread.  To recover,
+			 * mark the thread detached and cancel it.
+			 * It is as though it was never created.
+			 */
 			ulwp_t *ulwp = find_lwp(tid);
-			ulwp->ul_pri_mapped = 1;
-			ulwp->ul_mappedpri = mappedpri;
-			ulwp_unlock(ulwp, udp);
-		}
-
-		if (rt && (prio_err = _thrp_setlwpprio(tid, policy, priority)))
-			return (prio_err);
-
-		if (thread)
+			if (ulwp->ul_detached == 0) {
+				ulwp->ul_detached = 1;
+				ulwp->ul_usropts |= THR_DETACHED;
+				(void) __lwp_detach(tid);
+			}
+			ulwp->ul_cancel_pending = 2; /* cancelled on creation */
+			ulwp->ul_cancel_disabled = 0;
+			ulwp_unlock(ulwp, self->ul_uberdata);
+		} else if (thread) {
 			*thread = tid;
+		}
 		(void) _thr_continue(tid);
 	}
 
@@ -166,135 +190,51 @@ _pthread_equal(pthread_t t1, pthread_t t2)
 }
 
 /*
- * pthread_getschedparam: gets the sched parameters in a struct.
+ * pthread_getschedparam: get the thread's sched parameters.
  */
 #pragma weak	pthread_getschedparam		= _pthread_getschedparam
 int
 _pthread_getschedparam(pthread_t tid, int *policy, struct sched_param *param)
 {
-	uberdata_t *udp = curthread->ul_uberdata;
 	ulwp_t *ulwp;
+	id_t cid;
 	int error = 0;
 
-	if (param == NULL || policy == NULL)
-		error = EINVAL;
-	else if ((ulwp = find_lwp(tid)) == NULL)
+	if ((ulwp = find_lwp(tid)) == NULL) {
 		error = ESRCH;
-	else {
-		if (ulwp->ul_pri_mapped)
-			param->sched_priority = ulwp->ul_mappedpri;
-		else
+	} else {
+		cid = getparam(P_LWPID, ulwp->ul_lwpid, policy, param);
+		if (cid == -1) {
+			error = errno;
+		} else if (*policy == ulwp->ul_policy && cid == ulwp->ul_cid &&
+		    (*policy == SCHED_FIFO || *policy == SCHED_RR)) {
+			/*
+			 * Return the defined priority, not the effective
+			 * priority from priority ceiling mutexes.
+			 */
 			param->sched_priority = ulwp->ul_pri;
-		*policy = ulwp->ul_policy;
-		ulwp_unlock(ulwp, udp);
+		} else {
+			ulwp->ul_cid = cid;
+			ulwp->ul_pri = param->sched_priority;
+			_membar_producer();
+			ulwp->ul_policy = *policy;
+		}
+		ulwp_unlock(ulwp, curthread->ul_uberdata);
 	}
 
 	return (error);
 }
 
-/*
- * Besides the obvious arguments, the inheritflag needs to be explained:
- * If set to PRIO_SET or PRIO_SET_PRIO, it does the normal, expected work
- * of setting thread's assigned scheduling parameters and policy.
- * If set to PRIO_INHERIT, it sets the thread's effective priority values
- * (t_epri, t_empappedpri), and does not update the assigned priority values
- * (t_pri, t_mappedpri).  If set to PRIO_DISINHERIT, it clears the thread's
- * effective priority values, and reverts the thread, if necessary, back
- * to the assigned priority values.
- */
+#pragma weak thr_getprio = _thr_getprio
 int
-_thread_setschedparam_main(pthread_t tid, int policy,
-    const struct sched_param *param, int inheritflag)
+_thr_getprio(thread_t tid, int *priority)
 {
-	uberdata_t *udp = curthread->ul_uberdata;
-	ulwp_t	*ulwp;
-	int	error = 0;
-	int	prio;
-	int	opolicy;
-	int	mappedprio;
-	int	mapped = 0;
-	pri_t	*mappedprip;
-
-	if (param == NULL)
-		return (EINVAL);
-	if ((ulwp = find_lwp(tid)) == NULL)
-		return (ESRCH);
-	prio = param->sched_priority;
-	opolicy = ulwp->ul_policy;
-	if (inheritflag == PRIO_SET_PRIO) {	/* don't change policy */
-		policy = opolicy;
-		inheritflag = PRIO_SET;
-	}
-	ASSERT(inheritflag == PRIO_SET || opolicy == policy);
-	if (inheritflag == PRIO_DISINHERIT) {
-		ulwp->ul_emappedpri = 0;
-		ulwp->ul_epri = 0;
-		prio = ulwp->ul_pri;	/* ignore prio in sched_param */
-	}
-	if (policy == SCHED_OTHER) {
-		/*
-		 * Set thread's policy to OTHER
-		 */
-		if (prio < THREAD_MIN_PRIORITY || prio > THREAD_MAX_PRIORITY) {
-			if (_validate_rt_prio(policy, prio)) {
-				error = EINVAL;
-				goto out;
-			}
-			mapped = 1;
-			mappedprio = prio;
-			prio = map_rtpri_to_gp(prio);
-			ASSERT(prio >= THREAD_MIN_PRIORITY &&
-			    prio <= THREAD_MAX_PRIORITY);
-		}
-		/*
-		 * Thread changing from FIFO/RR to OTHER
-		 */
-		if (opolicy == SCHED_FIFO || opolicy == SCHED_RR) {
-			if ((error = _thrp_setlwpprio(tid, policy, prio)) != 0)
-				goto out;
-		}
-		if (inheritflag != PRIO_DISINHERIT) {
-			if (inheritflag == PRIO_INHERIT)
-				mappedprip = &ulwp->ul_emappedpri;
-			else
-				mappedprip = &ulwp->ul_mappedpri;
-			if (mapped) {
-				ulwp->ul_pri_mapped = 1;
-				*mappedprip = mappedprio;
-			} else {
-				ulwp->ul_pri_mapped = 0;
-				*mappedprip = 0;
-			}
-		}
-		ulwp->ul_policy = policy;
-		if (inheritflag == PRIO_INHERIT)
-			ulwp->ul_epri = prio;
-		else
-			ulwp->ul_pri = prio;
-	} else if (policy == SCHED_FIFO || policy == SCHED_RR) {
-		if (_validate_rt_prio(policy, prio))
-			error = EINVAL;
-		else {
-			int prio_err;
-
-			if (_private_geteuid() == 0 &&
-			    (prio_err = _thrp_setlwpprio(tid, policy, prio))) {
-				error = prio_err;
-				goto out;
-			}
+	struct sched_param param;
+	int policy;
+	int error;
 
-			ulwp->ul_policy = policy;
-			if (inheritflag == PRIO_INHERIT)
-				ulwp->ul_epri = prio;
-			else
-				ulwp->ul_pri = prio;
-		}
-	} else {
-		error = EINVAL;
-	}
-
-out:
-	ulwp_unlock(ulwp, udp);
+	if ((error = _pthread_getschedparam(tid, &policy, &param)) == 0)
+		*priority = param.sched_priority;
 	return (error);
 }
 
@@ -306,5 +246,29 @@ int
 _pthread_setschedparam(pthread_t tid,
 	int policy, const struct sched_param *param)
 {
-	return (_thread_setschedparam_main(tid, policy, param, PRIO_SET));
+	/* reject a NULL param with EINVAL, as the old code did */
+	if (param == NULL)
+		return (EINVAL);
+	return (_thr_setparam(tid, policy, param->sched_priority));
+}
+
+#pragma weak thr_setprio = _thr_setprio
+#pragma weak pthread_setschedprio = _thr_setprio
+#pragma weak _pthread_setschedprio = _thr_setprio
+int
+_thr_setprio(thread_t tid, int prio)
+{
+	struct sched_param param;
+	int policy;
+	int error;
+
+	/*
+	 * _pthread_getschedparam() has the side-effect of setting
+	 * the target thread's ul_policy, ul_pri and ul_cid correctly.
+	 */
+	if ((error = _pthread_getschedparam(tid, &policy, &param)) != 0)
+		return (error);
+	if (param.sched_priority == prio)	/* no change */
+		return (0);
+	return (_thr_setparam(tid, policy, prio));
 }
diff --git a/usr/src/lib/libc/port/threads/rwlock.c b/usr/src/lib/libc/port/threads/rwlock.c
index 0f58b3a230..5770049bb1 100644
--- a/usr/src/lib/libc/port/threads/rwlock.c
+++ b/usr/src/lib/libc/port/threads/rwlock.c
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -105,7 +105,7 @@ rwl_entry(rwlock_t *rwlp)
 	 */
 	readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t));
 	(void) _memcpy(readlockp, self->ul_readlock.array,
-		nlocks * sizeof (readlock_t));
+	    nlocks * sizeof (readlock_t));
 	lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t));
 	self->ul_readlock.array = readlockp;
 	self->ul_rdlockcnt *= 2;
@@ -254,7 +254,7 @@ read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
 {
 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
 	uint32_t mask = ignore_waiters_flag?
-		URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED);
+	    URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED);
 	uint32_t readers;
 	ulwp_t *self = curthread;
 
@@ -298,8 +298,8 @@ write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
 {
 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
 	uint32_t mask = ignore_waiters_flag?
-		(URW_WRITE_LOCKED | URW_READERS_MASK) :
-		(URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK);
+	    (URW_WRITE_LOCKED | URW_READERS_MASK) :
+	    (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK);
 	ulwp_t *self = curthread;
 	uint32_t readers;
 
@@ -347,12 +347,12 @@ rw_queue_release(queue_head_t *qp, rwlock_t *rwlp)
 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
 	uint32_t readers;
 	uint32_t writers;
-	int nlwpid = 0;
-	int maxlwps = MAXLWPS;
-	ulwp_t *self;
 	ulwp_t **ulwpp;
 	ulwp_t *ulwp;
-	ulwp_t *prev = NULL;
+	ulwp_t *prev;
+	int nlwpid = 0;
+	int more;
+	int maxlwps = MAXLWPS;
 	lwpid_t buffer[MAXLWPS];
 	lwpid_t *lwpid = buffer;
 
@@ -366,9 +366,9 @@ rw_queue_release(queue_head_t *qp, rwlock_t *rwlp)
 	writers = 0;
 
 	/*
-	 * Walk the list of waiters and prepare to wake up as
-	 * many readers as we encounter before encountering
-	 * a writer.  If the first thread on the list is a
+	 * Examine the queue of waiters in priority order and prepare
+	 * to wake up as many readers as we encounter before encountering
+	 * a writer.  If the highest priority thread on the queue is a
 	 * writer, stop there and wake it up.
 	 *
 	 * We keep track of lwpids that are to be unparked in lwpid[].
@@ -383,13 +383,9 @@ rw_queue_release(queue_head_t *qp, rwlock_t *rwlp)
 	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
 	 * system call directly since that path acquires no locks.
 	 */
-	ulwpp = &qp->qh_head;
-	while ((ulwp = *ulwpp) != NULL) {
-		if (ulwp->ul_wchan != rwlp) {
-			prev = ulwp;
-			ulwpp = &ulwp->ul_link;
-			continue;
-		}
+	while ((ulwpp = queue_slot(qp, &prev, &more)) != NULL) {
+		ulwp = *ulwpp;
+		ASSERT(ulwp->ul_wchan == rwlp);
 		if (ulwp->ul_writer) {
 			if (writers != 0 || readers != 0)
 				break;
@@ -403,15 +399,17 @@ rw_queue_release(queue_head_t *qp, rwlock_t *rwlp)
 			if (nlwpid == maxlwps)
 				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
 		}
-		(void) queue_unlink(qp, ulwpp, prev);
+		queue_unlink(qp, ulwpp, prev);
+		ulwp->ul_sleepq = NULL;
+		ulwp->ul_wchan = NULL;
 		lwpid[nlwpid++] = ulwp->ul_lwpid;
 	}
-	if (ulwp == NULL)
+	if (ulwpp == NULL)
 		atomic_and_32(rwstate, ~URW_HAS_WAITERS);
 	if (nlwpid == 0) {
 		queue_unlock(qp);
 	} else {
-		self = curthread;
+		ulwp_t *self = curthread;
 		no_preempt(self);
 		queue_unlock(qp);
 		if (nlwpid == 1)
@@ -440,7 +438,6 @@ shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
 {
 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
 	mutex_t *mp = &rwlp->mutex;
-	/* LINTED set but not used */
 	uint32_t readers;
 	int try_flag;
 	int error;
@@ -517,6 +514,7 @@ rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
 	queue_head_t *qp;
 	ulwp_t *ulwp;
 	int try_flag;
+	int ignore_waiters_flag;
 	int error = 0;
 
 	try_flag = (rd_wr & TRY_FLAG);
@@ -528,15 +526,18 @@ rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
 	}
 
 	qp = queue_lock(rwlp, MX);
-retry:
+	/* initial attempt to acquire the lock fails if there are waiters */
+	ignore_waiters_flag = 0;
 	while (error == 0) {
 		if (rd_wr == READ_LOCK) {
-			if (read_lock_try(rwlp, 0))
-				goto out;
+			if (read_lock_try(rwlp, ignore_waiters_flag))
+				break;
 		} else {
-			if (write_lock_try(rwlp, 0))
-				goto out;
+			if (write_lock_try(rwlp, ignore_waiters_flag))
+				break;
 		}
+		/* subsequent attempts do not fail due to waiters */
+		ignore_waiters_flag = 1;
 		atomic_or_32(rwstate, URW_HAS_WAITERS);
 		readers = *rwstate;
 		ASSERT_CONSISTENT_STATE(readers);
@@ -544,10 +545,15 @@ retry:
 		    (rd_wr == WRITE_LOCK &&
 		    (readers & URW_READERS_MASK) != 0))
 			/* EMPTY */;	/* somebody holds the lock */
-		else if ((ulwp = queue_waiter(qp, rwlp)) == NULL) {
+		else if ((ulwp = queue_waiter(qp)) == NULL) {
 			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
-			break;		/* no queued waiters */
+			continue;	/* no queued waiters, try again */
 		} else {
+			/*
+			 * Do a priority check on the queued waiter (the
+			 * highest priority thread on the queue) to see
+			 * if we should defer to him or just grab the lock.
+			 */
 			int our_pri = real_priority(self);
 			int his_pri = real_priority(ulwp);
 
@@ -557,7 +563,7 @@ retry:
 				 * a higher priority than ours.
 				 */
 				if (his_pri <= our_pri)
-					break;
+					continue;	/* try again */
 			} else {
 				/*
 				 * We defer to a queued thread that has
@@ -566,7 +572,7 @@ retry:
 				 */
 				if (his_pri < our_pri ||
 				    (his_pri == our_pri && !ulwp->ul_writer))
-					break;
+					continue;	/* try again */
 			}
 		}
 		/*
@@ -578,33 +584,21 @@ retry:
 			break;
 		}
 		/*
-		 * Enqueue writers ahead of readers of the
-		 * same priority.
+		 * Enqueue writers ahead of readers.
 		 */
 		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
-		enqueue(qp, self, rwlp, MX);
+		enqueue(qp, self, 0);
 		set_parking_flag(self, 1);
 		queue_unlock(qp);
 		if ((error = __lwp_park(tsp, 0)) == EINTR)
-			error = 0;
-		self->ul_writer = 0;
+			error = ignore_waiters_flag = 0;
 		set_parking_flag(self, 0);
 		qp = queue_lock(rwlp, MX);
-		if (self->ul_sleepq && dequeue_self(qp, rwlp) == 0)
+		if (self->ul_sleepq && dequeue_self(qp) == 0)
 			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
+		self->ul_writer = 0;
 	}
 
-	if (error == 0) {
-		if (rd_wr == READ_LOCK) {
-			if (!read_lock_try(rwlp, 1))
-				goto retry;
-		} else {
-			if (!write_lock_try(rwlp, 1))
-				goto retry;
-		}
-	}
-
-out:
 	queue_unlock(qp);
 
 	if (!try_flag) {
diff --git a/usr/src/lib/libc/port/threads/sema.c b/usr/src/lib/libc/port/threads/sema.c
index f2894a6df7..1378facf49 100644
--- a/usr/src/lib/libc/port/threads/sema.c
+++ b/usr/src/lib/libc/port/threads/sema.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -123,7 +123,6 @@ sema_wait_impl(sema_t *sp, timespec_t *tsp)
 	} else {				/* multithreaded or blocking */
 		queue_head_t *qp;
 		ulwp_t *ulwp;
-		int more;
 		lwpid_t lwpid = 0;
 
 		qp = queue_lock(lsp, CV);
@@ -132,7 +131,7 @@ sema_wait_impl(sema_t *sp, timespec_t *tsp)
 			 * SUSV3 requires FIFO queueing for semaphores,
 			 * at least for SCHED_FIFO and SCHED_RR scheduling.
 			 */
-			enqueue(qp, self, lsp, CV | FIFOQ);
+			enqueue(qp, self, 1);
 			lsp->sema_waiters = 1;
 			set_parking_flag(self, 1);
 			queue_unlock(qp);
@@ -148,18 +147,17 @@ sema_wait_impl(sema_t *sp, timespec_t *tsp)
 			set_parking_flag(self, 0);
 			qp = queue_lock(lsp, CV);
 			if (self->ul_sleepq)	/* timeout or spurious wakeup */
-				lsp->sema_waiters = dequeue_self(qp, lsp);
+				lsp->sema_waiters = dequeue_self(qp);
 		}
 		if (error == 0)
 			lsp->count--;
 		if (lsp->count != 0 && lsp->sema_waiters) {
-			if ((ulwp = dequeue(qp, lsp, &more)) == NULL)
-				lsp->sema_waiters = 0;
-			else {
+			int more;
+			if ((ulwp = dequeue(qp, &more)) != NULL) {
 				no_preempt(self);
 				lwpid = ulwp->ul_lwpid;
-				lsp->sema_waiters = (more? 1 : 0);
 			}
+			lsp->sema_waiters = more;
 		}
 		queue_unlock(qp);
 		if (lwpid) {
@@ -245,20 +243,18 @@ _sema_trywait(sema_t *sp)
 	} else {				/* multithreaded */
 		queue_head_t *qp;
 		ulwp_t *ulwp;
-		int more;
 		lwpid_t lwpid = 0;
 
 		qp = queue_lock(lsp, CV);
 		if (lsp->count == 0)
 			error = EBUSY;
 		else if (--lsp->count != 0 && lsp->sema_waiters) {
-			if ((ulwp = dequeue(qp, lsp, &more)) == NULL)
-				lsp->sema_waiters = 0;
-			else {
+			int more;
+			if ((ulwp = dequeue(qp, &more)) != NULL) {
 				no_preempt(self);
 				lwpid = ulwp->ul_lwpid;
-				lsp->sema_waiters = (more? 1 : 0);
 			}
+			lsp->sema_waiters = more;
 		}
 		queue_unlock(qp);
 		if (lwpid) {
@@ -314,20 +310,18 @@ _sema_post(sema_t *sp)
 	} else {				/* multithreaded */
 		queue_head_t *qp;
 		ulwp_t *ulwp;
-		int more;
 		lwpid_t lwpid = 0;
 
 		qp = queue_lock(lsp, CV);
 		if (lsp->count >= _semvaluemax)
 			error = EOVERFLOW;
 		else if (lsp->count++ == 0 && lsp->sema_waiters) {
-			if ((ulwp = dequeue(qp, lsp, &more)) == NULL)
-				lsp->sema_waiters = 0;
-			else {
+			int more;
+			if ((ulwp = dequeue(qp, &more)) != NULL) {
 				no_preempt(self);
 				lwpid = ulwp->ul_lwpid;
-				lsp->sema_waiters = (more? 1 : 0);
 			}
+			lsp->sema_waiters = more;
 		}
 		queue_unlock(qp);
 		if (lwpid) {
diff --git a/usr/src/lib/libc/port/threads/spawn.c b/usr/src/lib/libc/port/threads/spawn.c
index 2e81ab0634..5c0d505a24 100644
--- a/usr/src/lib/libc/port/threads/spawn.c
+++ b/usr/src/lib/libc/port/threads/spawn.c
@@ -30,14 +30,9 @@
 #include "thr_uberdata.h"
 #include <sys/libc_kernel.h>
 #include <sys/procset.h>
-#include <sys/rtpriocntl.h>
-#include <sys/tspriocntl.h>
 #include <sys/fork.h>
-#include <sys/rt.h>
-#include <sys/ts.h>
 #include <alloca.h>
 #include <spawn.h>
-#include "rtsched.h"
 
 #define	ALL_POSIX_SPAWN_FLAGS			\
 		(POSIX_SPAWN_RESETIDS |		\
@@ -50,8 +45,8 @@
 		POSIX_SPAWN_WAITPID_NP)
 
 typedef struct {
-	short		sa_psflags;	/* POSIX_SPAWN_* flags */
-	pri_t		sa_priority;
+	int		sa_psflags;	/* POSIX_SPAWN_* flags */
+	int		sa_priority;
 	int		sa_schedpolicy;
 	pid_t		sa_pgroup;
 	sigset_t	sa_sigdefault;
@@ -70,8 +65,6 @@ typedef struct file_attr {
 	int		fa_newfiledes;	/* new file descriptor for dup2() */
 } file_attr_t;
 
-extern struct pcclass ts_class, rt_class;
-
 extern	pid_t	_vforkx(int);
 #pragma unknown_control_flow(_vforkx)
 extern	void	*_private_memset(void *, int, size_t);
@@ -89,95 +82,6 @@ extern	uid_t	_private_getuid(void);
 extern	uid_t	_private_geteuid(void);
 extern	void	_private_exit(int);
 
-/*
- * We call this function rather than priocntl() because we must not call
- * any function that is exported from libc while in the child of vfork().
- * Also, we are not using PC_GETXPARMS or PC_SETXPARMS so we can use
- * the simple call to __priocntlset() rather than the varargs version.
- */
-static long
-_private_priocntl(idtype_t idtype, id_t id, int cmd, caddr_t arg)
-{
-	extern long _private__priocntlset(int, procset_t *, int, caddr_t, ...);
-	procset_t procset;
-
-	setprocset(&procset, POP_AND, idtype, id, P_ALL, 0);
-	return (_private__priocntlset(PC_VERSION, &procset, cmd, arg, 0));
-}
-
-/*
- * The following two functions are blatently stolen from
- * sched_setscheduler() and sched_setparam() in librt.
- * This would be a lot easier if librt were folded into libc.
- */
-static int
-setscheduler(int policy, pri_t prio)
-{
-	pcparms_t	pcparm;
-	tsinfo_t	*tsi;
-	tsparms_t	*tsp;
-	int		scale;
-
-	switch (policy) {
-	case SCHED_FIFO:
-	case SCHED_RR:
-		if (prio < rt_class.pcc_primin || prio > rt_class.pcc_primax) {
-			errno = EINVAL;
-			return (-1);
-		}
-		pcparm.pc_cid = rt_class.pcc_info.pc_cid;
-		((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
-		((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs =
-		    (policy == SCHED_RR ? RT_TQDEF : RT_TQINF);
-		break;
-
-	case SCHED_OTHER:
-		pcparm.pc_cid = ts_class.pcc_info.pc_cid;
-		tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo;
-		scale = tsi->ts_maxupri;
-		tsp = (tsparms_t *)pcparm.pc_clparms;
-		tsp->ts_uprilim = tsp->ts_upri = -(scale * prio) / 20;
-		break;
-
-	default:
-		errno = EINVAL;
-		return (-1);
-	}
-
-	return (_private_priocntl(P_PID, P_MYID,
-	    PC_SETPARMS, (caddr_t)&pcparm));
-}
-
-static int
-setparam(pcparms_t *pcparmp, pri_t prio)
-{
-	tsparms_t	*tsp;
-	tsinfo_t	*tsi;
-	int		scale;
-
-	if (pcparmp->pc_cid == rt_class.pcc_info.pc_cid) {
-		/* SCHED_FIFO or SCHED_RR policy */
-		if (prio < rt_class.pcc_primin || prio > rt_class.pcc_primax) {
-			errno = EINVAL;
-			return (-1);
-		}
-		((rtparms_t *)pcparmp->pc_clparms)->rt_tqnsecs = RT_NOCHANGE;
-		((rtparms_t *)pcparmp->pc_clparms)->rt_pri = prio;
-	} else if (pcparmp->pc_cid == ts_class.pcc_info.pc_cid) {
-		/* SCHED_OTHER policy */
-		tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo;
-		scale = tsi->ts_maxupri;
-		tsp = (tsparms_t *)pcparmp->pc_clparms;
-		tsp->ts_uprilim = tsp->ts_upri = -(scale * prio) / 20;
-	} else {
-		errno = EINVAL;
-		return (-1);
-	}
-
-	return (_private_priocntl(P_PID, P_MYID,
-	    PC_SETPARMS, (caddr_t)pcparmp));
-}
-
 static int
 perform_flag_actions(spawn_attr_t *sap)
 {
@@ -209,20 +113,11 @@ perform_flag_actions(spawn_attr_t *sap)
 	}
 
 	if (sap->sa_psflags & POSIX_SPAWN_SETSCHEDULER) {
-		if (setscheduler(sap->sa_schedpolicy, sap->sa_priority) != 0)
+		if (setparam(P_LWPID, P_MYID,
+		    sap->sa_schedpolicy, sap->sa_priority) == -1)
 			return (errno);
 	} else if (sap->sa_psflags & POSIX_SPAWN_SETSCHEDPARAM) {
-		/*
-		 * Get the process's current scheduling parameters,
-		 * then modify to set the new priority.
-		 */
-		pcparms_t pcparm;
-
-		pcparm.pc_cid = PC_CLNULL;
-		if (_private_priocntl(P_PID, P_MYID,
-		    PC_GETPARMS, (caddr_t)&pcparm) == -1)
-			return (errno);
-		if (setparam(&pcparm, sap->sa_priority) != 0)
+		if (setprio(P_LWPID, P_MYID, sap->sa_priority, NULL) == -1)
 			return (errno);
 	}
 
@@ -682,17 +577,6 @@ _posix_spawnattr_setflags(
 	    (flags & ~ALL_POSIX_SPAWN_FLAGS))
 		return (EINVAL);
 
-	if (flags & (POSIX_SPAWN_SETSCHEDPARAM | POSIX_SPAWN_SETSCHEDULER)) {
-		/*
-		 * Populate ts_class and rt_class.
-		 * We will need them in the child of vfork().
-		 */
-		if (rt_class.pcc_state == 0)
-			(void) get_info_by_policy(SCHED_FIFO);
-		if (ts_class.pcc_state == 0)
-			(void) get_info_by_policy(SCHED_OTHER);
-	}
-
 	sap->sa_psflags = flags;
 	return (0);
 }
@@ -789,17 +673,15 @@ _posix_spawnattr_setschedpolicy(
 {
 	spawn_attr_t *sap = attr->__spawn_attrp;
 
-	if (sap == NULL)
+	if (sap == NULL || schedpolicy == SCHED_SYS)
 		return (EINVAL);
 
-	switch (schedpolicy) {
-	case SCHED_OTHER:
-	case SCHED_FIFO:
-	case SCHED_RR:
-		break;
-	default:
-		return (EINVAL);
-	}
+	/*
+	 * Cache the policy information for later use
+	 * by the vfork() child of posix_spawn().
+	 */
+	if (get_info_by_policy(schedpolicy) == NULL)
+		return (errno);
 
 	sap->sa_schedpolicy = schedpolicy;
 	return (0);
diff --git a/usr/src/lib/libc/port/threads/synch.c b/usr/src/lib/libc/port/threads/synch.c
index b6a5be6634..626252671a 100644
--- a/usr/src/lib/libc/port/threads/synch.c
+++ b/usr/src/lib/libc/port/threads/synch.c
@@ -30,9 +30,22 @@
 
 #include "lint.h"
 #include "thr_uberdata.h"
+#include <sys/rtpriocntl.h>
 #include <sys/sdt.h>
 #include <atomic.h>
 
+#if defined(THREAD_DEBUG)	/* these expand to nothing otherwise */
+#define	INCR32(x)	(((x) != UINT32_MAX)? (x)++ : 0)
+#define	INCR(x)		((x)++)
+#define	DECR(x)		((x)--)
+#define	MAXINCR(m, x)	(((m) < ++(x))? ((m) = (x)) : 0)
+#else
+#define	INCR32(x)
+#define	INCR(x)
+#define	DECR(x)
+#define	MAXINCR(m, x)
+#endif
+
 /*
  * This mutex is initialized to be held by lwp#1.
  * It is used to block a thread that has returned from a mutex_lock()
@@ -120,7 +133,9 @@ int
 __mutex_init(mutex_t *mp, int type, void *arg)
 {
 	int basetype = (type & ~ALL_ATTRIBUTES);
+	const pcclass_t *pccp;
 	int error = 0;
+	int ceil;
 
 	if (basetype == USYNC_PROCESS_ROBUST) {
 		/*
@@ -134,9 +149,14 @@ __mutex_init(mutex_t *mp, int type, void *arg)
 		basetype = USYNC_PROCESS;
 	}
 
-	if (!(basetype == USYNC_THREAD || basetype == USYNC_PROCESS) ||
+	if (type & LOCK_PRIO_PROTECT)
+		pccp = get_info_by_policy(SCHED_FIFO);
+	if ((basetype != USYNC_THREAD && basetype != USYNC_PROCESS) ||
 	    (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT))
-	    == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) {
+	    == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT) ||
+	    ((type & LOCK_PRIO_PROTECT) &&
+	    ((ceil = *(int *)arg) < pccp->pcc_primin ||
+	    ceil > pccp->pcc_primax))) {
 		error = EINVAL;
 	} else if (type & LOCK_ROBUST) {
 		/*
@@ -156,8 +176,7 @@ __mutex_init(mutex_t *mp, int type, void *arg)
 			_atomic_or_16(&mp->mutex_flag, LOCK_INITED);
 			mp->mutex_magic = MUTEX_MAGIC;
 		} else if (type != mp->mutex_type ||
-		    ((type & LOCK_PRIO_PROTECT) &&
-		    mp->mutex_ceiling != (*(int *)arg))) {
+		    ((type & LOCK_PRIO_PROTECT) && mp->mutex_ceiling != ceil)) {
 			error = EINVAL;
 		} else if (__mutex_consistent(mp) != 0) {
 			error = EBUSY;
@@ -172,14 +191,15 @@ __mutex_init(mutex_t *mp, int type, void *arg)
 		mp->mutex_magic = MUTEX_MAGIC;
 	}
 
-	if (error == 0 && (type & LOCK_PRIO_PROTECT))
-		mp->mutex_ceiling = (uint8_t)(*(int *)arg);
+	if (error == 0 && (type & LOCK_PRIO_PROTECT)) {
+		mp->mutex_ceiling = ceil;
+	}
 
 	return (error);
 }
 
 /*
- * Delete mp from list of ceil mutexes owned by curthread.
+ * Delete mp from list of ceiling mutexes owned by curthread.
  * Return 1 if the head of the chain was updated.
  */
 int
@@ -189,17 +209,20 @@ _ceil_mylist_del(mutex_t *mp)
 	mxchain_t **mcpp;
 	mxchain_t *mcp;
 
-	mcpp = &self->ul_mxchain;
-	while ((*mcpp)->mxchain_mx != mp)
-		mcpp = &(*mcpp)->mxchain_next;
-	mcp = *mcpp;
-	*mcpp = mcp->mxchain_next;
-	lfree(mcp, sizeof (*mcp));
-	return (mcpp == &self->ul_mxchain);
+	for (mcpp = &self->ul_mxchain;
+	    (mcp = *mcpp) != NULL;
+	    mcpp = &mcp->mxchain_next) {
+		if (mcp->mxchain_mx == mp) {
+			*mcpp = mcp->mxchain_next;
+			lfree(mcp, sizeof (*mcp));
+			return (mcpp == &self->ul_mxchain);
+		}
+	}
+	return (0);
 }
 
 /*
- * Add mp to head of list of ceil mutexes owned by curthread.
+ * Add mp to the list of ceiling mutexes owned by curthread.
  * Return ENOMEM if no memory could be allocated.
  */
 int
@@ -217,26 +240,30 @@ _ceil_mylist_add(mutex_t *mp)
 }
 
 /*
- * Inherit priority from ceiling.  The inheritance impacts the effective
- * priority, not the assigned priority.  See _thread_setschedparam_main().
+ * Helper function for _ceil_prio_inherit() and _ceil_prio_waive(), below.
+ */
+static void
+set_rt_priority(ulwp_t *self, int prio)
+{
+	pcparms_t pcparm;
+
+	pcparm.pc_cid = self->ul_rtclassid;
+	((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE;
+	((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
+	(void) _private_priocntl(P_LWPID, self->ul_lwpid, PC_SETPARMS, &pcparm);
+}
+
+/*
+ * Inherit priority from ceiling.
+ * This changes the effective priority, not the assigned priority.
  */
 void
-_ceil_prio_inherit(int ceil)
+_ceil_prio_inherit(int prio)
 {
 	ulwp_t *self = curthread;
-	struct sched_param param;
 
-	(void) _memset(&param, 0, sizeof (param));
-	param.sched_priority = ceil;
-	if (_thread_setschedparam_main(self->ul_lwpid,
-	    self->ul_policy, &param, PRIO_INHERIT)) {
-		/*
-		 * Panic since unclear what error code to return.
-		 * If we do return the error codes returned by above
-		 * called routine, update the man page...
-		 */
-		thr_panic("_thread_setschedparam_main() fails");
-	}
+	self->ul_epri = prio;
+	set_rt_priority(self, prio);
 }
 
 /*
@@ -248,30 +275,17 @@ void
 _ceil_prio_waive(void)
 {
 	ulwp_t *self = curthread;
-	struct sched_param param;
+	mxchain_t *mcp = self->ul_mxchain;
+	int prio;
 
-	(void) _memset(&param, 0, sizeof (param));
-	if (self->ul_mxchain == NULL) {
-		/*
-		 * No ceil locks held.  Zero the epri, revert back to ul_pri.
-		 * Since thread's hash lock is not held, one cannot just
-		 * read ul_pri here...do it in the called routine...
-		 */
-		param.sched_priority = self->ul_pri;	/* ignored */
-		if (_thread_setschedparam_main(self->ul_lwpid,
-		    self->ul_policy, &param, PRIO_DISINHERIT))
-			thr_panic("_thread_setschedparam_main() fails");
+	if (mcp == NULL) {
+		prio = self->ul_pri;
+		self->ul_epri = 0;
 	} else {
-		/*
-		 * Set priority to that of the mutex at the head
-		 * of the ceilmutex chain.
-		 */
-		param.sched_priority =
-		    self->ul_mxchain->mxchain_mx->mutex_ceiling;
-		if (_thread_setschedparam_main(self->ul_lwpid,
-		    self->ul_policy, &param, PRIO_INHERIT))
-			thr_panic("_thread_setschedparam_main() fails");
+		prio = mcp->mxchain_mx->mutex_ceiling;
+		self->ul_epri = prio;
 	}
+	set_rt_priority(self, prio);
 }
 
 /*
@@ -386,8 +400,7 @@ spin_lock_set(mutex_t *mp)
 	/*
 	 * Spin for a while, attempting to acquire the lock.
 	 */
-	if (self->ul_spin_lock_spin != UINT_MAX)
-		self->ul_spin_lock_spin++;
+	INCR32(self->ul_spin_lock_spin);
 	if (mutex_queuelock_adaptive(mp) == 0 ||
 	    set_lock_byte(&mp->mutex_lockw) == 0) {
 		mp->mutex_owner = (uintptr_t)self;
@@ -397,8 +410,7 @@ spin_lock_set(mutex_t *mp)
 	 * Try harder if we were previously at a no premption level.
 	 */
 	if (self->ul_preempt > 1) {
-		if (self->ul_spin_lock_spin2 != UINT_MAX)
-			self->ul_spin_lock_spin2++;
+		INCR32(self->ul_spin_lock_spin2);
 		if (mutex_queuelock_adaptive(mp) == 0 ||
 		    set_lock_byte(&mp->mutex_lockw) == 0) {
 			mp->mutex_owner = (uintptr_t)self;
@@ -408,8 +420,7 @@ spin_lock_set(mutex_t *mp)
 	/*
 	 * Give up and block in the kernel for the mutex.
 	 */
-	if (self->ul_spin_lock_sleep != UINT_MAX)
-		self->ul_spin_lock_sleep++;
+	INCR32(self->ul_spin_lock_sleep);
 	(void) ___lwp_mutex_timedlock(mp, NULL);
 	mp->mutex_owner = (uintptr_t)self;
 }
@@ -422,8 +433,7 @@ spin_lock_clear(mutex_t *mp)
 	mp->mutex_owner = 0;
 	if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) {
 		(void) ___lwp_mutex_wakeup(mp, 0);
-		if (self->ul_spin_lock_wakeup != UINT_MAX)
-			self->ul_spin_lock_wakeup++;
+		INCR32(self->ul_spin_lock_wakeup);
 	}
 	preempt(self);
 }
@@ -436,7 +446,7 @@ queue_alloc(void)
 {
 	ulwp_t *self = curthread;
 	uberdata_t *udp = self->ul_uberdata;
-	mutex_t *mp;
+	queue_head_t *qp;
 	void *data;
 	int i;
 
@@ -449,11 +459,16 @@ queue_alloc(void)
 	    PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0))
 	    == MAP_FAILED)
 		thr_panic("cannot allocate thread queue_head table");
-	udp->queue_head = (queue_head_t *)data;
-	for (i = 0; i < 2 * QHASHSIZE; i++) {
-		mp = &udp->queue_head[i].qh_lock;
-		mp->mutex_flag = LOCK_INITED;
-		mp->mutex_magic = MUTEX_MAGIC;
+	udp->queue_head = qp = (queue_head_t *)data;
+	for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
+		qp->qh_type = (i < QHASHSIZE)? MX : CV;
+		qp->qh_lock.mutex_flag = LOCK_INITED;
+		qp->qh_lock.mutex_magic = MUTEX_MAGIC;
+		qp->qh_hlist = &qp->qh_def_root;
+#if defined(THREAD_DEBUG)
+		qp->qh_hlen = 1;
+		qp->qh_hmax = 1;
+#endif
 	}
 }
 
@@ -467,31 +482,43 @@ QVERIFY(queue_head_t *qp)
 {
 	ulwp_t *self = curthread;
 	uberdata_t *udp = self->ul_uberdata;
+	queue_root_t *qrp;
 	ulwp_t *ulwp;
 	ulwp_t *prev;
 	uint_t index;
-	uint32_t cnt = 0;
+	uint32_t cnt;
 	char qtype;
 	void *wchan;
 
 	ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE);
 	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
-	ASSERT((qp->qh_head != NULL && qp->qh_tail != NULL) ||
-	    (qp->qh_head == NULL && qp->qh_tail == NULL));
+	for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) {
+		cnt++;
+		ASSERT((qrp->qr_head != NULL && qrp->qr_tail != NULL) ||
+		    (qrp->qr_head == NULL && qrp->qr_tail == NULL));
+	}
+	ASSERT(qp->qh_hlen == cnt && qp->qh_hmax >= cnt);
+	qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV;
+	ASSERT(qp->qh_type == qtype);
 	if (!thread_queue_verify)
 		return;
 	/* real expensive stuff, only for _THREAD_QUEUE_VERIFY */
-	qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV;
-	for (prev = NULL, ulwp = qp->qh_head; ulwp != NULL;
-	    prev = ulwp, ulwp = ulwp->ul_link, cnt++) {
-		ASSERT(ulwp->ul_qtype == qtype);
-		ASSERT(ulwp->ul_wchan != NULL);
-		ASSERT(ulwp->ul_sleepq == qp);
-		wchan = ulwp->ul_wchan;
-		index = QUEUE_HASH(wchan, qtype);
-		ASSERT(&udp->queue_head[index] == qp);
-	}
-	ASSERT(qp->qh_tail == prev);
+	for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) {
+		for (prev = NULL, ulwp = qrp->qr_head; ulwp != NULL;
+		    prev = ulwp, ulwp = ulwp->ul_link) {
+			cnt++;
+			if (ulwp->ul_writer)
+				ASSERT(prev == NULL || prev->ul_writer);
+			ASSERT(ulwp->ul_qtype == qtype);
+			ASSERT(ulwp->ul_wchan != NULL);
+			ASSERT(ulwp->ul_sleepq == qp);
+			wchan = ulwp->ul_wchan;
+			ASSERT(qrp->qr_wchan == wchan);
+			index = QUEUE_HASH(wchan, qtype);
+			ASSERT(&udp->queue_head[index] == qp);
+		}
+		ASSERT(qrp->qr_tail == prev);
+	}
 	ASSERT(qp->qh_qlen == cnt);
 }
 
@@ -509,6 +536,7 @@ queue_lock(void *wchan, int qtype)
 {
 	uberdata_t *udp = curthread->ul_uberdata;
 	queue_head_t *qp;
+	queue_root_t *qrp;
 
 	ASSERT(qtype == MX || qtype == CV);
 
@@ -522,11 +550,20 @@ queue_lock(void *wchan, int qtype)
 	}
 	qp += QUEUE_HASH(wchan, qtype);
 	spin_lock_set(&qp->qh_lock);
-	/*
-	 * At once per nanosecond, qh_lockcount will wrap after 512 years.
-	 * Were we to care about this, we could peg the value at UINT64_MAX.
-	 */
-	qp->qh_lockcount++;
+	for (qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next)
+		if (qrp->qr_wchan == wchan)
+			break;
+	if (qrp == NULL && qp->qh_def_root.qr_head == NULL) {
+		/* the default queue root is available; use it */
+		qrp = &qp->qh_def_root;
+		qrp->qr_wchan = wchan;
+		ASSERT(qrp->qr_next == NULL);
+		ASSERT(qrp->qr_tail == NULL &&
+		    qrp->qr_rtcount == 0 && qrp->qr_qlen == 0);
+	}
+	qp->qh_wchan = wchan;	/* valid until queue_unlock() is called */
+	qp->qh_root = qrp;	/* valid until queue_unlock() is called */
+	INCR32(qp->qh_lockcount);
 	QVERIFY(qp);
 	return (qp);
 }
@@ -549,19 +586,33 @@ queue_unlock(queue_head_t *qp)
 #define	CMP_PRIO(ulwp)	((real_priority(ulwp) << 1) + (ulwp)->ul_writer)
 
 void
-enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype)
+enqueue(queue_head_t *qp, ulwp_t *ulwp, int force_fifo)
 {
+	queue_root_t *qrp;
 	ulwp_t **ulwpp;
 	ulwp_t *next;
 	int pri = CMP_PRIO(ulwp);
-	int force_fifo = (qtype & FIFOQ);
-	int do_fifo;
 
-	qtype &= ~FIFOQ;
-	ASSERT(qtype == MX || qtype == CV);
 	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
 	ASSERT(ulwp->ul_sleepq != qp);
 
+	if ((qrp = qp->qh_root) == NULL) {
+		/* use the thread's queue root for the linkage */
+		qrp = &ulwp->ul_queue_root;
+		qrp->qr_next = qp->qh_hlist;
+		qrp->qr_prev = NULL;
+		qrp->qr_head = NULL;
+		qrp->qr_tail = NULL;
+		qrp->qr_wchan = qp->qh_wchan;
+		qrp->qr_rtcount = 0;
+		qrp->qr_qlen = 0;
+		qrp->qr_qmax = 0;
+		qp->qh_hlist->qr_prev = qrp;
+		qp->qh_hlist = qrp;
+		qp->qh_root = qrp;
+		MAXINCR(qp->qh_hmax, qp->qh_hlen);
+	}
+
 	/*
 	 * LIFO queue ordering is unfair and can lead to starvation,
 	 * but it gives better performance for heavily contended locks.
@@ -580,30 +631,28 @@ enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype)
 	 * This breaks live lock conditions that occur in applications
 	 * that are written assuming (incorrectly) that threads acquire
 	 * locks fairly, that is, in roughly round-robin order.
-	 * In any event, the queue is maintained in priority order.
+	 * In any event, the queue is maintained in kernel priority order.
 	 *
-	 * If we are given the FIFOQ flag in qtype, fifo queueing is forced.
+	 * If force_fifo is non-zero, fifo queueing is forced.
 	 * SUSV3 requires this for semaphores.
 	 */
-	do_fifo = (force_fifo ||
-	    ((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0);
-
-	if (qp->qh_head == NULL) {
+	if (qrp->qr_head == NULL) {
 		/*
 		 * The queue is empty.  LIFO/FIFO doesn't matter.
 		 */
-		ASSERT(qp->qh_tail == NULL);
-		ulwpp = &qp->qh_head;
-	} else if (do_fifo) {
+		ASSERT(qrp->qr_tail == NULL);
+		ulwpp = &qrp->qr_head;
+	} else if (force_fifo |
+	    (((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0)) {
 		/*
 		 * Enqueue after the last thread whose priority is greater
 		 * than or equal to the priority of the thread being queued.
 		 * Attempt first to go directly onto the tail of the queue.
 		 */
-		if (pri <= CMP_PRIO(qp->qh_tail))
-			ulwpp = &qp->qh_tail->ul_link;
+		if (pri <= CMP_PRIO(qrp->qr_tail))
+			ulwpp = &qrp->qr_tail->ul_link;
 		else {
-			for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL;
+			for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL;
 			    ulwpp = &next->ul_link)
 				if (pri > CMP_PRIO(next))
 					break;
@@ -614,174 +663,262 @@ enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype)
 		 * than or equal to the priority of the thread being queued.
 		 * Hopefully we can go directly onto the head of the queue.
 		 */
-		for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL;
+		for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL;
 		    ulwpp = &next->ul_link)
 			if (pri >= CMP_PRIO(next))
 				break;
 	}
 	if ((ulwp->ul_link = *ulwpp) == NULL)
-		qp->qh_tail = ulwp;
+		qrp->qr_tail = ulwp;
 	*ulwpp = ulwp;
 
 	ulwp->ul_sleepq = qp;
-	ulwp->ul_wchan = wchan;
-	ulwp->ul_qtype = qtype;
-	if (qp->qh_qmax < ++qp->qh_qlen)
-		qp->qh_qmax = qp->qh_qlen;
+	ulwp->ul_wchan = qp->qh_wchan;
+	ulwp->ul_qtype = qp->qh_type;
+	if ((ulwp->ul_schedctl != NULL &&
+	    ulwp->ul_schedctl->sc_cid == ulwp->ul_rtclassid) |
+	    ulwp->ul_pilocks) {
+		ulwp->ul_rtqueued = 1;
+		qrp->qr_rtcount++;
+	}
+	MAXINCR(qrp->qr_qmax, qrp->qr_qlen);
+	MAXINCR(qp->qh_qmax, qp->qh_qlen);
 }
 
 /*
- * Return a pointer to the queue slot of the
- * highest priority thread on the queue.
- * On return, prevp, if not NULL, will contain a pointer
- * to the thread's predecessor on the queue
+ * Helper function for queue_slot() and queue_slot_rt().
+ * Try to find a non-suspended thread on the queue.
  */
 static ulwp_t **
-queue_slot(queue_head_t *qp, void *wchan, int *more, ulwp_t **prevp)
+queue_slot_runnable(ulwp_t **ulwpp, ulwp_t **prevp, int rt)
 {
-	ulwp_t **ulwpp;
 	ulwp_t *ulwp;
-	ulwp_t *prev = NULL;
-	ulwp_t **suspp = NULL;
-	ulwp_t *susprev;
-
-	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
+	ulwp_t **foundpp = NULL;
+	int priority = -1;
+	ulwp_t *prev;
+	int tpri;
 
-	/*
-	 * Find a waiter on the sleep queue.
-	 */
-	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
+	for (prev = NULL;
+	    (ulwp = *ulwpp) != NULL;
 	    prev = ulwp, ulwpp = &ulwp->ul_link) {
-		if (ulwp->ul_wchan == wchan) {
-			if (!ulwp->ul_stop)
+		if (ulwp->ul_stop)	/* skip suspended threads */
+			continue;
+		tpri = rt? CMP_PRIO(ulwp) : 0;
+		if (tpri > priority) {
+			foundpp = ulwpp;
+			*prevp = prev;
+			priority = tpri;
+			if (!rt)
 				break;
-			/*
-			 * Try not to return a suspended thread.
-			 * This mimics the old libthread's behavior.
-			 */
-			if (suspp == NULL) {
-				suspp = ulwpp;
-				susprev = prev;
-			}
 		}
 	}
+	return (foundpp);
+}
+
+/*
+ * For real-time, we search the entire queue because the dispatch
+ * (kernel) priorities may have changed since enqueueing.
+ */
+static ulwp_t **
+queue_slot_rt(ulwp_t **ulwpp_org, ulwp_t **prevp)
+{
+	ulwp_t **ulwpp = ulwpp_org;
+	ulwp_t *ulwp = *ulwpp;
+	ulwp_t **foundpp = ulwpp;
+	int priority = CMP_PRIO(ulwp);
+	ulwp_t *prev;
+	int tpri;
 
-	if (ulwp == NULL && suspp != NULL) {
-		ulwp = *(ulwpp = suspp);
-		prev = susprev;
-		suspp = NULL;
+	for (prev = ulwp, ulwpp = &ulwp->ul_link;
+	    (ulwp = *ulwpp) != NULL;
+	    prev = ulwp, ulwpp = &ulwp->ul_link) {
+		tpri = CMP_PRIO(ulwp);
+		if (tpri > priority) {
+			foundpp = ulwpp;
+			*prevp = prev;
+			priority = tpri;
+		}
 	}
-	if (ulwp == NULL) {
-		if (more != NULL)
-			*more = 0;
-		return (NULL);
+	ulwp = *foundpp;
+
+	/*
+	 * Try not to return a suspended thread.
+	 * This mimics the old libthread's behavior.
+	 */
+	if (ulwp->ul_stop &&
+	    (ulwpp = queue_slot_runnable(ulwpp_org, prevp, 1)) != NULL) {
+		foundpp = ulwpp;
+		ulwp = *foundpp;
 	}
+	ulwp->ul_rt = 1;
+	return (foundpp);
+}
 
-	if (prevp != NULL)
-		*prevp = prev;
-	if (more == NULL)
-		return (ulwpp);
+ulwp_t **
+queue_slot(queue_head_t *qp, ulwp_t **prevp, int *more)
+{
+	queue_root_t *qrp;
+	ulwp_t **ulwpp;
+	ulwp_t *ulwp;
+	int rt;
 
+	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
+
+	if ((qrp = qp->qh_root) == NULL || (ulwp = qrp->qr_head) == NULL) {
+		*more = 0;
+		return (NULL);		/* no lwps on the queue */
+	}
+	rt = (qrp->qr_rtcount != 0);
+	*prevp = NULL;
+	if (ulwp->ul_link == NULL) {	/* only one lwp on the queue */
+		*more = 0;
+		ulwp->ul_rt = rt;
+		return (&qrp->qr_head);
+	}
+	*more = 1;
+
+	if (rt)		/* real-time queue */
+		return (queue_slot_rt(&qrp->qr_head, prevp));
 	/*
-	 * Scan the remainder of the queue for another waiter.
+	 * Try not to return a suspended thread.
+	 * This mimics the old libthread's behavior.
 	 */
-	if (suspp != NULL) {
-		*more = 1;
+	if (ulwp->ul_stop &&
+	    (ulwpp = queue_slot_runnable(&qrp->qr_head, prevp, 0)) != NULL) {
+		ulwp = *ulwpp;
+		ulwp->ul_rt = 0;
 		return (ulwpp);
 	}
-	for (ulwp = ulwp->ul_link; ulwp != NULL; ulwp = ulwp->ul_link) {
-		if (ulwp->ul_wchan == wchan) {
-			*more = 1;
-			return (ulwpp);
-		}
-	}
-
-	*more = 0;
-	return (ulwpp);
+	/*
+	 * The common case; just pick the first thread on the queue.
+	 */
+	ulwp->ul_rt = 0;
+	return (&qrp->qr_head);
 }
 
-ulwp_t *
+/*
+ * Common code for unlinking an lwp from a user-level sleep queue.
+ */
+void
 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev)
 {
-	ulwp_t *ulwp;
+	queue_root_t *qrp = qp->qh_root;
+	queue_root_t *nqrp;
+	ulwp_t *ulwp = *ulwpp;
+	ulwp_t *next;
 
-	ulwp = *ulwpp;
-	*ulwpp = ulwp->ul_link;
-	ulwp->ul_link = NULL;
-	if (qp->qh_tail == ulwp)
-		qp->qh_tail = prev;
-	qp->qh_qlen--;
-	ulwp->ul_sleepq = NULL;
-	ulwp->ul_wchan = NULL;
+	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
+	ASSERT(qp->qh_wchan != NULL && ulwp->ul_wchan == qp->qh_wchan);
 
-	return (ulwp);
+	DECR(qp->qh_qlen);
+	DECR(qrp->qr_qlen);
+	if (ulwp->ul_rtqueued) {
+		ulwp->ul_rtqueued = 0;
+		qrp->qr_rtcount--;
+	}
+	next = ulwp->ul_link;
+	*ulwpp = next;
+	ulwp->ul_link = NULL;
+	if (qrp->qr_tail == ulwp)
+		qrp->qr_tail = prev;
+	if (qrp == &ulwp->ul_queue_root) {
+		/*
+		 * We can't continue to use the unlinked thread's
+		 * queue root for the linkage.
+		 */
+		queue_root_t *qr_next = qrp->qr_next;
+		queue_root_t *qr_prev = qrp->qr_prev;
+
+		if (qrp->qr_tail) {
+			/* switch to using the last thread's queue root */
+			ASSERT(qrp->qr_qlen != 0);
+			nqrp = &qrp->qr_tail->ul_queue_root;
+			*nqrp = *qrp;
+			if (qr_next)
+				qr_next->qr_prev = nqrp;
+			if (qr_prev)
+				qr_prev->qr_next = nqrp;
+			else
+				qp->qh_hlist = nqrp;
+			qp->qh_root = nqrp;
+		} else {
+			/* empty queue root; just delete from the hash list */
+			ASSERT(qrp->qr_qlen == 0);
+			if (qr_next)
+				qr_next->qr_prev = qr_prev;
+			if (qr_prev)
+				qr_prev->qr_next = qr_next;
+			else
+				qp->qh_hlist = qr_next;
+			qp->qh_root = NULL;
+			DECR(qp->qh_hlen);
+		}
+	}
 }
 
 ulwp_t *
-dequeue(queue_head_t *qp, void *wchan, int *more)
+dequeue(queue_head_t *qp, int *more)
 {
 	ulwp_t **ulwpp;
+	ulwp_t *ulwp;
 	ulwp_t *prev;
 
-	if ((ulwpp = queue_slot(qp, wchan, more, &prev)) == NULL)
+	if ((ulwpp = queue_slot(qp, &prev, more)) == NULL)
 		return (NULL);
-	return (queue_unlink(qp, ulwpp, prev));
+	ulwp = *ulwpp;
+	queue_unlink(qp, ulwpp, prev);
+	ulwp->ul_sleepq = NULL;
+	ulwp->ul_wchan = NULL;
+	return (ulwp);
 }
 
 /*
  * Return a pointer to the highest priority thread sleeping on wchan.
  */
 ulwp_t *
-queue_waiter(queue_head_t *qp, void *wchan)
+queue_waiter(queue_head_t *qp)
 {
 	ulwp_t **ulwpp;
+	ulwp_t *prev;
+	int more;
 
-	if ((ulwpp = queue_slot(qp, wchan, NULL, NULL)) == NULL)
+	if ((ulwpp = queue_slot(qp, &prev, &more)) == NULL)
 		return (NULL);
 	return (*ulwpp);
 }
 
-uint8_t
-dequeue_self(queue_head_t *qp, void *wchan)
+int
+dequeue_self(queue_head_t *qp)
 {
 	ulwp_t *self = curthread;
+	queue_root_t *qrp;
 	ulwp_t **ulwpp;
 	ulwp_t *ulwp;
-	ulwp_t *prev = NULL;
+	ulwp_t *prev;
 	int found = 0;
-	int more = 0;
 
 	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
 
 	/* find self on the sleep queue */
-	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
-	    prev = ulwp, ulwpp = &ulwp->ul_link) {
-		if (ulwp == self) {
-			/* dequeue ourself */
-			ASSERT(self->ul_wchan == wchan);
-			(void) queue_unlink(qp, ulwpp, prev);
-			self->ul_cvmutex = NULL;
-			self->ul_cv_wake = 0;
-			found = 1;
-			break;
+	if ((qrp = qp->qh_root) != NULL) {
+		for (prev = NULL, ulwpp = &qrp->qr_head;
+		    (ulwp = *ulwpp) != NULL;
+		    prev = ulwp, ulwpp = &ulwp->ul_link) {
+			if (ulwp == self) {
+				queue_unlink(qp, ulwpp, prev);
+				self->ul_cvmutex = NULL;
+				self->ul_sleepq = NULL;
+				self->ul_wchan = NULL;
+				found = 1;
+				break;
+			}
 		}
-		if (ulwp->ul_wchan == wchan)
-			more = 1;
 	}
 
 	if (!found)
 		thr_panic("dequeue_self(): curthread not found on queue");
 
-	if (more)
-		return (1);
-
-	/* scan the remainder of the queue for another waiter */
-	for (ulwp = *ulwpp; ulwp != NULL; ulwp = ulwp->ul_link) {
-		if (ulwp->ul_wchan == wchan)
-			return (1);
-	}
-
-	return (0);
+	return ((qrp = qp->qh_root) != NULL && qrp->qr_head != NULL);
 }
 
 /*
@@ -807,12 +944,11 @@ unsleep_self(void)
 		 * If so, just loop around and try again.
 		 * dequeue_self() clears self->ul_sleepq.
 		 */
-		if (qp == self->ul_sleepq) {
-			(void) dequeue_self(qp, self->ul_wchan);
-			self->ul_writer = 0;
-		}
+		if (qp == self->ul_sleepq)
+			(void) dequeue_self(qp);
 		queue_unlock(qp);
 	}
+	self->ul_writer = 0;
 	self->ul_critical--;
 }
 
@@ -1423,9 +1559,9 @@ static lwpid_t
 mutex_wakeup(mutex_t *mp)
 {
 	lwpid_t lwpid = 0;
+	int more;
 	queue_head_t *qp;
 	ulwp_t *ulwp;
-	int more;
 
 	/*
 	 * Dequeue a waiter from the sleep queue.  Don't touch the mutex
@@ -1433,9 +1569,9 @@ mutex_wakeup(mutex_t *mp)
 	 * might have been deallocated or reallocated for another purpose.
 	 */
 	qp = queue_lock(mp, MX);
-	if ((ulwp = dequeue(qp, mp, &more)) != NULL) {
+	if ((ulwp = dequeue(qp, &more)) != NULL) {
 		lwpid = ulwp->ul_lwpid;
-		mp->mutex_waiters = (more? 1 : 0);
+		mp->mutex_waiters = more;
 	}
 	queue_unlock(qp);
 	return (lwpid);
@@ -1448,11 +1584,10 @@ static void
 mutex_wakeup_all(mutex_t *mp)
 {
 	queue_head_t *qp;
+	queue_root_t *qrp;
 	int nlwpid = 0;
 	int maxlwps = MAXLWPS;
-	ulwp_t **ulwpp;
 	ulwp_t *ulwp;
-	ulwp_t *prev = NULL;
 	lwpid_t buffer[MAXLWPS];
 	lwpid_t *lwpid = buffer;
 
@@ -1473,17 +1608,17 @@ mutex_wakeup_all(mutex_t *mp)
 	 * system call directly since that path acquires no locks.
 	 */
 	qp = queue_lock(mp, MX);
-	ulwpp = &qp->qh_head;
-	while ((ulwp = *ulwpp) != NULL) {
-		if (ulwp->ul_wchan != mp) {
-			prev = ulwp;
-			ulwpp = &ulwp->ul_link;
-		} else {
-			if (nlwpid == maxlwps)
-				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
-			(void) queue_unlink(qp, ulwpp, prev);
-			lwpid[nlwpid++] = ulwp->ul_lwpid;
-		}
+	for (;;) {
+		if ((qrp = qp->qh_root) == NULL ||
+		    (ulwp = qrp->qr_head) == NULL)
+			break;
+		ASSERT(ulwp->ul_wchan == mp);
+		queue_unlink(qp, &qrp->qr_head, NULL);
+		ulwp->ul_sleepq = NULL;
+		ulwp->ul_wchan = NULL;
+		if (nlwpid == maxlwps)
+			lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
+		lwpid[nlwpid++] = ulwp->ul_lwpid;
 	}
 
 	if (nlwpid == 0) {
@@ -1555,17 +1690,6 @@ mutex_unlock_process(mutex_t *mp, int release_all)
 	}
 }
 
-/*
- * Return the real priority of a thread.
- */
-int
-real_priority(ulwp_t *ulwp)
-{
-	if (ulwp->ul_epri == 0)
-		return (ulwp->ul_mappedpri? ulwp->ul_mappedpri : ulwp->ul_pri);
-	return (ulwp->ul_emappedpri? ulwp->ul_emappedpri : ulwp->ul_epri);
-}
-
 void
 stall(void)
 {
@@ -1608,12 +1732,12 @@ mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp,
 	 * The waiter bit can be set/cleared only while holding the queue lock.
 	 */
 	qp = queue_lock(mp, MX);
-	enqueue(qp, self, mp, MX);
+	enqueue(qp, self, 0);
 	mp->mutex_waiters = 1;
 	for (;;) {
 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
 			mp->mutex_owner = (uintptr_t)self;
-			mp->mutex_waiters = dequeue_self(qp, mp);
+			mp->mutex_waiters = dequeue_self(qp);
 			break;
 		}
 		set_parking_flag(self, 1);
@@ -1635,7 +1759,7 @@ mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp,
 		qp = queue_lock(mp, MX);
 		if (self->ul_sleepq == NULL) {
 			if (error) {
-				mp->mutex_waiters = queue_waiter(qp, mp)? 1 : 0;
+				mp->mutex_waiters = queue_waiter(qp)? 1 : 0;
 				if (error != EINTR)
 					break;
 				error = 0;
@@ -1644,7 +1768,7 @@ mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp,
 				mp->mutex_owner = (uintptr_t)self;
 				break;
 			}
-			enqueue(qp, self, mp, MX);
+			enqueue(qp, self, 0);
 			mp->mutex_waiters = 1;
 		}
 		ASSERT(self->ul_sleepq == qp &&
@@ -1652,7 +1776,7 @@ mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp,
 		    self->ul_wchan == mp);
 		if (error) {
 			if (error != EINTR) {
-				mp->mutex_waiters = dequeue_self(qp, mp);
+				mp->mutex_waiters = dequeue_self(qp);
 				break;
 			}
 			error = 0;
@@ -1812,7 +1936,7 @@ unregister_locks(void)
 /*
  * Returns with mutex_owner set correctly.
  */
-static int
+int
 mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try)
 {
 	ulwp_t *self = curthread;
@@ -1820,9 +1944,11 @@ mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try)
 	int mtype = mp->mutex_type;
 	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
 	int error = 0;
+	int noceil = try & MUTEX_NOCEIL;
 	uint8_t ceil;
 	int myprio;
 
+	try &= ~MUTEX_NOCEIL;
 	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);
 
 	if (!self->ul_schedctl_called)
@@ -1838,10 +1964,14 @@ mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try)
 	    tsp == NULL && mutex_is_held(mp))
 		lock_error(mp, "mutex_lock", NULL, NULL);
 
-	if (mtype & LOCK_PRIO_PROTECT) {
+	if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) {
+		update_sched(self);
+		if (self->ul_cid != self->ul_rtclassid) {
+			DTRACE_PROBE2(plockstat, mutex__error, mp, EPERM);
+			return (EPERM);
+		}
 		ceil = mp->mutex_ceiling;
-		ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0);
-		myprio = real_priority(self);
+		myprio = self->ul_epri? self->ul_epri : self->ul_pri;
 		if (myprio > ceil) {
 			DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL);
 			return (EINVAL);
@@ -1871,10 +2001,12 @@ mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try)
 		 */
 		switch (error) {
 		case 0:
+			self->ul_pilocks++;
 			mp->mutex_lockw = LOCKSET;
 			break;
 		case EOWNERDEAD:
 		case ELOCKUNMAPPED:
+			self->ul_pilocks++;
 			mp->mutex_lockw = LOCKSET;
 			/* FALLTHROUGH */
 		case ENOTRECOVERABLE:
@@ -1906,7 +2038,7 @@ mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try)
 			record_begin_hold(msp);
 		break;
 	default:
-		if (mtype & LOCK_PRIO_PROTECT) {
+		if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) {
 			(void) _ceil_mylist_del(mp);
 			if (myprio < ceil)
 				_ceil_prio_waive();
@@ -1967,9 +2099,8 @@ static int
 mutex_lock_impl(mutex_t *mp, timespec_t *tsp)
 {
 	ulwp_t *self = curthread;
-	uberdata_t *udp = self->ul_uberdata;
+	int mtype = mp->mutex_type;
 	uberflags_t *gflags;
-	int mtype;
 
 	/*
 	 * Optimize the case of USYNC_THREAD, including
@@ -1978,8 +2109,8 @@ mutex_lock_impl(mutex_t *mp, timespec_t *tsp)
 	 * and the process has only a single thread.
 	 * (Most likely a traditional single-threaded application.)
 	 */
-	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
-	    udp->uberflags.uf_all) == 0) {
+	if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
+	    self->ul_uberdata->uberflags.uf_all) == 0) {
 		/*
 		 * Only one thread exists so we don't need an atomic operation.
 		 */
@@ -2099,10 +2230,11 @@ __mutex_trylock(mutex_t *mp)
 {
 	ulwp_t *self = curthread;
 	uberdata_t *udp = self->ul_uberdata;
+	int mtype = mp->mutex_type;
 	uberflags_t *gflags;
-	int mtype;
 
 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
+
 	/*
 	 * Optimize the case of USYNC_THREAD, including
 	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
@@ -2110,7 +2242,7 @@ __mutex_trylock(mutex_t *mp)
 	 * and the process has only a single thread.
 	 * (Most likely a traditional single-threaded application.)
 	 */
-	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
+	if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
 	    udp->uberflags.uf_all) == 0) {
 		/*
 		 * Only one thread exists so we don't need an atomic operation.
@@ -2194,6 +2326,7 @@ mutex_unlock_internal(mutex_t *mp, int retain_robust_flags)
 		/* mp->mutex_ownerpid is cleared by ___lwp_mutex_unlock() */
 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
 		mp->mutex_lockw = LOCKCLEAR;
+		self->ul_pilocks--;
 		error = ___lwp_mutex_unlock(mp);
 		preempt(self);
 	} else if (mtype & USYNC_PROCESS) {
@@ -2223,10 +2356,9 @@ int
 __mutex_unlock(mutex_t *mp)
 {
 	ulwp_t *self = curthread;
-	uberdata_t *udp = self->ul_uberdata;
+	int mtype = mp->mutex_type;
 	uberflags_t *gflags;
 	lwpid_t lwpid;
-	int mtype;
 	short el;
 
 	/*
@@ -2236,8 +2368,8 @@ __mutex_unlock(mutex_t *mp)
 	 * and the process has only a single thread.
 	 * (Most likely a traditional single-threaded application.)
 	 */
-	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
-	    udp->uberflags.uf_all) == 0) {
+	if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
+	    self->ul_uberdata->uberflags.uf_all) == 0) {
 		if (mtype) {
 			/*
 			 * At this point we know that one or both of the
@@ -2872,6 +3004,7 @@ cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
 	lwpid_t lwpid;
 	int signalled;
 	int error;
+	int cv_wake;
 	int release_all;
 
 	/*
@@ -2882,10 +3015,10 @@ cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
 	 */
 	self->ul_sp = stkptr();
 	qp = queue_lock(cvp, CV);
-	enqueue(qp, self, cvp, CV);
+	enqueue(qp, self, 0);
 	cvp->cond_waiters_user = 1;
 	self->ul_cvmutex = mp;
-	self->ul_cv_wake = (tsp != NULL);
+	self->ul_cv_wake = cv_wake = (tsp != NULL);
 	self->ul_signalled = 0;
 	if (mp->mutex_flag & LOCK_OWNERDEAD) {
 		mp->mutex_flag &= ~LOCK_OWNERDEAD;
@@ -2924,7 +3057,8 @@ cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
 		 * or we may just have gotten a spurious wakeup.
 		 */
 		qp = queue_lock(cvp, CV);
-		mqp = queue_lock(mp, MX);
+		if (!cv_wake)
+			mqp = queue_lock(mp, MX);
 		if (self->ul_sleepq == NULL)
 			break;
 		/*
@@ -2933,15 +3067,15 @@ cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
 		 * were interrupted or we timed out (EINTR or ETIME).
 		 * Else this is a spurious wakeup; continue the loop.
 		 */
-		if (self->ul_sleepq == mqp) {		/* mutex queue */
+		if (!cv_wake && self->ul_sleepq == mqp) { /* mutex queue */
 			if (error) {
-				mp->mutex_waiters = dequeue_self(mqp, mp);
+				mp->mutex_waiters = dequeue_self(mqp);
 				break;
 			}
 			tsp = NULL;	/* no more timeout */
 		} else if (self->ul_sleepq == qp) {	/* condvar queue */
 			if (error) {
-				cvp->cond_waiters_user = dequeue_self(qp, cvp);
+				cvp->cond_waiters_user = dequeue_self(qp);
 				break;
 			}
 			/*
@@ -2951,18 +3085,21 @@ cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
 		} else {
 			thr_panic("cond_sleep_queue(): thread not on queue");
 		}
-		queue_unlock(mqp);
+		if (!cv_wake)
+			queue_unlock(mqp);
 	}
 
 	self->ul_sp = 0;
-	ASSERT(self->ul_cvmutex == NULL && self->ul_cv_wake == 0);
+	self->ul_cv_wake = 0;
+	ASSERT(self->ul_cvmutex == NULL);
 	ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL &&
 	    self->ul_wchan == NULL);
 
 	signalled = self->ul_signalled;
 	self->ul_signalled = 0;
 	queue_unlock(qp);
-	queue_unlock(mqp);
+	if (!cv_wake)
+		queue_unlock(mqp);
 
 	/*
 	 * If we were concurrently cond_signal()d and any of:
@@ -3034,8 +3171,10 @@ cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
 	self->ul_wchan = cvp;
 	mp->mutex_owner = 0;
 	/* mp->mutex_ownerpid is cleared by ___lwp_cond_wait() */
-	if (mtype & LOCK_PRIO_INHERIT)
+	if (mtype & LOCK_PRIO_INHERIT) {
 		mp->mutex_lockw = LOCKCLEAR;
+		self->ul_pilocks--;
+	}
 	/*
 	 * ___lwp_cond_wait() returns immediately with EINTR if
 	 * set_parking_flag(self,0) is called on this lwp before it
@@ -3356,15 +3495,14 @@ cond_signal_internal(cond_t *cvp)
 	uberdata_t *udp = self->ul_uberdata;
 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
 	int error = 0;
+	int more;
+	lwpid_t lwpid;
 	queue_head_t *qp;
 	mutex_t *mp;
 	queue_head_t *mqp;
 	ulwp_t **ulwpp;
 	ulwp_t *ulwp;
-	ulwp_t *prev = NULL;
-	ulwp_t *next;
-	ulwp_t **suspp = NULL;
-	ulwp_t *susprev;
+	ulwp_t *prev;
 
 	if (csp)
 		tdb_incr(csp->cond_signal);
@@ -3383,43 +3521,13 @@ cond_signal_internal(cond_t *cvp)
 	 * is set, just dequeue and unpark him.
 	 */
 	qp = queue_lock(cvp, CV);
-	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
-	    prev = ulwp, ulwpp = &ulwp->ul_link) {
-		if (ulwp->ul_wchan == cvp) {
-			if (!ulwp->ul_stop)
-				break;
-			/*
-			 * Try not to dequeue a suspended thread.
-			 * This mimics the old libthread's behavior.
-			 */
-			if (suspp == NULL) {
-				suspp = ulwpp;
-				susprev = prev;
-			}
-		}
-	}
-	if (ulwp == NULL && suspp != NULL) {
-		ulwp = *(ulwpp = suspp);
-		prev = susprev;
-		suspp = NULL;
-	}
-	if (ulwp == NULL) {	/* no one on the sleep queue */
-		cvp->cond_waiters_user = 0;
+	ulwpp = queue_slot(qp, &prev, &more);
+	cvp->cond_waiters_user = more;
+	if (ulwpp == NULL) {	/* no one on the sleep queue */
 		queue_unlock(qp);
 		return (error);
 	}
-	/*
-	 * Scan the remainder of the CV queue for another waiter.
-	 */
-	if (suspp != NULL) {
-		next = *suspp;
-	} else {
-		for (next = ulwp->ul_link; next != NULL; next = next->ul_link)
-			if (next->ul_wchan == cvp)
-				break;
-	}
-	if (next == NULL)
-		cvp->cond_waiters_user = 0;
+	ulwp = *ulwpp;
 
 	/*
 	 * Inform the thread that he was the recipient of a cond_signal().
@@ -3434,29 +3542,25 @@ cond_signal_internal(cond_t *cvp)
 	 * while we move him to the mutex queue so that he can
 	 * deal properly with spurious wakeups.
 	 */
-	*ulwpp = ulwp->ul_link;
-	ulwp->ul_link = NULL;
-	if (qp->qh_tail == ulwp)
-		qp->qh_tail = prev;
-	qp->qh_qlen--;
+	queue_unlink(qp, ulwpp, prev);
 
 	mp = ulwp->ul_cvmutex;		/* the mutex he will acquire */
 	ulwp->ul_cvmutex = NULL;
 	ASSERT(mp != NULL);
 
 	if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
-		lwpid_t lwpid = ulwp->ul_lwpid;
-
+		/* just wake him up */
+		lwpid = ulwp->ul_lwpid;
 		no_preempt(self);
 		ulwp->ul_sleepq = NULL;
 		ulwp->ul_wchan = NULL;
-		ulwp->ul_cv_wake = 0;
 		queue_unlock(qp);
 		(void) __lwp_unpark(lwpid);
 		preempt(self);
 	} else {
+		/* move him to the mutex queue */
 		mqp = queue_lock(mp, MX);
-		enqueue(mqp, ulwp, mp, MX);
+		enqueue(mqp, ulwp, 0);
 		mp->mutex_waiters = 1;
 		queue_unlock(mqp);
 		queue_unlock(qp);
@@ -3525,12 +3629,11 @@ cond_broadcast_internal(cond_t *cvp)
 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
 	int error = 0;
 	queue_head_t *qp;
+	queue_root_t *qrp;
 	mutex_t *mp;
 	mutex_t *mp_cache = NULL;
 	queue_head_t *mqp = NULL;
-	ulwp_t **ulwpp;
 	ulwp_t *ulwp;
-	ulwp_t *prev = NULL;
 	int nlwpid = 0;
 	int maxlwps = MAXLWPS;
 	lwpid_t buffer[MAXLWPS];
@@ -3566,36 +3669,31 @@ cond_broadcast_internal(cond_t *cvp)
 	 */
 	qp = queue_lock(cvp, CV);
 	cvp->cond_waiters_user = 0;
-	ulwpp = &qp->qh_head;
-	while ((ulwp = *ulwpp) != NULL) {
-		if (ulwp->ul_wchan != cvp) {
-			prev = ulwp;
-			ulwpp = &ulwp->ul_link;
-			continue;
-		}
-		*ulwpp = ulwp->ul_link;
-		ulwp->ul_link = NULL;
-		if (qp->qh_tail == ulwp)
-			qp->qh_tail = prev;
-		qp->qh_qlen--;
+	for (;;) {
+		if ((qrp = qp->qh_root) == NULL ||
+		    (ulwp = qrp->qr_head) == NULL)
+			break;
+		ASSERT(ulwp->ul_wchan == cvp);
+		queue_unlink(qp, &qrp->qr_head, NULL);
 		mp = ulwp->ul_cvmutex;		/* his mutex */
 		ulwp->ul_cvmutex = NULL;
 		ASSERT(mp != NULL);
 		if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
+			/* just wake him up */
 			ulwp->ul_sleepq = NULL;
 			ulwp->ul_wchan = NULL;
-			ulwp->ul_cv_wake = 0;
 			if (nlwpid == maxlwps)
 				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
 			lwpid[nlwpid++] = ulwp->ul_lwpid;
 		} else {
+			/* move him to the mutex queue */
 			if (mp != mp_cache) {
 				mp_cache = mp;
 				if (mqp != NULL)
 					queue_unlock(mqp);
 				mqp = queue_lock(mp, MX);
 			}
-			enqueue(mqp, ulwp, mp, MX);
+			enqueue(mqp, ulwp, 0);
 			mp->mutex_waiters = 1;
 		}
 	}
@@ -3634,7 +3732,6 @@ assert_no_libc_locks_held(void)
 {
 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
 }
-#endif
 
 /* protected by link_lock */
 uint64_t spin_lock_spin;
@@ -3680,26 +3777,28 @@ dump_queue_statistics(void)
 		return;
 
 	if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 ||
-	    fprintf(stderr, "queue#   lockcount    max qlen\n") < 0)
+	    fprintf(stderr, "queue#   lockcount    max qlen    max hlen\n") < 0)
 		return;
 	for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) {
 		if (qp->qh_lockcount == 0)
 			continue;
 		spin_lock_total += qp->qh_lockcount;
-		if (fprintf(stderr, "%5d %12llu%12u\n", qn,
-		    (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0)
+		if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn,
+		    (u_longlong_t)qp->qh_lockcount,
+		    qp->qh_qmax, qp->qh_hmax) < 0)
 			return;
 	}
 
 	if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 ||
-	    fprintf(stderr, "queue#   lockcount    max qlen\n") < 0)
+	    fprintf(stderr, "queue#   lockcount    max qlen    max hlen\n") < 0)
 		return;
 	for (qn = 0; qn < QHASHSIZE; qn++, qp++) {
 		if (qp->qh_lockcount == 0)
 			continue;
 		spin_lock_total += qp->qh_lockcount;
-		if (fprintf(stderr, "%5d %12llu%12u\n", qn,
-		    (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0)
+		if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn,
+		    (u_longlong_t)qp->qh_lockcount,
+		    qp->qh_qmax, qp->qh_hmax) < 0)
 			return;
 	}
 
@@ -3714,3 +3813,4 @@ dump_queue_statistics(void)
 	(void) fprintf(stderr, "  spin_lock_wakeup = %10llu\n",
 	    (u_longlong_t)spin_lock_wakeup);
 }
+#endif
diff --git a/usr/src/lib/libc/port/threads/thr.c b/usr/src/lib/libc/port/threads/thr.c
index 3f11ad1f2f..c9fda4bfb4 100644
--- a/usr/src/lib/libc/port/threads/thr.c
+++ b/usr/src/lib/libc/port/threads/thr.c
@@ -544,8 +544,7 @@ find_lwp(thread_t tid)
 
 int
 _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
-	long flags, thread_t *new_thread, pri_t priority, int policy,
-	size_t guardsize)
+	long flags, thread_t *new_thread, size_t guardsize)
 {
 	ulwp_t *self = curthread;
 	uberdata_t *udp = self->ul_uberdata;
@@ -566,8 +565,7 @@ _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
 	if (udp->hash_size == 1)
 		finish_init();
 
-	if (((stk || stksize) && stksize < MINSTACK) ||
-	    priority < THREAD_MIN_PRIORITY || priority > THREAD_MAX_PRIORITY)
+	if ((stk || stksize) && stksize < MINSTACK)
 		return (EINVAL);
 
 	if (stk == NULL) {
@@ -606,6 +604,12 @@ _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
 	ulwp->ul_queue_spin = self->ul_queue_spin;
 	ulwp->ul_door_noreserve = self->ul_door_noreserve;
 
+	/* new thread inherits creating thread's scheduling parameters */
+	ulwp->ul_policy = self->ul_policy;
+	ulwp->ul_pri = (self->ul_epri? self->ul_epri : self->ul_pri);
+	ulwp->ul_cid = self->ul_cid;
+	ulwp->ul_rtclassid = self->ul_rtclassid;
+
 	ulwp->ul_primarymap = self->ul_primarymap;
 	ulwp->ul_self = ulwp;
 	ulwp->ul_uberdata = udp;
@@ -669,8 +673,6 @@ _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
 	ulwp->ul_stop = TSTP_REGULAR;
 	if (flags & THR_SUSPENDED)
 		ulwp->ul_created = 1;
-	ulwp->ul_policy = policy;
-	ulwp->ul_pri = priority;
 
 	lmutex_lock(&udp->link_lock);
 	ulwp->ul_forw = udp->all_lwps;
@@ -705,8 +707,7 @@ int
 _thr_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
 	long flags, thread_t *new_thread)
 {
-	return (_thrp_create(stk, stksize, func, arg, flags, new_thread,
-	    curthread->ul_pri, curthread->ul_policy, 0));
+	return (_thrp_create(stk, stksize, func, arg, flags, new_thread, 0));
 }
 
 /*
@@ -793,8 +794,10 @@ _thrp_exit()
 		self->ul_back->ul_forw = self->ul_forw;
 	}
 	self->ul_forw = self->ul_back = NULL;
+#if defined(THREAD_DEBUG)
 	/* collect queue lock statistics before marking ourself dead */
 	record_spin_locks(self);
+#endif
 	self->ul_dead = 1;
 	self->ul_pleasestop = 0;
 	if (replace != NULL) {
@@ -865,6 +868,7 @@ _thrp_exit()
 	thr_panic("_thrp_exit(): _lwp_terminate() returned");
 }
 
+#if defined(THREAD_DEBUG)
 void
 collect_queue_statistics()
 {
@@ -881,6 +885,7 @@ collect_queue_statistics()
 		lmutex_unlock(&udp->link_lock);
 	}
 }
+#endif
 
 void
 _thr_exit_common(void *status, int unwind)
@@ -1156,9 +1161,9 @@ etest(const char *ev)
 #if defined(THREAD_DEBUG)
 	if ((value = envvar(ev, "QUEUE_VERIFY", 1)) >= 0)
 		thread_queue_verify = value;
-#endif
 	if ((value = envvar(ev, "QUEUE_DUMP", 1)) >= 0)
 		thread_queue_dump = value;
+#endif
 	if ((value = envvar(ev, "STACK_CACHE", 10000)) >= 0)
 		thread_stack_cache = value;
 	if ((value = envvar(ev, "COND_WAIT_DEFER", 1)) >= 0)
@@ -1312,6 +1317,9 @@ libc_init(void)
 	self->ul_lwpid = 1; /* __lwp_self() */
 	self->ul_main = 1;
 	self->ul_self = self;
+	self->ul_policy = -1;		/* initialize only when needed */
+	self->ul_pri = 0;
+	self->ul_cid = 0;
 	self->ul_uberdata = udp;
 	if (oldself != NULL) {
 		int i;
@@ -1389,6 +1397,7 @@ libc_init(void)
 		/* tls_size was zero when oldself was allocated */
 		lfree(oldself, sizeof (ulwp_t));
 	}
+	self->ul_rtclassid = get_info_by_policy(SCHED_FIFO)->pcc_info.pc_cid;
 	mutex_setup();
 	atfork_init();
 	signal_init();
@@ -1510,7 +1519,12 @@ finish_init()
 	ASSERT(udp->hash_size == 1);
 
 	/*
-	 * First allocate the queue_head array if not already allocated.
+	 * Initialize self->ul_policy, self->ul_cid, and self->ul_pri.
+	 */
+	update_sched(self);
+
+	/*
+	 * Allocate the queue_head array if not already allocated.
 	 */
 	if (udp->queue_head == NULL)
 		queue_alloc();
@@ -1542,14 +1556,16 @@ finish_init()
 	/*
 	 * Arrange to do special things on exit --
 	 * - collect queue statistics from all remaining active threads.
+	 * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set.
 	 * - grab assert_lock to ensure that assertion failures
 	 *   and a core dump take precedence over _exit().
-	 * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set.
 	 * (Functions are called in the reverse order of their registration.)
 	 */
-	(void) _atexit(dump_queue_statistics);
 	(void) _atexit(grab_assert_lock);
+#if defined(THREAD_DEBUG)
+	(void) _atexit(dump_queue_statistics);
 	(void) _atexit(collect_queue_statistics);
+#endif
 }
 
 /*
@@ -1575,7 +1591,7 @@ postfork1_child()
 {
 	ulwp_t *self = curthread;
 	uberdata_t *udp = self->ul_uberdata;
-	mutex_t *mp;
+	queue_head_t *qp;
 	ulwp_t *next;
 	ulwp_t *ulwp;
 	int i;
@@ -1599,13 +1615,18 @@ postfork1_child()
 	    USYNC_THREAD | LOCK_RECURSIVE, NULL);
 
 	/* no one in the child is on a sleep queue; reinitialize */
-	if (udp->queue_head) {
-		(void) _private_memset(udp->queue_head, 0,
+	if ((qp = udp->queue_head) != NULL) {
+		(void) _private_memset(qp, 0,
 		    2 * QHASHSIZE * sizeof (queue_head_t));
-		for (i = 0; i < 2 * QHASHSIZE; i++) {
-			mp = &udp->queue_head[i].qh_lock;
-			mp->mutex_flag = LOCK_INITED;
-			mp->mutex_magic = MUTEX_MAGIC;
+		for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
+			qp->qh_type = (i < QHASHSIZE)? MX : CV;
+			qp->qh_lock.mutex_flag = LOCK_INITED;
+			qp->qh_lock.mutex_magic = MUTEX_MAGIC;
+			qp->qh_hlist = &qp->qh_def_root;
+#if defined(THREAD_DEBUG)
+			qp->qh_hlen = 1;
+			qp->qh_hmax = 1;
+#endif
 		}
 	}
 
@@ -1666,36 +1687,6 @@ postfork1_child()
 	postfork1_child_aio();
 }
 
-#pragma weak thr_setprio = _thr_setprio
-#pragma weak pthread_setschedprio = _thr_setprio
-#pragma weak _pthread_setschedprio = _thr_setprio
-int
-_thr_setprio(thread_t tid, int priority)
-{
-	struct sched_param param;
-
-	(void) _memset(&param, 0, sizeof (param));
-	param.sched_priority = priority;
-	return (_thread_setschedparam_main(tid, 0, &param, PRIO_SET_PRIO));
-}
-
-#pragma weak thr_getprio = _thr_getprio
-int
-_thr_getprio(thread_t tid, int *priority)
-{
-	uberdata_t *udp = curthread->ul_uberdata;
-	ulwp_t *ulwp;
-	int error = 0;
-
-	if ((ulwp = find_lwp(tid)) == NULL)
-		error = ESRCH;
-	else {
-		*priority = ulwp->ul_pri;
-		ulwp_unlock(ulwp, udp);
-	}
-	return (error);
-}
-
 lwpid_t
 lwp_self(void)
 {
diff --git a/usr/src/lib/libc/sparc/Makefile b/usr/src/lib/libc/sparc/Makefile
index a9f1b9642b..5080553482 100644
--- a/usr/src/lib/libc/sparc/Makefile
+++ b/usr/src/lib/libc/sparc/Makefile
@@ -813,7 +813,6 @@ THREADSOBJS=			\
 	pthr_mutex.o		\
 	pthr_rwlock.o		\
 	pthread.o		\
-	rtsched.o		\
 	rwlock.o		\
 	scalls.o		\
 	sema.o			\
@@ -984,8 +983,6 @@ CFLAGS += -xinline=
 THREAD_DEBUG =
 $(NOT_RELEASE_BUILD)THREAD_DEBUG = -DTHREAD_DEBUG
 
-CFLAGS += $(THREAD_DEBUG)
-
 ALTPICS= $(TRACEOBJS:%=pics/%)
 
 $(DYNLIB) := PICS += $(ROOTFS_LIBDIR)/libc_i18n.a
@@ -994,7 +991,7 @@ $(DYNLIB) := BUILD.SO = $(LD) -o $@ -G $(DYNFLAGS) $(PICS) $(ALTPICS)
 MAPFILES =	../port/mapfile-vers mapfile-vers
 
 CFLAGS +=	$(EXTN_CFLAGS)
-CPPFLAGS=	-D_REENTRANT -Dsparc $(EXTN_CPPFLAGS) \
+CPPFLAGS=	-D_REENTRANT -Dsparc $(EXTN_CPPFLAGS) $(THREAD_DEBUG) \
 		-I$(LIBCBASE)/inc -I../inc $(CPPFLAGS.master)
 ASFLAGS=	-K pic -P -D__STDC__ -D_ASM $(CPPFLAGS) $(sparc_AS_XARCH)
 
@@ -1105,9 +1102,9 @@ TIL=				\
 	pthr_rwlock.o		\
 	pthread.o		\
 	rand.o			\
-	rtsched.o		\
 	rwlock.o		\
 	scalls.o		\
+	sched.o			\
 	sema.o			\
 	sigaction.o		\
 	sigev_thread.o		\
diff --git a/usr/src/lib/libc/sparc/threads/machdep.c b/usr/src/lib/libc/sparc/threads/machdep.c
index 5ebb6b324c..0fc62303f8 100644
--- a/usr/src/lib/libc/sparc/threads/machdep.c
+++ b/usr/src/lib/libc/sparc/threads/machdep.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -78,11 +78,15 @@ _thr_setup(ulwp_t *self)
 	self->ul_ustack.ss_size = self->ul_stksiz;
 	self->ul_ustack.ss_flags = 0;
 	(void) _private_setustack(&self->ul_ustack);
+
+	update_sched(self);
 	tls_setup();
 
 	/* signals have been deferred until now */
 	sigon(self);
 
+	if (self->ul_cancel_pending == 2 && !self->ul_cancel_disabled)
+		return (NULL);	/* cancelled by pthread_create() */
 	return (self->ul_startpc(self->ul_startarg));
 }
 
@@ -164,7 +168,7 @@ __csigsetjmp(sigjmp_buf env, int savemask)
 		bp->sjs_stack = self->ul_ustack;
 	else {
 		bp->sjs_stack.ss_sp =
-			(void *)(self->ul_stktop - self->ul_stksiz);
+		    (void *)(self->ul_stktop - self->ul_stksiz);
 		bp->sjs_stack.ss_size = self->ul_stksiz;
 		bp->sjs_stack.ss_flags = 0;
 	}
diff --git a/usr/src/lib/libc/sparcv9/Makefile b/usr/src/lib/libc/sparcv9/Makefile
index 9a201f7b60..9155de7910 100644
--- a/usr/src/lib/libc/sparcv9/Makefile
+++ b/usr/src/lib/libc/sparcv9/Makefile
@@ -759,7 +759,6 @@ THREADSOBJS=			\
 	pthr_mutex.o		\
 	pthr_rwlock.o		\
 	pthread.o		\
-	rtsched.o		\
 	rwlock.o		\
 	scalls.o		\
 	sema.o			\
@@ -927,8 +926,6 @@ CFLAGS64 += -xinline=
 THREAD_DEBUG =
 $(NOT_RELEASE_BUILD)THREAD_DEBUG = -DTHREAD_DEBUG
 
-CFLAGS64 += $(THREAD_DEBUG)
-
 ALTPICS= $(TRACEOBJS:%=pics/%)
 
 $(DYNLIB) := PICS += $(ROOTFS_LIBDIR64)/libc_i18n.a
@@ -938,7 +935,7 @@ MAPFILES =	../port/mapfile-vers mapfile-vers
 
 sparcv9_C_PICFLAGS= -K PIC
 CFLAGS64 +=	$(EXTN_CFLAGS)
-CPPFLAGS=	-D_REENTRANT -Dsparc $(EXTN_CPPFLAGS) \
+CPPFLAGS=	-D_REENTRANT -Dsparc $(EXTN_CPPFLAGS) $(THREAD_DEBUG) \
 		-I$(LIBCBASE)/inc -I../inc $(CPPFLAGS.master)
 ASFLAGS=	-K PIC -P -D__STDC__ -D_ASM -D__sparcv9 $(CPPFLAGS) \
 		$(sparcv9_AS_XARCH)
@@ -1036,9 +1033,9 @@ TIL=				\
 	pthr_rwlock.o		\
 	pthread.o		\
 	rand.o			\
-	rtsched.o		\
 	rwlock.o		\
 	scalls.o		\
+	sched.o			\
 	sema.o			\
 	sigaction.o		\
 	sigev_thread.o		\
diff --git a/usr/src/lib/libc_db/common/thread_db.c b/usr/src/lib/libc_db/common/thread_db.c
index 28b0ff3bf9..b99f0f3047 100644
--- a/usr/src/lib/libc_db/common/thread_db.c
+++ b/usr/src/lib/libc_db/common/thread_db.c
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -157,9 +157,9 @@ td_read_uberdata(td_thragent_t *ta_p)
 			return (TD_DBERR);
 		ta_p->primary_map = uberdata.primary_map;
 		ta_p->tdb_eventmask_addr = ta_p->uberdata_addr +
-			offsetof(uberdata_t, tdb.tdb_ev_global_mask);
+		    offsetof(uberdata_t, tdb.tdb_ev_global_mask);
 		ta_p->tdb_register_sync_addr = ta_p->uberdata_addr +
-			offsetof(uberdata_t, uberflags.uf_tdb_register_sync);
+		    offsetof(uberdata_t, uberflags.uf_tdb_register_sync);
 		ta_p->hash_table_addr = (psaddr_t)uberdata.thr_hash_table;
 		ta_p->hash_size = uberdata.hash_size;
 		if (ps_pdread(ph_p, (psaddr_t)uberdata.tdb.tdb_events,
@@ -177,9 +177,9 @@ td_read_uberdata(td_thragent_t *ta_p)
 			return (TD_DBERR);
 		ta_p->primary_map = uberdata.primary_map;
 		ta_p->tdb_eventmask_addr = ta_p->uberdata_addr +
-			offsetof(uberdata32_t, tdb.tdb_ev_global_mask);
+		    offsetof(uberdata32_t, tdb.tdb_ev_global_mask);
 		ta_p->tdb_register_sync_addr = ta_p->uberdata_addr +
-			offsetof(uberdata32_t, uberflags.uf_tdb_register_sync);
+		    offsetof(uberdata32_t, uberflags.uf_tdb_register_sync);
 		ta_p->hash_table_addr = (psaddr_t)uberdata.thr_hash_table;
 		ta_p->hash_size = uberdata.hash_size;
 		if (ps_pdread(ph_p, (psaddr_t)uberdata.tdb.tdb_events,
@@ -580,15 +580,15 @@ __td_ta_get_nthreads(td_thragent_t *ta_p, int *nthread_p)
 
 	if (ta_p->model == PR_MODEL_NATIVE) {
 		nthreads_addr = ta_p->uberdata_addr +
-			offsetof(uberdata_t, nthreads);
+		    offsetof(uberdata_t, nthreads);
 		nzombies_addr = ta_p->uberdata_addr +
-			offsetof(uberdata_t, nzombies);
+		    offsetof(uberdata_t, nzombies);
 	} else {
 #if defined(_LP64) && defined(_SYSCALL32)
 		nthreads_addr = ta_p->uberdata_addr +
-			offsetof(uberdata32_t, nthreads);
+		    offsetof(uberdata32_t, nthreads);
 		nzombies_addr = ta_p->uberdata_addr +
-			offsetof(uberdata32_t, nzombies);
+		    offsetof(uberdata32_t, nzombies);
 #else
 		nthreads_addr = 0;
 		nzombies_addr = 0;
@@ -673,9 +673,9 @@ __td_ta_map_id2thr(td_thragent_t *ta_p, thread_t tid,
 	data.tid = tid;
 	data.found = 0;
 	return_val = __td_ta_thr_iter(ta_p,
-		(td_thr_iter_f *)td_mapper_id2thr, (void *)&data,
-		TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,
-		TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
+	    (td_thr_iter_f *)td_mapper_id2thr, (void *)&data,
+	    TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,
+	    TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
 	if (return_val == TD_OK) {
 		if (data.found == 0)
 			return_val = TD_NOTHR;
@@ -835,10 +835,11 @@ __td_ta_tsd_iter(td_thragent_t *ta_p, td_key_iter_f *cb, void *cbdata_p)
 int
 sigequalset(const sigset_t *s1, const sigset_t *s2)
 {
-	return (s1->__sigbits[0] == s2->__sigbits[0] &&
-		s1->__sigbits[1] == s2->__sigbits[1] &&
-		s1->__sigbits[2] == s2->__sigbits[2] &&
-		s1->__sigbits[3] == s2->__sigbits[3]);
+	return (
+	    s1->__sigbits[0] == s2->__sigbits[0] &&
+	    s1->__sigbits[1] == s2->__sigbits[1] &&
+	    s1->__sigbits[2] == s2->__sigbits[2] &&
+	    s1->__sigbits[3] == s2->__sigbits[3]);
 }
 
 /*
@@ -986,9 +987,9 @@ __td_ta_thr_iter(td_thragent_t *ta_p, td_thr_iter_f *cb,
 			next_lwp_addr = (psaddr_t)ulwp.ul_forw;
 
 			ts_state = ulwp.ul_dead? TD_THR_ZOMBIE :
-				ulwp.ul_stop? TD_THR_STOPPED :
-				ulwp.ul_wchan? TD_THR_SLEEP :
-				TD_THR_ACTIVE;
+			    ulwp.ul_stop? TD_THR_STOPPED :
+			    ulwp.ul_wchan? TD_THR_SLEEP :
+			    TD_THR_ACTIVE;
 			userpri = ulwp.ul_pri;
 			userflags = ulwp.ul_usropts;
 			if (ulwp.ul_dead)
@@ -1010,9 +1011,9 @@ __td_ta_thr_iter(td_thragent_t *ta_p, td_thr_iter_f *cb,
 			next_lwp_addr = (psaddr_t)ulwp.ul_forw;
 
 			ts_state = ulwp.ul_dead? TD_THR_ZOMBIE :
-				ulwp.ul_stop? TD_THR_STOPPED :
-				ulwp.ul_wchan? TD_THR_SLEEP :
-				TD_THR_ACTIVE;
+			    ulwp.ul_stop? TD_THR_STOPPED :
+			    ulwp.ul_wchan? TD_THR_SLEEP :
+			    TD_THR_ACTIVE;
 			userpri = ulwp.ul_pri;
 			userflags = ulwp.ul_usropts;
 			if (ulwp.ul_dead)
@@ -1284,11 +1285,11 @@ td_thr2to(td_thragent_t *ta_p, psaddr_t ts_addr,
 	}
 	ti_p->ti_ro_area = ts_addr;
 	ti_p->ti_ro_size = ulwp->ul_replace?
-		REPLACEMENT_SIZE : sizeof (ulwp_t);
+	    REPLACEMENT_SIZE : sizeof (ulwp_t);
 	ti_p->ti_state = ulwp->ul_dead? TD_THR_ZOMBIE :
-		ulwp->ul_stop? TD_THR_STOPPED :
-		ulwp->ul_wchan? TD_THR_SLEEP :
-		TD_THR_ACTIVE;
+	    ulwp->ul_stop? TD_THR_STOPPED :
+	    ulwp->ul_wchan? TD_THR_SLEEP :
+	    TD_THR_ACTIVE;
 	ti_p->ti_db_suspended = 0;
 	ti_p->ti_type = TD_THR_USER;
 	ti_p->ti_sp = ulwp->ul_sp;
@@ -1328,11 +1329,11 @@ td_thr2to32(td_thragent_t *ta_p, psaddr_t ts_addr,
 	}
 	ti_p->ti_ro_area = ts_addr;
 	ti_p->ti_ro_size = ulwp->ul_replace?
-		REPLACEMENT_SIZE32 : sizeof (ulwp32_t);
+	    REPLACEMENT_SIZE32 : sizeof (ulwp32_t);
 	ti_p->ti_state = ulwp->ul_dead? TD_THR_ZOMBIE :
-		ulwp->ul_stop? TD_THR_STOPPED :
-		ulwp->ul_wchan? TD_THR_SLEEP :
-		TD_THR_ACTIVE;
+	    ulwp->ul_stop? TD_THR_STOPPED :
+	    ulwp->ul_wchan? TD_THR_SLEEP :
+	    TD_THR_ACTIVE;
 	ti_p->ti_db_suspended = 0;
 	ti_p->ti_type = TD_THR_USER;
 	ti_p->ti_sp = (uint32_t)ulwp->ul_sp;
@@ -1394,7 +1395,7 @@ __td_thr_get_info(td_thrhandle_t *th_p, td_thrinfo_t *ti_p)
 		if (ps_pdread(ph_p, psaddr, &ulwp, sizeof (ulwp)) != PS_OK &&
 		    ((void) memset(&ulwp, 0, sizeof (ulwp)),
 		    ps_pdread(ph_p, psaddr, &ulwp, REPLACEMENT_SIZE32)) !=
-				PS_OK)
+		    PS_OK)
 			return_val = TD_DBERR;
 		else
 			td_thr2to32(ta_p, psaddr, &ulwp, ti_p);
@@ -2086,9 +2087,9 @@ __td_thr_validate(const td_thrhandle_t *th_p)
 
 	searcher_data.addr = th_p->th_unique;
 	return_val = __td_ta_thr_iter(th_p->th_ta_p,
-		td_searcher, &searcher_data,
-		TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,
-		TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
+	    td_searcher, &searcher_data,
+	    TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,
+	    TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
 
 	if (return_val == TD_OK && searcher_data.status == 0)
 		return_val = TD_NOTHR;
@@ -2321,38 +2322,11 @@ __td_thr_tlsbase(td_thrhandle_t *th_p, ulong_t moduleid, psaddr_t *base)
  * Currently unused by dbx.
  */
 #pragma weak td_thr_setprio = __td_thr_setprio
+/* ARGSUSED */
 td_err_e
 __td_thr_setprio(td_thrhandle_t *th_p, int ti_pri)
 {
-	struct ps_prochandle *ph_p;
-	pri_t		priority = ti_pri;
-	td_err_e	return_val = TD_OK;
-
-	if (ti_pri < THREAD_MIN_PRIORITY || ti_pri > THREAD_MAX_PRIORITY)
-		return (TD_ERR);
-	if ((ph_p = ph_lock_th(th_p, &return_val)) == NULL)
-		return (return_val);
-
-	if (th_p->th_ta_p->model == PR_MODEL_NATIVE) {
-		ulwp_t *ulwp = (ulwp_t *)th_p->th_unique;
-
-		if (ps_pdwrite(ph_p, (psaddr_t)&ulwp->ul_pri,
-		    &priority, sizeof (priority)) != PS_OK)
-			return_val = TD_DBERR;
-	} else {
-#if defined(_LP64) && defined(_SYSCALL32)
-		ulwp32_t *ulwp = (ulwp32_t *)th_p->th_unique;
-
-		if (ps_pdwrite(ph_p, (psaddr_t)&ulwp->ul_pri,
-		    &priority, sizeof (priority)) != PS_OK)
-			return_val = TD_DBERR;
-#else
-		return_val = TD_ERR;
-#endif	/* _SYSCALL32 */
-	}
-
-	ph_unlock(th_p->th_ta_p);
-	return (return_val);
+	return (TD_NOCAPAB);
 }
 
 /*
@@ -2526,7 +2500,7 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p,
 		if (si_p->si_state.mutex_locked) {
 			if (si_p->si_shared_type & USYNC_PROCESS)
 				si_p->si_ownerpid =
-					generic_so.lock.mutex_ownerpid;
+				    generic_so.lock.mutex_ownerpid;
 			si_p->si_owner.th_ta_p = sh_p->sh_ta_p;
 			si_p->si_owner.th_unique = generic_so.lock.mutex_owner;
 		}
@@ -2539,8 +2513,8 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p,
 		    sizeof (generic_so.condition.flags.flag));
 		si_p->si_size = sizeof (generic_so.condition);
 		si_p->si_has_waiters =
-			(generic_so.condition.cond_waiters_user |
-			generic_so.condition.cond_waiters_kernel)? 1 : 0;
+		    (generic_so.condition.cond_waiters_user |
+		    generic_so.condition.cond_waiters_kernel)? 1 : 0;
 		break;
 	case SEMA_MAGIC:
 		if (trunc && ps_pdread(ph_p, sh_p->sh_unique,
@@ -2558,7 +2532,7 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p,
 		si_p->si_data = (psaddr_t)generic_so.semaphore.count;
 		break;
 	case RWL_MAGIC:
-	    {
+	{
 		uint32_t rwstate;
 
 		if (trunc && ps_pdread(ph_p, sh_p->sh_unique,
@@ -2575,10 +2549,10 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p,
 			si_p->si_is_wlock = 1;
 			si_p->si_owner.th_ta_p = sh_p->sh_ta_p;
 			si_p->si_owner.th_unique =
-				generic_so.rwlock.rwlock_owner;
+			    generic_so.rwlock.rwlock_owner;
 			if (si_p->si_shared_type & USYNC_PROCESS)
 				si_p->si_ownerpid =
-					generic_so.rwlock.rwlock_ownerpid;
+				    generic_so.rwlock.rwlock_ownerpid;
 		} else {
 			si_p->si_state.nreaders = (rwstate & URW_READERS_MASK);
 		}
@@ -2587,7 +2561,7 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p,
 		/* this is useless but the old interface provided it */
 		si_p->si_data = (psaddr_t)generic_so.rwlock.readers;
 		break;
-	    }
+	}
 	default:
 		return (TD_BADSH);
 	}
@@ -2756,7 +2730,7 @@ __td_sync_get_stats(const td_synchandle_t *sh_p, td_syncstats_t *ss_p)
 		return_val = TD_BADSH;
 	else
 		return_val = read_sync_stats(ta_p, hashaddr,
-			sh_p->sh_unique, &sync_stats);
+		    sh_p->sh_unique, &sync_stats);
 	if (return_val != TD_OK)
 		goto out;
 
@@ -2766,103 +2740,103 @@ __td_sync_get_stats(const td_synchandle_t *sh_p, td_syncstats_t *ss_p)
 	 */
 	switch (sync_stats.un.type) {
 	case TDB_MUTEX:
-	    {
+	{
 		td_mutex_stats_t *msp = &ss_p->ss_un.mutex;
 
 		ss_p->ss_info.si_type = TD_SYNC_MUTEX;
 		ss_p->ss_info.si_size = sizeof (mutex_t);
 		msp->mutex_lock =
-			sync_stats.un.mutex.mutex_lock;
+		    sync_stats.un.mutex.mutex_lock;
 		msp->mutex_sleep =
-			sync_stats.un.mutex.mutex_sleep;
+		    sync_stats.un.mutex.mutex_sleep;
 		msp->mutex_sleep_time =
-			sync_stats.un.mutex.mutex_sleep_time;
+		    sync_stats.un.mutex.mutex_sleep_time;
 		msp->mutex_hold_time =
-			sync_stats.un.mutex.mutex_hold_time;
+		    sync_stats.un.mutex.mutex_hold_time;
 		msp->mutex_try =
-			sync_stats.un.mutex.mutex_try;
+		    sync_stats.un.mutex.mutex_try;
 		msp->mutex_try_fail =
-			sync_stats.un.mutex.mutex_try_fail;
+		    sync_stats.un.mutex.mutex_try_fail;
 		if (sync_stats.sync_addr >= ta_p->hash_table_addr &&
 		    (ix = sync_stats.sync_addr - ta_p->hash_table_addr)
 		    < ta_p->hash_size * sizeof (thr_hash_table_t))
 			msp->mutex_internal =
-				ix / sizeof (thr_hash_table_t) + 1;
+			    ix / sizeof (thr_hash_table_t) + 1;
 		break;
-	    }
+	}
 	case TDB_COND:
-	    {
+	{
 		td_cond_stats_t *csp = &ss_p->ss_un.cond;
 
 		ss_p->ss_info.si_type = TD_SYNC_COND;
 		ss_p->ss_info.si_size = sizeof (cond_t);
 		csp->cond_wait =
-			sync_stats.un.cond.cond_wait;
+		    sync_stats.un.cond.cond_wait;
 		csp->cond_timedwait =
-			sync_stats.un.cond.cond_timedwait;
+		    sync_stats.un.cond.cond_timedwait;
 		csp->cond_wait_sleep_time =
-			sync_stats.un.cond.cond_wait_sleep_time;
+		    sync_stats.un.cond.cond_wait_sleep_time;
 		csp->cond_timedwait_sleep_time =
-			sync_stats.un.cond.cond_timedwait_sleep_time;
+		    sync_stats.un.cond.cond_timedwait_sleep_time;
 		csp->cond_timedwait_timeout =
-			sync_stats.un.cond.cond_timedwait_timeout;
+		    sync_stats.un.cond.cond_timedwait_timeout;
 		csp->cond_signal =
-			sync_stats.un.cond.cond_signal;
+		    sync_stats.un.cond.cond_signal;
 		csp->cond_broadcast =
-			sync_stats.un.cond.cond_broadcast;
+		    sync_stats.un.cond.cond_broadcast;
 		if (sync_stats.sync_addr >= ta_p->hash_table_addr &&
 		    (ix = sync_stats.sync_addr - ta_p->hash_table_addr)
 		    < ta_p->hash_size * sizeof (thr_hash_table_t))
 			csp->cond_internal =
-				ix / sizeof (thr_hash_table_t) + 1;
+			    ix / sizeof (thr_hash_table_t) + 1;
 		break;
-	    }
+	}
 	case TDB_RWLOCK:
-	    {
+	{
 		td_rwlock_stats_t *rwsp = &ss_p->ss_un.rwlock;
 
 		ss_p->ss_info.si_type = TD_SYNC_RWLOCK;
 		ss_p->ss_info.si_size = sizeof (rwlock_t);
 		rwsp->rw_rdlock =
-			sync_stats.un.rwlock.rw_rdlock;
+		    sync_stats.un.rwlock.rw_rdlock;
 		rwsp->rw_rdlock_try =
-			sync_stats.un.rwlock.rw_rdlock_try;
+		    sync_stats.un.rwlock.rw_rdlock_try;
 		rwsp->rw_rdlock_try_fail =
-			sync_stats.un.rwlock.rw_rdlock_try_fail;
+		    sync_stats.un.rwlock.rw_rdlock_try_fail;
 		rwsp->rw_wrlock =
-			sync_stats.un.rwlock.rw_wrlock;
+		    sync_stats.un.rwlock.rw_wrlock;
 		rwsp->rw_wrlock_hold_time =
-			sync_stats.un.rwlock.rw_wrlock_hold_time;
+		    sync_stats.un.rwlock.rw_wrlock_hold_time;
 		rwsp->rw_wrlock_try =
-			sync_stats.un.rwlock.rw_wrlock_try;
+		    sync_stats.un.rwlock.rw_wrlock_try;
 		rwsp->rw_wrlock_try_fail =
-			sync_stats.un.rwlock.rw_wrlock_try_fail;
+		    sync_stats.un.rwlock.rw_wrlock_try_fail;
 		break;
-	    }
+	}
 	case TDB_SEMA:
-	    {
+	{
 		td_sema_stats_t *ssp = &ss_p->ss_un.sema;
 
 		ss_p->ss_info.si_type = TD_SYNC_SEMA;
 		ss_p->ss_info.si_size = sizeof (sema_t);
 		ssp->sema_wait =
-			sync_stats.un.sema.sema_wait;
+		    sync_stats.un.sema.sema_wait;
 		ssp->sema_wait_sleep =
-			sync_stats.un.sema.sema_wait_sleep;
+		    sync_stats.un.sema.sema_wait_sleep;
 		ssp->sema_wait_sleep_time =
-			sync_stats.un.sema.sema_wait_sleep_time;
+		    sync_stats.un.sema.sema_wait_sleep_time;
 		ssp->sema_trywait =
-			sync_stats.un.sema.sema_trywait;
+		    sync_stats.un.sema.sema_trywait;
 		ssp->sema_trywait_fail =
-			sync_stats.un.sema.sema_trywait_fail;
+		    sync_stats.un.sema.sema_trywait_fail;
 		ssp->sema_post =
-			sync_stats.un.sema.sema_post;
+		    sync_stats.un.sema.sema_post;
 		ssp->sema_max_count =
-			sync_stats.un.sema.sema_max_count;
+		    sync_stats.un.sema.sema_max_count;
 		ssp->sema_min_count =
-			sync_stats.un.sema.sema_min_count;
+		    sync_stats.un.sema.sema_min_count;
 		break;
-	    }
+	}
 	default:
 		return_val = TD_BADSH;
 		break;
@@ -3064,8 +3038,8 @@ __td_sync_waiters(const td_synchandle_t *sh_p, td_thr_iter_f *cb, void *cb_data)
 	wcb.waiter_cb_arg = cb_data;
 	wcb.errcode = TD_OK;
 	return_val = __td_ta_thr_iter(sh_p->sh_ta_p, waiters_cb, &wcb,
-		TD_THR_SLEEP, TD_THR_LOWEST_PRIORITY,
-		TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
+	    TD_THR_SLEEP, TD_THR_LOWEST_PRIORITY,
+	    TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
 
 	if (return_val != TD_OK)
 		return (return_val);
diff --git a/usr/src/lib/libldap5/sources/ldap/prldap/ldappr-threads.c b/usr/src/lib/libldap5/sources/ldap/prldap/ldappr-threads.c
index b6b8a0e27a..0f362759c7 100644
--- a/usr/src/lib/libldap5/sources/ldap/prldap/ldappr-threads.c
+++ b/usr/src/lib/libldap5/sources/ldap/prldap/ldappr-threads.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -658,8 +658,16 @@ prldap_nspr_idle_primordial_thread(void *arg) {
 	 * Make sure PR_Init finishes before any other thread can continue
 	 */
 	(void) mutex_lock(&nspr_idle_lock);
-	if (PR_Initialized() == PR_FALSE)
+	if (PR_Initialized() == PR_FALSE) {
+		/*
+		 * PR_Init() changes the current thread's
+		 * priority.  Save and restore the priority.
+		 */
+		int priority;
+		(void) thr_getprio(thr_self(), &priority);
 		PR_Init(PR_USER_THREAD, PR_PRIORITY_NORMAL, 0);
+		(void) thr_setprio(thr_self(), priority);
+	}
 	nspr_pr_init_is_done = 1;
 	(void) cond_signal(&nspr_idle_cond);
 	(void) mutex_unlock(&nspr_idle_lock);
@@ -714,8 +722,16 @@ prldap_nspr_init(void) {
 
 		if (thr_self() == 1) {
 			/* main thread */
-			if (PR_Initialized() == PR_FALSE)
- 				PR_Init(PR_USER_THREAD, PR_PRIORITY_NORMAL, 0);
+			if (PR_Initialized() == PR_FALSE) {
+				/*
+				 * PR_Init() changes the current thread's
+				 * priority.  Save and restore the priority.
+				 */
+				int priority;
+				(void) thr_getprio(thr_self(), &priority);
+				PR_Init(PR_USER_THREAD, PR_PRIORITY_NORMAL, 0);
+				(void) thr_setprio(thr_self(), priority);
+			}
 			nspr_initialized = 1;
 		} else {
 			if (thr_create(NULL, NULL,
diff --git a/usr/src/uts/common/disp/class.c b/usr/src/uts/common/disp/class.c
index 8e83a839ee..c6cecdb012 100644
--- a/usr/src/uts/common/disp/class.c
+++ b/usr/src/uts/common/disp/class.c
@@ -18,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -36,6 +37,7 @@
 #include <sys/modctl.h>
 #include <sys/disp.h>
 #include <sys/sysmacros.h>
+#include <sys/schedctl.h>
 
 static int getcidbyname_locked(char *, id_t *);
 
@@ -178,7 +180,7 @@ getcidbyname(char *clname, id_t *cidp)
  * tp into the buffer pointed to by parmsp.
  */
 void
-parmsget(kthread_id_t tp, pcparms_t *parmsp)
+parmsget(kthread_t *tp, pcparms_t *parmsp)
 {
 	parmsp->pc_cid = tp->t_cid;
 	CL_PARMSGET(tp, parmsp->pc_clparms);
@@ -225,7 +227,7 @@ int
 parmsout(pcparms_t *parmsp, pc_vaparms_t *vaparmsp)
 {
 	return (CL_PARMSOUT(&sclass[parmsp->pc_cid], parmsp->pc_clparms,
-		vaparmsp));
+	    vaparmsp));
 }
 
 
@@ -238,7 +240,7 @@ parmsout(pcparms_t *parmsp, pc_vaparms_t *vaparmsp)
  * has the appropriate permissions.
  */
 int
-parmsset(pcparms_t *parmsp, kthread_id_t targtp)
+parmsset(pcparms_t *parmsp, kthread_t *targtp)
 {
 	caddr_t	clprocp;
 	int	error;
@@ -310,11 +312,12 @@ parmsset(pcparms_t *parmsp, kthread_id_t targtp)
 		 * Not changing class
 		 */
 		error = CL_PARMSSET(targtp, parmsp->pc_clparms,
-					curthread->t_cid, reqpcredp);
+		    curthread->t_cid, reqpcredp);
 		crfree(reqpcredp);
 		if (error)
 			return (error);
 	}
+	schedctl_set_cidpri(targtp);
 	return (0);
 }
 
diff --git a/usr/src/uts/common/disp/fss.c b/usr/src/uts/common/disp/fss.c
index e132ff3397..e52a9d89aa 100644
--- a/usr/src/uts/common/disp/fss.c
+++ b/usr/src/uts/common/disp/fss.c
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -224,6 +224,7 @@ static void	fss_sleep(kthread_t *);
 static void	fss_tick(kthread_t *);
 static void	fss_wakeup(kthread_t *);
 static int	fss_donice(kthread_t *, cred_t *, int, int *);
+static int	fss_doprio(kthread_t *, cred_t *, int, int *);
 static pri_t	fss_globpri(kthread_t *);
 static void	fss_yield(kthread_t *);
 static void	fss_nullsys();
@@ -263,7 +264,8 @@ static struct classfuncs fss_classfuncs = {
 	fss_donice,
 	fss_globpri,
 	fss_nullsys,	/* set_process_group */
-	fss_yield
+	fss_yield,
+	fss_doprio,
 };
 
 int
@@ -954,6 +956,7 @@ fss_change_priority(kthread_t *t, fssproc_t *fssproc)
 	new_pri = fssproc->fss_umdpri;
 	ASSERT(new_pri >= 0 && new_pri <= fss_maxglobpri);
 
+	t->t_cpri = fssproc->fss_upri;
 	fssproc->fss_flags &= ~FSSRESTORE;
 	if (t == curthread || t->t_state == TS_ONPROC) {
 		/*
@@ -1271,11 +1274,14 @@ fss_vaparmsout(void *parmsp, pc_vaparms_t *vaparmsp)
 	return (0);
 }
 
+/*
+ * Return the user mode scheduling priority range.
+ */
 static int
 fss_getclpri(pcpri_t *pcprip)
 {
-	pcprip->pc_clpmax = fss_maxumdpri;
-	pcprip->pc_clpmin = 0;
+	pcprip->pc_clpmax = fss_maxupri;
+	pcprip->pc_clpmin = -fss_maxupri;
 	return (0);
 }
 
@@ -1881,8 +1887,7 @@ fss_swapout(kthread_t *t, int flags)
 	if (INHERITED(t) ||
 	    (fssproc->fss_flags & FSSKPRI) ||
 	    (t->t_proc_flag & TP_LWPEXIT) ||
-	    (t->t_state & (TS_ZOMB | TS_FREE | TS_STOPPED |
-		TS_ONPROC | TS_WAIT)) ||
+	    (t->t_state & (TS_ZOMB|TS_FREE|TS_STOPPED|TS_ONPROC|TS_WAIT)) ||
 	    !(t->t_schedflag & TS_LOAD) ||
 	    !(SWAP_OK(t)))
 		return (-1);
@@ -2237,7 +2242,7 @@ fss_tick(kthread_t *t)
 				call_cpu_surrender = B_TRUE;
 			}
 		} else if (t->t_state == TS_ONPROC &&
-			    t->t_pri < t->t_disp_queue->disp_maxrunpri) {
+		    t->t_pri < t->t_disp_queue->disp_maxrunpri) {
 			/*
 			 * If there is a higher-priority thread which is
 			 * waiting for a processor, then thread surrenders
@@ -2392,6 +2397,38 @@ fss_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
 }
 
 /*
+ * Add incr to the thread's user priority (fss_upri); incr == 0 is a query.
+ * *retvalp gets the new value before it is applied; EINVAL if out of range.
+ */
+static int
+fss_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
+{
+	int newpri;
+	fssproc_t *fssproc = FSSPROC(t);
+	fssparms_t fssparms;
+
+	/*
+	 * Zero increment is a query: report fss_upri and change nothing.
+	 */
+	if (incr == 0) {
+		*retvalp = fssproc->fss_upri;
+		return (0);
+	}
+
+	newpri = fssproc->fss_upri + incr;
+	if (newpri > fss_maxupri || newpri < -fss_maxupri)
+		return (EINVAL);
+
+	*retvalp = newpri;
+	fssparms.fss_uprilim = fssparms.fss_upri = newpri;
+
+	/*
+	 * Install newpri as both uprilim and upri via fss_parmsset().
+	 */
+	return (fss_parmsset(t, &fssparms, (id_t)0, cr));
+}
+
+/*
  * Return the global scheduling priority that would be assigned to a thread
  * entering the fair-sharing class with the fss_upri.
  */
@@ -2618,12 +2655,12 @@ fss_changepset(kthread_t *t, void *newcp, fssbuf_t *projbuf,
 	thread_lock(t);
 	if (t->t_state == TS_RUN || t->t_state == TS_ONPROC ||
 	    t->t_state == TS_WAIT)
-	    fss_inactive(t);
+		fss_inactive(t);
 	fssproc->fss_proj = fssproj_new;
 	fssproc->fss_fsspri = 0;
 	if (t->t_state == TS_RUN || t->t_state == TS_ONPROC ||
 	    t->t_state == TS_WAIT)
-	    fss_active(t);
+		fss_active(t);
 	thread_unlock(t);
 	mutex_exit(&fsspset_new->fssps_lock);
 
diff --git a/usr/src/uts/common/disp/fx.c b/usr/src/uts/common/disp/fx.c
index b4899e0edf..08a67f671f 100644
--- a/usr/src/uts/common/disp/fx.c
+++ b/usr/src/uts/common/disp/fx.c
@@ -18,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -191,6 +192,7 @@ static void	fx_sleep(kthread_t *);
 static void	fx_tick(kthread_t *);
 static void	fx_wakeup(kthread_t *);
 static int	fx_donice(kthread_t *, cred_t *, int, int *);
+static int	fx_doprio(kthread_t *, cred_t *, int, int *);
 static pri_t	fx_globpri(kthread_t *);
 static void	fx_yield(kthread_t *);
 static void	fx_nullsys();
@@ -238,6 +240,7 @@ static struct classfuncs fx_classfuncs = {
 	fx_globpri,
 	fx_nullsys,	/* set_process_group */
 	fx_yield,
+	fx_doprio,
 };
 
 
@@ -282,7 +285,7 @@ fx_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
 	 */
 	for (i = 0; i < FX_CB_LISTS; i++) {
 		fx_cb_plisthead[i].fx_cb_next = fx_cb_plisthead[i].fx_cb_prev =
-			&fx_cb_plisthead[i];
+		    &fx_cb_plisthead[i];
 	}
 
 	/*
@@ -498,8 +501,7 @@ fx_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
 		fxpp->fx_uprilim = reqfxuprilim;
 		fxpp->fx_pri = reqfxupri;
 
-		fxpp->fx_nice = NZERO - (NZERO * reqfxupri)
-			/ fx_maxupri;
+		fxpp->fx_nice = NZERO - (NZERO * reqfxupri) / fx_maxupri;
 
 		if (((fxkparmsp->fx_cflags & FX_DOTQ) == 0) ||
 		    (fxkparmsp->fx_tqntm == FX_TQDEF)) {
@@ -694,14 +696,13 @@ fx_getclinfo(void *infop)
 
 
 /*
- * Return the global scheduling priority ranges for the fixed-priority
- * class in pcpri_t structure.
+ * Return the user mode scheduling priority range.
  */
 static int
 fx_getclpri(pcpri_t *pcprip)
 {
-	pcprip->pc_clpmax = fx_dptbl[fx_maxumdpri].fx_globpri;
-	pcprip->pc_clpmin = fx_dptbl[0].fx_globpri;
+	pcprip->pc_clpmax = fx_maxupri;
+	pcprip->pc_clpmin = 0;
 	return (0);
 }
 
@@ -753,7 +754,7 @@ fx_parmsin(void *parmsp)
 		return (EINVAL);
 
 	if ((fxparmsp->fx_tqsecs == 0 && fxparmsp->fx_tqnsecs == 0) ||
-		fxparmsp->fx_tqnsecs >= NANOSEC)
+	    fxparmsp->fx_tqnsecs >= NANOSEC)
 		return (EINVAL);
 
 	cflags = (fxparmsp->fx_upri != FX_NOCHANGE ? FX_DOUPRI : 0);
@@ -1158,7 +1159,7 @@ fx_preempt(kthread_t *t)
 		clock_t new_quantum =  (clock_t)fxpp->fx_pquantum;
 		pri_t	newpri = fxpp->fx_pri;
 		FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie,
-			&new_quantum, &newpri);
+		    &new_quantum, &newpri);
 		FX_ADJUST_QUANTUM(new_quantum);
 		if ((int)new_quantum != fxpp->fx_pquantum) {
 			fxpp->fx_pquantum = (int)new_quantum;
@@ -1299,7 +1300,7 @@ fx_tick(kthread_t *t)
 		clock_t new_quantum =  (clock_t)fxpp->fx_pquantum;
 		pri_t	newpri = fxpp->fx_pri;
 		FX_CB_TICK(FX_CALLB(fxpp), fxpp->fx_cookie,
-			&new_quantum, &newpri);
+		    &new_quantum, &newpri);
 		FX_ADJUST_QUANTUM(new_quantum);
 		if ((int)new_quantum != fxpp->fx_pquantum) {
 			fxpp->fx_pquantum = (int)new_quantum;
@@ -1359,7 +1360,7 @@ fx_tick(kthread_t *t)
 			call_cpu_surrender = B_TRUE;
 		}
 	} else if (t->t_state == TS_ONPROC &&
-		    t->t_pri < t->t_disp_queue->disp_maxrunpri) {
+	    t->t_pri < t->t_disp_queue->disp_maxrunpri) {
 		call_cpu_surrender = B_TRUE;
 	}
 
@@ -1398,7 +1399,7 @@ fx_wakeup(kthread_t *t)
 		clock_t new_quantum =  (clock_t)fxpp->fx_pquantum;
 		pri_t	newpri = fxpp->fx_pri;
 		FX_CB_WAKEUP(FX_CALLB(fxpp), fxpp->fx_cookie,
-			&new_quantum, &newpri);
+		    &new_quantum, &newpri);
 		FX_ADJUST_QUANTUM(new_quantum);
 		if ((int)new_quantum != fxpp->fx_pquantum) {
 			fxpp->fx_pquantum = (int)new_quantum;
@@ -1441,7 +1442,7 @@ fx_yield(kthread_t *t)
 		clock_t new_quantum =  (clock_t)fxpp->fx_pquantum;
 		pri_t	newpri = fxpp->fx_pri;
 		FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie,
-				&new_quantum, &newpri);
+		    &new_quantum, &newpri);
 		FX_ADJUST_QUANTUM(new_quantum);
 		if ((int)new_quantum != fxpp->fx_pquantum) {
 			fxpp->fx_pquantum = (int)new_quantum;
@@ -1474,7 +1475,6 @@ fx_yield(kthread_t *t)
 	setbackdq(t);
 }
 
-
 /*
  * Increment the nice value of the specified thread by incr and
  * return the new value in *retvalp.
@@ -1517,7 +1517,7 @@ fx_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
 		newnice = 0;
 
 	fxkparms.fx_uprilim = fxkparms.fx_upri =
-		-((newnice - NZERO) * fx_maxupri) / NZERO;
+	    -((newnice - NZERO) * fx_maxupri) / NZERO;
 
 	fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI;
 
@@ -1546,6 +1546,40 @@ fx_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
 	return (0);
 }
 
+/*
+ * Add incr to the thread's FX priority (fx_pri); incr == 0 is a query.
+ * *retvalp gets the new value before it is applied; EINVAL if out of range.
+ */
+static int
+fx_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
+{
+	int		newpri;
+	fxproc_t	*fxpp = (fxproc_t *)(t->t_cldata);
+	fxkparms_t	fxkparms;
+
+	ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
+
+	/* Zero increment is a query: report fx_pri and change nothing */
+	if (incr == 0) {
+		*retvalp = fxpp->fx_pri;
+		return (0);
+	}
+
+	newpri = fxpp->fx_pri + incr;
+	if (newpri > fx_maxupri || newpri < 0)
+		return (EINVAL);
+
+	*retvalp = newpri;
+	fxkparms.fx_uprilim = fxkparms.fx_upri = newpri;
+	fxkparms.fx_tqntm = FX_NOCHANGE;
+	fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI;
+
+	/*
+	 * Only uprilim and upri are flagged for update in fx_cflags.
+	 */
+	return (fx_parmsset(t, (void *)&fxkparms, (id_t)0, cr));
+}
+
 static void
 fx_change_priority(kthread_t *t, fxproc_t *fxpp)
 {
@@ -1554,6 +1588,7 @@ fx_change_priority(kthread_t *t, fxproc_t *fxpp)
 	ASSERT(THREAD_LOCK_HELD(t));
 	new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri;
 	ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri);
+	t->t_cpri = fxpp->fx_pri;
 	if (t == curthread || t->t_state == TS_ONPROC) {
 		/* curthread is always onproc */
 		cpu_t	*cp = t->t_disp_queue->disp_cpu;
diff --git a/usr/src/uts/common/disp/priocntl.c b/usr/src/uts/common/disp/priocntl.c
index 9197dc815b..ae863472b0 100644
--- a/usr/src/uts/common/disp/priocntl.c
+++ b/usr/src/uts/common/disp/priocntl.c
@@ -18,15 +18,15 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
 /*	  All Rights Reserved  	*/
 
-
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 
 #include <sys/types.h>
@@ -52,6 +52,7 @@
 #include <sys/uadmin.h>
 #include <sys/cmn_err.h>
 #include <sys/policy.h>
+#include <sys/schedctl.h>
 
 /*
  * Structure used to pass arguments to the proccmp() function.
@@ -62,7 +63,7 @@
 struct pcmpargs {
 	id_t	*pcmp_cidp;
 	int	*pcmp_cntp;
-	kthread_id_t	*pcmp_retthreadp;
+	kthread_t **pcmp_retthreadp;
 };
 
 /*
@@ -115,9 +116,10 @@ copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap, uio_seg_t seg)
 #endif
 
 static int donice(procset_t *, pcnice_t *);
+static int doprio(procset_t *, pcprio_t *);
 static int proccmp(proc_t *, struct pcmpargs *);
 static int setparms(proc_t *, struct stprmargs *);
-extern int threadcmp(struct pcmpargs *, kthread_id_t);
+extern int threadcmp(struct pcmpargs *, kthread_t *);
 
 /*
  * The priocntl system call.
@@ -129,6 +131,7 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
 	pcinfo_t		pcinfo;
 	pcparms_t		pcparms;
 	pcnice_t		pcnice;
+	pcprio_t		pcprio;
 	pcadmin_t		pcadmin;
 	pcpri_t			pcpri;
 	procset_t		procset;
@@ -138,7 +141,7 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
 	char			clname[PC_CLNMSZ];
 	char			*outstr;
 	int			count;
-	kthread_id_t		retthreadp;
+	kthread_t		*retthreadp;
 	proc_t			*initpp;
 	int			clnullflag;
 	int			error = 0;
@@ -340,7 +343,7 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
 			 * call parmsset() (which does the real work).
 			 */
 			if ((procset.p_lidtype != P_LWPID) ||
-				(procset.p_ridtype != P_LWPID)) {
+			    (procset.p_ridtype != P_LWPID)) {
 				error1 = dotoprocs(&procset, setparms,
 				    (char *)&stprmargs);
 			}
@@ -524,6 +527,7 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
 				error = error1;
 			if (error) {
 				if (retthreadp != NULL)
+				    /* CSTYLED */
 				    mutex_exit(&(ttoproc(retthreadp)->p_lock));
 				ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
 				return (set_errno(error));
@@ -644,7 +648,7 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
 		 */
 		mutex_enter(&ualock);
 		error = CL_ADMIN(&sclass[pcadmin.pc_cid], pcadmin.pc_cladmin,
-				CRED());
+		    CRED());
 		mutex_exit(&ualock);
 		break;
 
@@ -678,6 +682,22 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
 		}
 		break;
 
+	case PC_DOPRIO:
+		/*
+		 * Get pcprio and procset structures from the user.
+		 */
+		if ((*copyinfn)(arg, &pcprio, sizeof (pcprio)) ||
+		    (*copyinfn)(psp, &procset, sizeof (procset)))
+			return (set_errno(EFAULT));
+
+		error = doprio(&procset, &pcprio);
+
+		if (!error && (pcprio.pc_op == PC_GETPRIO)) {
+			if ((*copyoutfn)(&pcprio, arg, sizeof (pcprio)))
+				return (set_errno(EFAULT));
+		}
+		break;
+
 	case PC_SETDFLCL:
 		if (secpolicy_dispadm(CRED()) != 0)
 			return (set_errno(EPERM));
@@ -738,7 +758,8 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 static int
 proccmp(proc_t *pp, struct pcmpargs *argp)
 {
-	kthread_id_t	tx, ty;
+	kthread_t	*tx;
+	kthread_t	*ty;
 	int		last_pri = -1;
 	int		tx_pri;
 	int		found = 0;
@@ -800,9 +821,9 @@ proccmp(proc_t *pp, struct pcmpargs *argp)
 
 
 int
-threadcmp(struct pcmpargs *argp, kthread_id_t tp)
+threadcmp(struct pcmpargs *argp, kthread_t *tp)
 {
-	kthread_id_t	tx;
+	kthread_t	*tx;
 	proc_t		*pp;
 
 	ASSERT(MUTEX_HELD(&(ttoproc(tp))->p_lock));
@@ -857,7 +878,7 @@ static int
 setparms(proc_t *targpp, struct stprmargs *stprmp)
 {
 	int error = 0;
-	kthread_id_t t;
+	kthread_t *t;
 	int err;
 
 	mutex_enter(&targpp->p_lock);
@@ -885,7 +906,7 @@ setparms(proc_t *targpp, struct stprmargs *stprmp)
 int
 setthreadnice(pcnice_t *pcnice, kthread_t *tp)
 {
-	int error = 0;
+	int error;
 	int nice;
 	int inc;
 	id_t rtcid;
@@ -898,9 +919,9 @@ setthreadnice(pcnice_t *pcnice, kthread_t *tp)
 	 * must be unaffected by a call to setpriority().
 	 */
 	error = getcidbyname("RT", &rtcid);
-	if ((error == 0) && (tp->t_cid == rtcid)) {
+	if (error == 0 && tp->t_cid == rtcid) {
 		if (pcnice->pc_op == PC_SETNICE)
-			return (error);
+			return (0);
 	}
 
 	if ((error = CL_DONICE(tp, CRED(), 0, &nice)) != 0)
@@ -922,6 +943,7 @@ setthreadnice(pcnice_t *pcnice, kthread_t *tp)
 		inc = pcnice->pc_val - nice;
 
 		error = CL_DONICE(tp, CRED(), inc, &inc);
+		schedctl_set_cidpri(tp);
 	}
 
 	return (error);
@@ -932,7 +954,7 @@ setprocnice(proc_t *pp, pcnice_t *pcnice)
 {
 	kthread_t *tp;
 	int retval = 0;
-	int error = 0;
+	int error;
 
 	ASSERT(MUTEX_HELD(&pidlock));
 	mutex_enter(&pp->p_lock);
@@ -1033,3 +1055,170 @@ donice(procset_t *procset, pcnice_t *pcnice)
 
 	return (err);
 }
+
+int
+setthreadprio(pcprio_t *pcprio, kthread_t *tp)
+{
+	int prio = 0;
+	int incr;
+	int error;
+
+	ASSERT(MUTEX_HELD(&pidlock));
+	ASSERT(MUTEX_HELD(&(ttoproc(tp)->p_lock)));
+
+	if (pcprio->pc_op == PC_SETPRIO && pcprio->pc_cid != tp->t_cid) {
+		/*
+		 * PC_SETPRIO names a different class, so switch the thread
+		 * over first.  The sequence is copied from parmsset(); see it.
+		 */
+		void *bufp = NULL;
+		caddr_t clprocp = (caddr_t)tp->t_cldata;
+		id_t oldcid = tp->t_cid;
+
+		error = CL_CANEXIT(tp, NULL);
+		if (error)
+			return (error);
+		if (CL_ALLOC(&bufp, pcprio->pc_cid, KM_NOSLEEP) != 0)
+			return (ENOMEM);
+		error = CL_ENTERCLASS(tp, pcprio->pc_cid, NULL, CRED(), bufp);
+		if (error) {
+			CL_FREE(pcprio->pc_cid, bufp);
+			return (error);
+		}
+		CL_EXITCLASS(oldcid, clprocp);
+		schedctl_set_cidpri(tp);
+	}
+
+	if ((error = CL_DOPRIO(tp, CRED(), 0, &prio)) != 0)
+		return (error);
+
+	if (pcprio->pc_op == PC_GETPRIO) {
+		/*
+		 * Query: keep a running maximum in *pcprio, recording the
+		 * class id that goes with the highest priority seen so far.
+		 */
+		if (prio > pcprio->pc_val) {
+			pcprio->pc_cid = tp->t_cid;
+			pcprio->pc_val = prio;
+		}
+	} else if (prio != pcprio->pc_val) {
+		/*
+		 * CL_DOPRIO() takes an increment, so pass the delta from prio.
+		 */
+		incr = pcprio->pc_val - prio;
+		error = CL_DOPRIO(tp, CRED(), incr, &prio);
+		schedctl_set_cidpri(tp);
+	}
+
+	return (error);
+}
+
+int
+setprocprio(proc_t *pp, pcprio_t *pcprio)
+{
+	kthread_t *tp;
+	int retval = 0;
+	int error;
+
+	ASSERT(MUTEX_HELD(&pidlock));
+	mutex_enter(&pp->p_lock);
+
+	if ((tp = pp->p_tlist) == NULL) {
+		mutex_exit(&pp->p_lock);
+		return (ESRCH);
+	}
+
+	/*
+	 * Only PC_SETPRIO is permission-checked; a query skips this test.
+	 */
+	if (pcprio->pc_op == PC_SETPRIO) {
+		if (!prochasprocperm(pp, curproc, CRED())) {
+			mutex_exit(&pp->p_lock);
+			return (EPERM);
+		}
+	}
+
+	do {
+		error = setthreadprio(pcprio, tp);
+		if (error)
+			retval = error;	/* most recent failure is returned */
+	} while ((tp = tp->t_forw) != pp->p_tlist);
+
+	mutex_exit(&pp->p_lock);
+	return (retval);
+}
+
+/*
+ * Get or set the class and priority of the specified LWP(s) or processes.
+ */
+static int
+doprio(procset_t *procset, pcprio_t *pcprio)
+{
+	int err_proc = 0;
+	int err_thread = 0;
+	int err = 0;
+
+	/*
+	 * Validate the operation and, for PC_SETPRIO, the target class id.
+	 */
+	if (pcprio->pc_op != PC_GETPRIO && pcprio->pc_op != PC_SETPRIO)
+		return (EINVAL);
+	if (pcprio->pc_op == PC_SETPRIO &&
+	    (pcprio->pc_cid >= loaded_classes || pcprio->pc_cid < 1))
+		return (EINVAL);
+
+	/*
+	 * For PC_GETPRIO, seed pc_val with SHRT_MIN so that the comparisons
+	 * in setthreadprio() converge on the highest priority found among
+	 * the specified processes.
+	 */
+	if (pcprio->pc_op == PC_GETPRIO)
+		pcprio->pc_val = SHRT_MIN;
+
+	if (procset->p_lidtype != P_LWPID ||
+	    procset->p_ridtype != P_LWPID)
+		err_proc = dotoprocs(procset, setprocprio, (char *)pcprio);
+
+	if (procset->p_lidtype == P_LWPID || procset->p_ridtype == P_LWPID) {
+		err_thread = dotolwp(procset, setthreadprio, (char *)pcprio);
+		/*
+		 * dotolwp() can return with p_lock held.  This is required
+		 * for the priocntl GETPARMS case.  So, here we just release
+		 * the p_lock.
+		 */
+		if (MUTEX_HELD(&curproc->p_lock))
+			mutex_exit(&curproc->p_lock);
+
+		/*
+		 * If we were called for a single LWP, then ignore ESRCH
+		 * returned by the previous dotoprocs() call.
+		 */
+		if (err_proc == ESRCH)
+			err_proc = 0;
+	}
+
+	/*
+	 * dotoprocs() ignores the init process if it is in the set, unless
+	 * it was the only process found.  We want to make sure init is not
+	 * excluded if we're doing a PC_GETPRIO operation.
+	 */
+	if (pcprio->pc_op == PC_GETPRIO) {
+		proc_t *initpp;
+
+		mutex_enter(&pidlock);
+		initpp = prfind(P_INITPID);
+		if (initpp != NULL && procinset(initpp, procset))
+			err = setprocprio(initpp, pcprio);
+		mutex_exit(&pidlock);
+	}
+
+	/*
+	 * Error precedence: a failure from the init pass above (err) wins;
+	 * otherwise report err_thread if set, and err_proc only when the
+	 * other two are zero.
+	 */
+	if (!err)
+		err = err_thread ? err_thread : err_proc;
+
+	return (err);
+}
diff --git a/usr/src/uts/common/disp/rt.c b/usr/src/uts/common/disp/rt.c
index 2b60fbe24e..43b42d5298 100644
--- a/usr/src/uts/common/disp/rt.c
+++ b/usr/src/uts/common/disp/rt.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -19,15 +18,15 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
 /*	  All Rights Reserved  	*/
 
-
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 
 #include <sys/types.h>
@@ -48,6 +47,7 @@
 #include <sys/rtpriocntl.h>
 #include <sys/kmem.h>
 #include <sys/systm.h>
+#include <sys/schedctl.h>
 #include <sys/errno.h>
 #include <sys/cpuvar.h>
 #include <sys/vmsystm.h>
@@ -122,6 +122,7 @@ static int	rt_vaparmsin(void *, pc_vaparms_t *);
 static int	rt_vaparmsout(void *, pc_vaparms_t *);
 static int	rt_parmsset(kthread_t *, void *, id_t, cred_t *);
 static int	rt_donice(kthread_t *, cred_t *, int, int *);
+static int	rt_doprio(kthread_t *, cred_t *, int, int *);
 static void	rt_exitclass(void *);
 static int	rt_canexit(kthread_t *, cred_t *);
 static void	rt_forkret(kthread_t *, kthread_t *);
@@ -182,6 +183,7 @@ static struct classfuncs rt_classfuncs = {
 	rt_globpri,
 	rt_nullsys,	/* set_process_group */
 	rt_yield,
+	rt_doprio,
 };
 
 /*
@@ -534,16 +536,16 @@ rt_getclinfo(void *infop)
 }
 
 /*
- * Return the global scheduling priority ranges of the realtime
- * class in pcpri_t structure.
+ * Return the user mode scheduling priority range.
  */
 static int
 rt_getclpri(pcpri_t *pcprip)
 {
-	pcprip->pc_clpmax = rt_dptbl[rt_maxpri].rt_globpri;
-	pcprip->pc_clpmin = rt_dptbl[0].rt_globpri;
+	pcprip->pc_clpmax = rt_maxpri;
+	pcprip->pc_clpmin = 0;
 	return (0);
 }
+
 static void
 rt_nullsys()
 {
@@ -1041,6 +1043,35 @@ rt_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
 	return (EINVAL);
 }
 
+/*
+ * Add incr to the thread's RT priority (rt_pri); incr == 0 is a query.
+ * *retvalp gets the new value before it is applied; EINVAL if out of range.
+ */
+static int
+rt_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
+{
+	int newpri;
+	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
+	rtkparms_t rtkparms;
+
+	/* Zero increment is a query: report rt_pri and change nothing */
+	if (incr == 0) {
+		*retvalp = rtpp->rt_pri;
+		return (0);
+	}
+
+	newpri = rtpp->rt_pri + incr;
+	if (newpri > rt_maxpri || newpri < 0)
+		return (EINVAL);
+
+	*retvalp = newpri;
+	rtkparms.rt_pri = newpri;
+	rtkparms.rt_tqntm = RT_NOCHANGE;
+	rtkparms.rt_tqsig = 0;
+	rtkparms.rt_cflags = RT_DOPRI;	/* only RT_DOPRI is requested */
+	return (rt_parmsset(t, &rtkparms, rt_cid, cr));
+}
+
 static int
 rt_alloc(void **p, int flag)
 {
@@ -1070,6 +1101,7 @@ rt_change_priority(kthread_t *t, rtproc_t *rtpp)
 
 	new_pri = rt_dptbl[rtpp->rt_pri].rt_globpri;
 
+	t->t_cpri = rtpp->rt_pri;
 	if (t == curthread || t->t_state == TS_ONPROC) {
 		cpu_t	*cp = t->t_disp_queue->disp_cpu;
 		THREAD_CHANGE_PRI(t, new_pri);
diff --git a/usr/src/uts/common/disp/sysclass.c b/usr/src/uts/common/disp/sysclass.c
index d48cc3145e..7323a0fc17 100644
--- a/usr/src/uts/common/disp/sysclass.c
+++ b/usr/src/uts/common/disp/sysclass.c
@@ -20,14 +20,13 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
 /*	  All Rights Reserved  	*/
 
-
 #pragma ident	"%Z%%M%	%I%	%E% SMI"	/* from SVr4.0 1.12 */
 
 #include <sys/types.h>
@@ -59,14 +58,15 @@
 
 pri_t		sys_init(id_t, int, classfuncs_t **);
 static int	sys_getclpri(pcpri_t *);
-static int	sys_fork(kthread_id_t, kthread_id_t, void *);
-static int	sys_enterclass(kthread_id_t, id_t, void *, cred_t *, void *);
-static int	sys_canexit(kthread_id_t, cred_t *);
+static int	sys_fork(kthread_t *, kthread_t *, void *);
+static int	sys_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
+static int	sys_canexit(kthread_t *, cred_t *);
 static int	sys_nosys();
-static int	sys_donice(kthread_id_t, cred_t *, int, int *);
-static void	sys_forkret(kthread_id_t, kthread_id_t);
+static int	sys_donice(kthread_t *, cred_t *, int, int *);
+static int	sys_doprio(kthread_t *, cred_t *, int, int *);
+static void	sys_forkret(kthread_t *, kthread_t *);
 static void	sys_nullsys();
-static pri_t	sys_swappri(kthread_id_t, int);
+static pri_t	sys_swappri(kthread_t *, int);
 static int	sys_alloc(void **, int);
 
 struct classfuncs sys_classfuncs = {
@@ -107,6 +107,7 @@ struct classfuncs sys_classfuncs = {
 		(pri_t (*)())sys_nosys,	/* globpri */
 		sys_nullsys,	/* set_process_group */
 		sys_nullsys,	/* yield */
+		sys_doprio,
 	}
 
 };
@@ -130,14 +131,14 @@ static int
 sys_getclpri(pcpri_t *pcprip)
 {
 	pcprip->pc_clpmax = maxclsyspri;
-	pcprip->pc_clpmin = 0;
+	pcprip->pc_clpmin = minclsyspri;
 	return (0);
 }
 
 /* ARGSUSED */
 static int
 sys_enterclass(t, cid, parmsp, reqpcredp, bufp)
-	kthread_id_t	t;
+	kthread_t	*t;
 	id_t		cid;
 	void		*parmsp;
 	cred_t		*reqpcredp;
@@ -148,7 +149,7 @@ sys_enterclass(t, cid, parmsp, reqpcredp, bufp)
 
 /* ARGSUSED */
 static int
-sys_canexit(kthread_id_t t, cred_t *reqpcredp)
+sys_canexit(kthread_t *t, cred_t *reqpcredp)
 {
 	return (0);
 }
@@ -156,8 +157,8 @@ sys_canexit(kthread_id_t t, cred_t *reqpcredp)
 /* ARGSUSED */
 static int
 sys_fork(t, ct, bufp)
-	kthread_id_t t;
-	kthread_id_t ct;
+	kthread_t *t;
+	kthread_t *ct;
 	void	*bufp;
 {
 	/*
@@ -170,8 +171,8 @@ sys_fork(t, ct, bufp)
 /* ARGSUSED */
 static void
 sys_forkret(t, ct)
-	kthread_id_t t;
-	kthread_id_t ct;
+	kthread_t *t;
+	kthread_t *ct;
 {
 	register proc_t *pp = ttoproc(t);
 	register proc_t *cp = ttoproc(ct);
@@ -196,7 +197,7 @@ sys_forkret(t, ct)
 /* ARGSUSED */
 static pri_t
 sys_swappri(t, flags)
-	kthread_id_t	t;
+	kthread_t	*t;
 	int		flags;
 {
 	return (-1);
@@ -216,11 +217,14 @@ sys_nullsys()
 
 /* ARGSUSED */
 static int
-sys_donice(t, cr, incr, retvalp)
-	kthread_id_t	t;
-	cred_t		*cr;
-	int		incr;
-	int		*retvalp;
+sys_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
+{
+	return (EINVAL);
+}
+
+/* ARGSUSED */
+static int
+sys_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
 {
 	return (EINVAL);
 }
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c
index 928b594602..27e6034f05 100644
--- a/usr/src/uts/common/disp/thread.c
+++ b/usr/src/uts/common/disp/thread.c
@@ -18,6 +18,7 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
@@ -71,6 +72,7 @@
 #include <sys/sdt.h>
 #include <sys/reboot.h>
 #include <sys/kdi.h>
+#include <sys/schedctl.h>
 #include <sys/waitq.h>
 #include <sys/cpucaps.h>
 #include <sys/kiconv.h>
@@ -1760,25 +1762,17 @@ thread_change_epri(kthread_t *t, pri_t disp_pri)
 	state = t->t_state;
 
 	/*
-	 * If it's not on a queue, change the priority with
-	 * impunity.
+	 * If it's not on a queue, change the priority with impunity.
 	 */
 	if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) {
 		t->t_epri = disp_pri;
-
 		if (state == TS_ONPROC) {
 			cpu_t *cp = t->t_disp_queue->disp_cpu;
 
 			if (t == cp->cpu_dispthread)
 				cp->cpu_dispatch_pri = DISP_PRIO(t);
 		}
-		return;
-	}
-
-	/*
-	 * It's either on a sleep queue or a run queue.
-	 */
-	if (state == TS_SLEEP) {
+	} else if (state == TS_SLEEP) {
 		/*
 		 * Take the thread out of its sleep queue.
 		 * Change the inherited priority.
@@ -1805,7 +1799,8 @@ thread_change_epri(kthread_t *t, pri_t disp_pri)
 		t->t_epri = disp_pri;
 		setbackdq(t);
 	}
-}	/* end of thread_change_epri */
+	schedctl_set_cidpri(t);
+}
 
 /*
  * Function: Change the t_pri field of a thread.
@@ -1825,8 +1820,7 @@ thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
 	THREAD_WILLCHANGE_PRI(t, disp_pri);
 
 	/*
-	 * If it's not on a queue, change the priority with
-	 * impunity.
+	 * If it's not on a queue, change the priority with impunity.
 	 */
 	if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) {
 		t->t_pri = disp_pri;
@@ -1837,13 +1831,7 @@ thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
 			if (t == cp->cpu_dispthread)
 				cp->cpu_dispatch_pri = DISP_PRIO(t);
 		}
-		return (0);
-	}
-
-	/*
-	 * It's either on a sleep queue or a run queue.
-	 */
-	if (state == TS_SLEEP) {
+	} else if (state == TS_SLEEP) {
 		/*
 		 * If the priority has changed, take the thread out of
 		 * its sleep queue and change the priority.
@@ -1880,5 +1868,6 @@ thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
 			setbackdq(t);
 		}
 	}
+	schedctl_set_cidpri(t);
 	return (on_rq);
 }
diff --git a/usr/src/uts/common/disp/ts.c b/usr/src/uts/common/disp/ts.c
index e071a80ab5..53612cf2bc 100644
--- a/usr/src/uts/common/disp/ts.c
+++ b/usr/src/uts/common/disp/ts.c
@@ -20,14 +20,13 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
 /*	  All Rights Reserved  	*/
 
-
 #pragma ident	"%Z%%M%	%I%	%E% SMI"	/* from SVr4.0 1.23 */
 
 #include <sys/types.h>
@@ -196,6 +195,7 @@ static int	ts_vaparmsout(void *, pc_vaparms_t *);
 static int	ts_parmsset(kthread_t *, void *, id_t, cred_t *);
 static void	ts_exit(kthread_t *);
 static int	ts_donice(kthread_t *, cred_t *, int, int *);
+static int	ts_doprio(kthread_t *, cred_t *, int, int *);
 static void	ts_exitclass(void *);
 static int	ts_canexit(kthread_t *, cred_t *);
 static void	ts_forkret(kthread_t *, kthread_t *);
@@ -221,6 +221,7 @@ static void	ts_free(void *);
 
 pri_t		ia_init(id_t, int, classfuncs_t **);
 static int	ia_getclinfo(void *);
+static int	ia_getclpri(pcpri_t *);
 static int	ia_parmsin(void *);
 static int	ia_vaparmsin(void *, pc_vaparms_t *);
 static int	ia_vaparmsout(void *, pc_vaparms_t *);
@@ -274,6 +275,7 @@ static struct classfuncs ts_classfuncs = {
 	ts_globpri,
 	ts_nullsys,	/* set_process_group */
 	ts_yield,
+	ts_doprio,
 };
 
 /*
@@ -290,7 +292,7 @@ static struct classfuncs ia_classfuncs = {
 	ts_parmsout,
 	ia_vaparmsin,
 	ia_vaparmsout,
-	ts_getclpri,
+	ia_getclpri,
 	ts_alloc,
 	ts_free,
 
@@ -318,6 +320,7 @@ static struct classfuncs ia_classfuncs = {
 	ts_globpri,
 	ia_set_process_group,
 	ts_yield,
+	ts_doprio,
 };
 
 
@@ -615,8 +618,7 @@ ts_enterclass(kthread_t *t, id_t cid, void *parmsp,
 
 		tspp->ts_uprilim = reqtsuprilim;
 		tspp->ts_upri = reqtsupri;
-		tspp->ts_nice = NZERO - (NZERO * reqtsupri)
-			/ ts_maxupri;
+		tspp->ts_nice = NZERO - (NZERO * reqtsupri) / ts_maxupri;
 	}
 	TS_NEWUMDPRI(tspp);
 
@@ -788,14 +790,22 @@ ia_getclinfo(void *infop)
 
 
 /*
- * Return the global scheduling priority ranges for the timesharing
- * class in pcpri_t structure.
+ * Return the user mode scheduling priority range.
  */
 static int
 ts_getclpri(pcpri_t *pcprip)
 {
-	pcprip->pc_clpmax = ts_dptbl[ts_maxumdpri].ts_globpri;
-	pcprip->pc_clpmin = ts_dptbl[0].ts_globpri;
+	pcprip->pc_clpmax = ts_maxupri;
+	pcprip->pc_clpmin = -ts_maxupri;
+	return (0);
+}
+
+
+static int
+ia_getclpri(pcpri_t *pcprip)
+{
+	pcprip->pc_clpmax = ia_maxupri;
+	pcprip->pc_clpmin = -ia_maxupri;
 	return (0);
 }
 
@@ -833,7 +843,6 @@ ia_parmsget(kthread_t *t, void *parmsp)
 		iaparmsp->ia_mode = IA_SET_INTERACTIVE;
 	else
 		iaparmsp->ia_mode = IA_INTERACTIVE_OFF;
-	iaparmsp->ia_nice = tspp->ts_nice;
 }
 
 
@@ -1759,7 +1768,7 @@ ts_tick(kthread_t *t)
 			TRACE_2(TR_FAC_DISP, TR_TICK,
 			    "tick:tid %p old pri %d", t, oldpri);
 		} else if (t->t_state == TS_ONPROC &&
-			    t->t_pri < t->t_disp_queue->disp_maxrunpri) {
+		    t->t_pri < t->t_disp_queue->disp_maxrunpri) {
 			call_cpu_surrender = B_TRUE;
 		}
 	}
@@ -2107,7 +2116,7 @@ ts_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
 		newnice = 0;
 
 	tsparms.ts_uprilim = tsparms.ts_upri =
-		-((newnice - NZERO) * ts_maxupri) / NZERO;
+	    -((newnice - NZERO) * ts_maxupri) / NZERO;
 	/*
 	 * Reset the uprilim and upri values of the thread.
 	 * Call ts_parmsset even if thread is interactive since we're
@@ -2130,6 +2139,38 @@ ts_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
 	return (0);
 }
 
+/*
+ * Add incr to the thread's user priority (ts_upri); incr == 0 is a query.
+ * *retvalp gets the new value before it is applied; EINVAL if out of range.
+ */
+static int
+ts_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
+{
+	int		newpri;
+	tsproc_t	*tspp = (tsproc_t *)(t->t_cldata);
+	tsparms_t	tsparms;
+
+	ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
+
+	/* Zero increment is a query: report ts_upri and change nothing */
+	if (incr == 0) {
+		*retvalp = tspp->ts_upri;
+		return (0);
+	}
+
+	newpri = tspp->ts_upri + incr;
+	if (newpri > ts_maxupri || newpri < -ts_maxupri)
+		return (EINVAL);
+
+	*retvalp = newpri;
+	tsparms.ts_uprilim = tsparms.ts_upri = newpri;
+	/*
+	 * Install newpri as both uprilim and upri.  ia_classfuncs shares
+	 * this routine; ts_parmsset() is still the right call for an
+	 * interactive thread because the mode is not being changed.
+	 */
+	return (ts_parmsset(t, &tsparms, 0, cr));
+}
 
 /*
  * ia_set_process_group marks foreground processes as interactive
@@ -2324,6 +2365,7 @@ ts_change_priority(kthread_t *t, tsproc_t *tspp)
 	new_pri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
 	ASSERT(new_pri >= 0 && new_pri <= ts_maxglobpri);
 	tspp->ts_flags &= ~TSRESTORE;
+	t->t_cpri = tspp->ts_upri;
 	if (t == curthread || t->t_state == TS_ONPROC) {
 		/* curthread is always onproc */
 		cpu_t	*cp = t->t_disp_queue->disp_cpu;
diff --git a/usr/src/uts/common/fs/proc/prcontrol.c b/usr/src/uts/common/fs/proc/prcontrol.c
index 227b732fc3..e67ba67e04 100644
--- a/usr/src/uts/common/fs/proc/prcontrol.c
+++ b/usr/src/uts/common/fs/proc/prcontrol.c
@@ -338,11 +338,11 @@ pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr)
 				break;
 			}
 
-		timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
-		if ((error = pr_wait_stop(pnp, timeo)) != 0)
-			return (error);
+			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
+			if ((error = pr_wait_stop(pnp, timeo)) != 0)
+				return (error);
 
-		break;
+			break;
 		}
 
 	case PCRUN:	/* make lwp or process runnable */
@@ -960,10 +960,8 @@ pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr)
 		}
 
 	case PCSPRIV:	/* set the process privileges */
-		{
-			error = pr_spriv(p, &argp->prpriv, cr);
-			break;
-		}
+		error = pr_spriv(p, &argp->prpriv, cr);
+		break;
 
 	case PCSZONE:	/* set the process's zoneid */
 		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
@@ -1548,6 +1546,7 @@ pr_nice(proc_t *p, int nice, cred_t *cr)
 	do {
 		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
 		err = CL_DONICE(t, cr, nice, (int *)NULL);
+		schedctl_set_cidpri(t);
 		if (error == 0)
 			error = err;
 	} while ((t = t->t_forw) != p->p_tlist);
@@ -1925,7 +1924,7 @@ pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
 	pwa->wa_eaddr = (caddr_t)vaddr + size;
 	pwa->wa_flags = (ulong_t)wflags;
 
-	error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0) ?
+	error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
 	    clear_watched_area(p, pwa) : set_watched_area(p, pwa);
 
 	if (p == curproc) {
diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c
index 1162410939..c821d6f964 100644
--- a/usr/src/uts/common/os/exec.c
+++ b/usr/src/uts/common/os/exec.c
@@ -18,6 +18,7 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
@@ -28,7 +29,6 @@
 /*	Copyright (c) 1988 AT&T	*/
 /*	  All Rights Reserved  	*/
 
-
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/sysmacros.h>
@@ -57,6 +57,7 @@
 #include <sys/prsystm.h>
 #include <sys/modctl.h>
 #include <sys/vmparam.h>
+#include <sys/door.h>
 #include <sys/schedctl.h>
 #include <sys/utrap.h>
 #include <sys/systeminfo.h>
@@ -1441,12 +1442,10 @@ stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
 		if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0)
 			return (error);
 		if (args->brandname != NULL &&
-		    (error = stk_add(args, args->brandname,
-		    UIO_SYSSPACE)) != 0)
+		    (error = stk_add(args, args->brandname, UIO_SYSSPACE)) != 0)
 			return (error);
 		if (args->emulator != NULL &&
-		    (error = stk_add(args, args->emulator,
-		    UIO_SYSSPACE)) != 0)
+		    (error = stk_add(args, args->emulator, UIO_SYSSPACE)) != 0)
 			return (error);
 	}
 
diff --git a/usr/src/uts/common/os/fork.c b/usr/src/uts/common/os/fork.c
index 2ed10db14a..74d791ac47 100644
--- a/usr/src/uts/common/os/fork.c
+++ b/usr/src/uts/common/os/fork.c
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -592,6 +592,7 @@ cfork(int isvfork, int isfork1, int flags)
 		 * and disappear before CL_FORKRET() is called.
 		 */
 		CL_FORKRET(curthread, cp->p_tlist);
+		schedctl_set_cidpri(curthread);
 		ASSERT(MUTEX_NOT_HELD(&pidlock));
 	}
 
@@ -602,8 +603,7 @@ forklwperr:
 		if (avl_numnodes(&p->p_wpage) != 0) {
 			/* restore watchpoints to parent */
 			as = p->p_as;
-			AS_LOCK_ENTER(as, &as->a_lock,
-			    RW_WRITER);
+			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 			as->a_wpage = p->p_wpage;
 			avl_create(&p->p_wpage, wp_compare,
 			    sizeof (struct watched_page),
diff --git a/usr/src/uts/common/os/lwp.c b/usr/src/uts/common/os/lwp.c
index a9f1aa2588..39ca56ac7d 100644
--- a/usr/src/uts/common/os/lwp.c
+++ b/usr/src/uts/common/os/lwp.c
@@ -410,6 +410,11 @@ grow:
 		if (p != curproc || curthread->t_cid != cid) {
 			err = CL_ENTERCLASS(t, cid, NULL, NULL, bufp);
 			t->t_pri = pri;	/* CL_ENTERCLASS may have changed it */
+			/*
+			 * We don't call schedctl_set_cidpri(t) here
+			 * because the schedctl data is not yet set
+			 * up for the newly-created lwp.
+			 */
 		} else {
 			t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
 			err = CL_FORK(curthread, t, bufp);
diff --git a/usr/src/uts/common/os/pool.c b/usr/src/uts/common/os/pool.c
index 03a2f7121a..80b05f90e0 100644
--- a/usr/src/uts/common/os/pool.c
+++ b/usr/src/uts/common/os/pool.c
@@ -18,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -44,6 +45,7 @@
 #include <sys/atomic.h>
 #include <sys/zone.h>
 #include <sys/policy.h>
+#include <sys/schedctl.h>
 
 /*
  * RESOURCE POOLS
@@ -1217,6 +1219,7 @@ pool_change_class(proc_t *p, id_t cid)
 			ret = CL_ENTERCLASS(t, cid, NULL, NULL, *buf);
 			ASSERT(ret == 0);
 			CL_EXITCLASS(oldcid, cldata);
+			schedctl_set_cidpri(t);
 			*buf++ = NULL;
 		}
 	} while ((t = t->t_forw) != p->p_tlist);
diff --git a/usr/src/uts/common/os/schedctl.c b/usr/src/uts/common/os/schedctl.c
index 98c8457523..4a5ccc9944 100644
--- a/usr/src/uts/common/os/schedctl.c
+++ b/usr/src/uts/common/os/schedctl.c
@@ -119,6 +119,9 @@ schedctl(void)
 		thread_lock(t);	/* protect against ts_tick and ts_update */
 		t->t_schedctl = ssp;
 		t->t_sc_uaddr = uaddr;
+		ssp->sc_cid = t->t_cid;
+		ssp->sc_cpri = t->t_cpri;
+		ssp->sc_priority = DISP_PRIO(t);
 		thread_unlock(t);
 	}
 
@@ -204,7 +207,7 @@ schedctl_proc_cleanup(void)
  * Called by resume just before switching away from the current thread.
  * Save new thread state.
  */
-void
+static void
 schedctl_save(sc_shared_t *ssp)
 {
 	ssp->sc_state = curthread->t_state;
@@ -215,7 +218,7 @@ schedctl_save(sc_shared_t *ssp)
  * Called by resume after switching to the current thread.
  * Save new thread state and CPU.
  */
-void
+static void
 schedctl_restore(sc_shared_t *ssp)
 {
 	ssp->sc_state = SC_ONPROC;
@@ -227,7 +230,7 @@ schedctl_restore(sc_shared_t *ssp)
  * On fork, remove inherited mappings from the child's address space.
  * The child's threads must call schedctl() to get new shared mappings.
  */
-void
+static void
 schedctl_fork(kthread_t *pt, kthread_t *ct)
 {
 	proc_t *pp = ttoproc(pt);
@@ -253,7 +256,7 @@ schedctl_fork(kthread_t *pt, kthread_t *ct)
 
 /*
  * Returns non-zero if the specified thread shouldn't be preempted at this time.
- * Called by ts_preempt, ts_tick, and ts_update.
+ * Called by ts_preempt(), ts_tick(), and ts_update().
  */
 int
 schedctl_get_nopreempt(kthread_t *t)
@@ -265,7 +268,7 @@ schedctl_get_nopreempt(kthread_t *t)
 
 /*
  * Sets the value of the nopreempt field for the specified thread.
- * Called by ts_preempt to clear the field on preemption.
+ * Called by ts_preempt() to clear the field on preemption.
  */
 void
 schedctl_set_nopreempt(kthread_t *t, short val)
@@ -276,10 +279,11 @@ schedctl_set_nopreempt(kthread_t *t, short val)
 
 
 /*
- * Sets the value of the yield field for the specified thread.  Called by
- * ts_preempt and ts_tick to set the field, and ts_yield to clear it.
- * The kernel never looks at this field so we don't need a schedctl_get_yield
- * function.
+ * Sets the value of the yield field for the specified thread.
+ * Called by ts_preempt() and ts_tick() to set the field, and
+ * ts_yield() to clear it.
+ * The kernel never looks at this field so we don't need a
+ * schedctl_get_yield() function.
  */
 void
 schedctl_set_yield(kthread_t *t, short val)
@@ -290,6 +294,24 @@ schedctl_set_yield(kthread_t *t, short val)
 
 
 /*
+ * Sets the values of the cid and priority fields for the specified thread.
+ * Called from thread_change_pri(), thread_change_epri(), THREAD_CHANGE_PRI().
+ * Called following calls to CL_FORKRET(), CL_ENTERCLASS() and CL_DONICE().
+ */
+void
+schedctl_set_cidpri(kthread_t *t)
+{
+	sc_shared_t *tdp = t->t_schedctl;
+
+	if (tdp != NULL) {
+		tdp->sc_cid = t->t_cid;
+		tdp->sc_cpri = t->t_cpri;
+		tdp->sc_priority = DISP_PRIO(t);
+	}
+}
+
+
+/*
  * Returns non-zero if the specified thread has requested that all
  * signals be blocked.  Called by signal-related code that tests
  * the signal mask of a thread that may not be the current thread
@@ -443,7 +465,7 @@ schedctl_init(void)
 }
 
 
-int
+static int
 schedctl_shared_alloc(sc_shared_t **kaddrp, uintptr_t *uaddrp)
 {
 	proc_t		*p = curproc;
diff --git a/usr/src/uts/common/sys/class.h b/usr/src/uts/common/sys/class.h
index 9988ca3190..9ec496fdbb 100644
--- a/usr/src/uts/common/sys/class.h
+++ b/usr/src/uts/common/sys/class.h
@@ -18,15 +18,15 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*	Copyright (c) 1988 AT&T	*/
 /*	  All Rights Reserved  	*/
 
-
 #ifndef _SYS_CLASS_H
 #define	_SYS_CLASS_H
 
@@ -72,29 +72,30 @@ typedef struct class_ops {
 } class_ops_t;
 
 typedef struct thread_ops {
-	int	(*cl_enterclass)(kthread_id_t, id_t, void *, cred_t *, void *);
+	int	(*cl_enterclass)(kthread_t *, id_t, void *, cred_t *, void *);
 	void	(*cl_exitclass)(void *);
-	int	(*cl_canexit)(kthread_id_t, cred_t *);
-	int	(*cl_fork)(kthread_id_t, kthread_id_t, void *);
-	void	(*cl_forkret)(kthread_id_t, kthread_id_t);
-	void	(*cl_parmsget)(kthread_id_t, void *);
-	int	(*cl_parmsset)(kthread_id_t, void *, id_t, cred_t *);
-	void	(*cl_stop)(kthread_id_t, int, int);
-	void	(*cl_exit)(kthread_id_t);
-	void	(*cl_active)(kthread_id_t);
-	void	(*cl_inactive)(kthread_id_t);
-	pri_t	(*cl_swapin)(kthread_id_t, int);
-	pri_t 	(*cl_swapout)(kthread_id_t, int);
-	void 	(*cl_trapret)(kthread_id_t);
-	void	(*cl_preempt)(kthread_id_t);
-	void	(*cl_setrun)(kthread_id_t);
-	void	(*cl_sleep)(kthread_id_t);
-	void	(*cl_tick)(kthread_id_t);
-	void	(*cl_wakeup)(kthread_id_t);
-	int	(*cl_donice)(kthread_id_t, cred_t *, int, int *);
-	pri_t	(*cl_globpri)(kthread_id_t);
+	int	(*cl_canexit)(kthread_t *, cred_t *);
+	int	(*cl_fork)(kthread_t *, kthread_t *, void *);
+	void	(*cl_forkret)(kthread_t *, kthread_t *);
+	void	(*cl_parmsget)(kthread_t *, void *);
+	int	(*cl_parmsset)(kthread_t *, void *, id_t, cred_t *);
+	void	(*cl_stop)(kthread_t *, int, int);
+	void	(*cl_exit)(kthread_t *);
+	void	(*cl_active)(kthread_t *);
+	void	(*cl_inactive)(kthread_t *);
+	pri_t	(*cl_swapin)(kthread_t *, int);
+	pri_t 	(*cl_swapout)(kthread_t *, int);
+	void 	(*cl_trapret)(kthread_t *);
+	void	(*cl_preempt)(kthread_t *);
+	void	(*cl_setrun)(kthread_t *);
+	void	(*cl_sleep)(kthread_t *);
+	void	(*cl_tick)(kthread_t *);
+	void	(*cl_wakeup)(kthread_t *);
+	int	(*cl_donice)(kthread_t *, cred_t *, int, int *);
+	pri_t	(*cl_globpri)(kthread_t *);
 	void	(*cl_set_process_group)(pid_t, pid_t, pid_t);
-	void	(*cl_yield)(kthread_id_t);
+	void	(*cl_yield)(kthread_t *);
+	int	(*cl_doprio)(kthread_t *, cred_t *, int, int *);
 } thread_ops_t;
 
 typedef struct classfuncs {
@@ -134,8 +135,8 @@ extern int	getcid(char *, id_t *);
 extern int	getcidbyname(char *, id_t *);
 extern int	parmsin(pcparms_t *, pc_vaparms_t *);
 extern int	parmsout(pcparms_t *, pc_vaparms_t *);
-extern int	parmsset(pcparms_t *, kthread_id_t);
-extern void	parmsget(kthread_id_t, pcparms_t *);
+extern int	parmsset(pcparms_t *, kthread_t *);
+extern void	parmsget(kthread_t *, pcparms_t *);
 extern int	vaparmsout(char *, pcparms_t *, pc_vaparms_t *, uio_seg_t);
 
 #endif
@@ -207,6 +208,9 @@ extern int	vaparmsout(char *, pcparms_t *, pc_vaparms_t *, uio_seg_t);
 #define	CL_DONICE(t, cr, inc, ret) \
 	(*(t)->t_clfuncs->cl_donice)(t, cr, inc, ret)
 
+#define	CL_DOPRIO(t, cr, inc, ret) \
+	(*(t)->t_clfuncs->cl_doprio)(t, cr, inc, ret)
+
 #define	CL_GLOBPRI(t)		(*(t)->t_clfuncs->cl_globpri)(t)
 
 #define	CL_SET_PROCESS_GROUP(t, s, b, f) \
diff --git a/usr/src/uts/common/sys/iapriocntl.h b/usr/src/uts/common/sys/iapriocntl.h
index 2c97d4d6e5..ad12db44ae 100644
--- a/usr/src/uts/common/sys/iapriocntl.h
+++ b/usr/src/uts/common/sys/iapriocntl.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -19,15 +18,15 @@
  *
  * CDDL HEADER END
  */
-/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
-/*	  All Rights Reserved  	*/
-
 
 /*
- * Copyright (c) 2001 by Sun Microsystems, Inc.
- * All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
  */
 
+/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
+/*	  All Rights Reserved  	*/
+
 #ifndef _SYS_IAPRIOCNTL_H
 #define	_SYS_IAPRIOCNTL_H
 
@@ -53,7 +52,6 @@ typedef struct iaparms {
 	pri_t	ia_uprilim;		/* user priority limit */
 	pri_t	ia_upri;		/* user priority */
 	int	ia_mode;		/* interactive on/off */
-	int	ia_nice;		/* present nice value */
 } iaparms_t;
 
 typedef struct iaclass {
@@ -71,7 +69,6 @@ typedef struct iainfo {
 #define	IANPROCS	60
 #define	IA_INTERACTIVE_OFF	0x00	/* thread is not interactive */
 #define	IA_SET_INTERACTIVE	0x01	/* thread is interactive */
-#define	IA_NICED		0x02	/* thread has been niced */
 #define	IA_BOOST	10		/* value for boost */
 
 /*
diff --git a/usr/src/uts/common/sys/priocntl.h b/usr/src/uts/common/sys/priocntl.h
index 6475ed0a4c..1f88f47c66 100644
--- a/usr/src/uts/common/sys/priocntl.h
+++ b/usr/src/uts/common/sys/priocntl.h
@@ -19,14 +19,14 @@
  * CDDL HEADER END
  */
 
-/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
-/*	  All Rights Reserved  	*/
-
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
+/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
+/*	  All Rights Reserved  	*/
+
 #ifndef _SYS_PRIOCNTL_H
 #define	_SYS_PRIOCNTL_H
 
@@ -58,14 +58,15 @@ extern long	priocntl(), priocntlset();
 #define	PC_SETPARMS	2	/* Set scheduling parameters */
 #define	PC_GETPARMS	3	/* Get scheduling parameters */
 #define	PC_ADMIN	4	/* Scheduler administration (used by */
-				/*   dispadmin(1M), not for general use) */
-#define	PC_GETPRIRANGE	5	/* Get global priority range for a class */
+				/* dispadmin(1M), not for general use) */
+#define	PC_GETPRIRANGE	5	/* Get priority range for a class */
 				/* posix.4 scheduling, not for general use */
 #define	PC_DONICE	6	/* Set or get nice value */
 #define	PC_SETXPARMS	7	/* Set extended scheduling parameters */
 #define	PC_GETXPARMS	8	/* Get extended scheduling parameters */
 #define	PC_SETDFLCL	9	/* Set default class, not for general use */
 #define	PC_GETDFLCL	10	/* Get default class, not for general use */
+#define	PC_DOPRIO	11	/* Set or get priority, not for general use */
 
 #define	PC_CLNULL	-1
 
@@ -76,6 +77,9 @@ extern long	priocntl(), priocntlset();
 #define	PC_GETNICE	0
 #define	PC_SETNICE	1
 
+#define	PC_GETPRIO	0
+#define	PC_SETPRIO	1
+
 typedef struct pcinfo {
 	id_t	pc_cid;			/* class id */
 	char	pc_clname[PC_CLNMSZ];	/* class name */
@@ -92,6 +96,12 @@ typedef struct pcnice {
 	int	pc_op;			/* type of operation, set or get */
 } pcnice_t;
 
+typedef struct pcprio {
+	int	pc_op;			/* type of operation, set or get */
+	id_t	pc_cid;			/* class id */
+	int	pc_val;			/* priority value */
+} pcprio_t;
+
 /*
  * The following is used by the priocntl(2) varargs interface (command
  * codes: PC_SETXPARMS and PC_GETXPARMS).
@@ -141,8 +151,8 @@ typedef struct {
 
 typedef struct pcpri {
 	id_t	pc_cid;			/* process class */
-	pri_t	pc_clpmax;		/* class global priority max */
-	pri_t	pc_clpmin;		/* class global priority min */
+	pri_t	pc_clpmax;		/* class priority max */
+	pri_t	pc_clpmin;		/* class priority min */
 } pcpri_t;
 
 /*
diff --git a/usr/src/uts/common/sys/schedctl.h b/usr/src/uts/common/sys/schedctl.h
index 74a534d48c..010b343669 100644
--- a/usr/src/uts/common/sys/schedctl.h
+++ b/usr/src/uts/common/sys/schedctl.h
@@ -43,13 +43,6 @@ extern "C" {
 
 #include <sys/types.h>
 #include <sys/processor.h>
-#ifdef	_KERNEL
-#include <sys/mutex.h>
-#include <sys/thread.h>
-#include <sys/vnode.h>
-#include <sys/cpuvar.h>
-#include <sys/door.h>
-#endif	/* _KERNEL */
 
 /*
  * This "public" portion of the sc_shared data is used by libsched/libc.
@@ -69,7 +62,10 @@ typedef struct sc_shared {
 	volatile char	sc_sigblock;	/* all signals blocked */
 	volatile uchar_t sc_flgs;	/* set only by curthread; see below */
 	volatile processorid_t sc_cpu;	/* last CPU on which LWP ran */
-	int		sc_pad;
+	volatile char	sc_cid;		/* scheduling class id */
+	volatile char	sc_cpri;	/* class priority, -128..127 */
+	volatile uchar_t sc_priority;	/* dispatch priority, 0..255 */
+	char		sc_pad;
 	sc_public_t	sc_preemptctl;	/* preemption control data */
 } sc_shared_t;
 
@@ -100,6 +96,7 @@ void	schedctl_proc_cleanup(void);
 int	schedctl_get_nopreempt(kthread_t *);
 void	schedctl_set_nopreempt(kthread_t *, short);
 void	schedctl_set_yield(kthread_t *, short);
+void	schedctl_set_cidpri(kthread_t *);
 int	schedctl_sigblock(kthread_t *);
 void	schedctl_finish_sigblock(kthread_t *);
 int	schedctl_cancel_pending(void);
diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h
index 7302289ea1..78ded14796 100644
--- a/usr/src/uts/common/sys/thread.h
+++ b/usr/src/uts/common/sys/thread.h
@@ -121,6 +121,7 @@ typedef struct _kthread {
 	uint_t	t_state;	/* thread state	(protected by thread_lock) */
 	pri_t	t_pri;		/* assigned thread priority */
 	pri_t	t_epri;		/* inherited thread priority */
+	pri_t	t_cpri;		/* thread scheduling class priority */
 	char	t_writer;	/* sleeping in lwp_rwlock_lock(RW_WRITE_LOCK) */
 	label_t	t_pcb;		/* pcb, save area when switching */
 	lwpchan_t t_lwpchan;	/* reason for blocking */
@@ -584,6 +585,7 @@ caddr_t	thread_stk_init(caddr_t);	/* init thread stack */
 	pri_t __new_pri = (pri);					\
 	DTRACE_SCHED2(change__pri, kthread_t *, (t), pri_t, __new_pri);	\
 	(t)->t_pri = __new_pri;						\
+	schedctl_set_cidpri(t);						\
 }
 
 /*
diff --git a/usr/src/uts/common/syscall/nice.c b/usr/src/uts/common/syscall/nice.c
index 55db136f7b..056486575c 100644
--- a/usr/src/uts/common/syscall/nice.c
+++ b/usr/src/uts/common/syscall/nice.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -19,10 +18,14 @@
  *
  * CDDL HEADER END
  */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
 /*	  All Rights Reserved  	*/
-/*	Copyright (c) 1994 Sun Microsystems, Inc. */
-
 
 #pragma ident	"%Z%%M%	%I%	%E% SMI"	/* from SVr4.0 1.15 */
 
@@ -36,6 +39,7 @@
 #include <sys/debug.h>
 #include <sys/class.h>
 #include <sys/mutex.h>
+#include <sys/schedctl.h>
 
 /*
  * We support the nice system call for compatibility although
@@ -49,13 +53,14 @@ nice(int niceness)
 {
 	int error = 0;
 	int err, retval;
-	kthread_id_t t;
-	proc_t	*p = curproc;
+	kthread_t *t;
+	proc_t *p = curproc;
 
 	mutex_enter(&p->p_lock);
 	t = p->p_tlist;
 	do {
 		err = CL_DONICE(t, CRED(), niceness, &retval);
+		schedctl_set_cidpri(t);
 		if (error == 0 && err)
 			error = set_errno(err);
 	} while ((t = t->t_forw) != p->p_tlist);
author	raf <none@none>	2008-03-20 14:44:26 -0700
committer	raf <none@none>	2008-03-20 14:44:26 -0700
commit	d4204c85a44d2589b9afff2c81db7044e97f2d1d (patch)
tree	c46b62d4b195c18f308a9612c919ac3000e2db40
parent	32fd284719e6d05c88b17f3b407c96e4aef0c1ee (diff)
download	illumos-gate-d4204c85a44d2589b9afff2c81db7044e97f2d1d.tar.gz