diff options
author | raf <none@none> | 2008-03-20 14:44:26 -0700 |
---|---|---|
committer | raf <none@none> | 2008-03-20 14:44:26 -0700 |
commit | d4204c85a44d2589b9afff2c81db7044e97f2d1d (patch) | |
tree | c46b62d4b195c18f308a9612c919ac3000e2db40 | |
parent | 32fd284719e6d05c88b17f3b407c96e4aef0c1ee (diff) | |
download | illumos-gate-d4204c85a44d2589b9afff2c81db7044e97f2d1d.tar.gz |
PSARC 2007/661 delete sched_nice
PSARC 2008/039 POSIX scheduling interfaces
6647542 POSIX scheduling should be compatible with Solaris scheduling classes
--HG--
rename : usr/src/lib/libc/inc/rtsched.h => deleted_files/usr/src/lib/libc/inc/rtsched.h
rename : usr/src/lib/libc/port/threads/rtsched.c => deleted_files/usr/src/lib/libc/port/threads/rtsched.c
47 files changed, 2046 insertions, 1609 deletions
diff --git a/usr/src/lib/libc/inc/rtsched.h b/deleted_files/usr/src/lib/libc/inc/rtsched.h index 90ae11c3b2..90ae11c3b2 100644 --- a/usr/src/lib/libc/inc/rtsched.h +++ b/deleted_files/usr/src/lib/libc/inc/rtsched.h diff --git a/usr/src/lib/libc/port/threads/rtsched.c b/deleted_files/usr/src/lib/libc/port/threads/rtsched.c index c76e0b722a..c76e0b722a 100644 --- a/usr/src/lib/libc/port/threads/rtsched.c +++ b/deleted_files/usr/src/lib/libc/port/threads/rtsched.c diff --git a/usr/src/cmd/mdb/common/modules/libc/libc.c b/usr/src/cmd/mdb/common/modules/libc/libc.c index 584aea7ad4..18114b0021 100644 --- a/usr/src/cmd/mdb/common/modules/libc/libc.c +++ b/usr/src/cmd/mdb/common/modules/libc/libc.c @@ -477,15 +477,15 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) ulwp.ul_ustack.ss_size, stack_flags(&ulwp.ul_ustack)); - HD("ix lwpid pri mappedpri policy pri_mapped"); + HD("ix lwpid pri epri policy cid"); mdb_printf(OFFSTR "%-10d %-10d %-10d %-10d %-10d %d\n", OFFSET(ul_ix), ulwp.ul_ix, ulwp.ul_lwpid, ulwp.ul_pri, - ulwp.ul_mappedpri, + ulwp.ul_epri, ulwp.ul_policy, - ulwp.ul_pri_mapped); + ulwp.ul_cid); HD("cursig pleasestop stop signalled dead unwind"); mdb_printf(OFFSTR "%-10d ", @@ -520,15 +520,15 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) ulwp.ul_max_spinners, ulwp.ul_door_noreserve); - HD("queue_fifo c'w'defer e'detect' async_safe pad1 save_state"); + HD("queue_fifo c'w'defer e'detect' async_safe rt rtqueued"); mdb_printf(OFFSTR "%-10d %-10d %-10d %-10d %-10d %d\n", OFFSET(ul_queue_fifo), ulwp.ul_queue_fifo, ulwp.ul_cond_wait_defer, ulwp.ul_error_detection, ulwp.ul_async_safe, - ulwp.ul_pad1, - ulwp.ul_save_state); + ulwp.ul_rt, + ulwp.ul_rtqueued); HD("adapt'spin queue_spin critical sigdefer vfork"); mdb_printf(OFFSTR "%-10d %-10d %-10d %-10d %d\n", @@ -607,12 +607,11 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) prt_addr(ulwp.ul_sleepq, 1), prt_addr(ulwp.ul_cvmutex, 0)); - HD("mxchain 
epri emappedpri"); - mdb_printf(OFFSTR "%s %-10d %d\n", + HD("mxchain save_state"); + mdb_printf(OFFSTR "%s %d\n", OFFSET(ul_mxchain), prt_addr(ulwp.ul_mxchain, 1), - ulwp.ul_epri, - ulwp.ul_emappedpri); + ulwp.ul_save_state); HD("rdlockcnt rd_rwlock rd_count"); mdb_printf(OFFSTR "%-21d %s %d\n", @@ -629,7 +628,7 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) prt_addr(ulwp.ul_tpdp, 0)); HD("siglink s'l'spin s'l'spin2 s'l'sleep s'l'wakeup"); - mdb_printf(OFFSTR "%s %-10d %-10d %-10d %-10d\n", + mdb_printf(OFFSTR "%s %-10d %-10d %-10d %d\n", OFFSET(ul_siglink), prt_addr(ulwp.ul_siglink, 1), ulwp.ul_spin_lock_spin, @@ -637,6 +636,13 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) ulwp.ul_spin_lock_sleep, ulwp.ul_spin_lock_wakeup); + HD("&queue_root rtclassid pilocks"); + mdb_printf(OFFSTR "%s %-10d %d\n", + OFFSET(ul_queue_root), + prt_addr((void *)(addr + OFFSET(ul_queue_root)), 1), + ulwp.ul_rtclassid, + ulwp.ul_pilocks); + /* * The remainder of the ulwp_t structure * is invalid if this is a replacement. diff --git a/usr/src/cmd/priocntl/rtpriocntl.c b/usr/src/cmd/priocntl/rtpriocntl.c index 8a7340589f..12efb5dc83 100644 --- a/usr/src/cmd/priocntl/rtpriocntl.c +++ b/usr/src/cmd/priocntl/rtpriocntl.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -88,8 +88,8 @@ int main(int argc, char *argv[]) { int c; - int lflag, dflag, sflag, pflag, tflag, rflag, eflag, iflag; - int qflag; + int lflag, dflag, sflag, pflag; + int tflag, rflag, eflag, iflag, qflag; pri_t rtpri; long tqntm; long res; @@ -101,8 +101,8 @@ main(int argc, char *argv[]) (void) strlcpy(cmdpath, argv[0], MAXPATHLEN); (void) strlcpy(basenm, basename(argv[0]), BASENMSZ); - qflag = - lflag = dflag = sflag = pflag = tflag = rflag = eflag = iflag = 0; + lflag = dflag = sflag = pflag = 0; + tflag = rflag = eflag = iflag = qflag = 0; while ((c = getopt(argc, argv, "ldsp:t:r:q:ec:i:")) != -1) { switch (c) { @@ -246,7 +246,7 @@ main(int argc, char *argv[]) /* - * Print our class name and the maximum configured real-time priority. + * Print our class name and the configured user priority range. */ static void print_rtinfo(void) @@ -260,7 +260,7 @@ print_rtinfo(void) if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) == -1) fatalerr("\tCan't get maximum configured RT priority\n"); - (void) printf("\tMaximum Configured RT Priority: %d\n", + (void) printf("\tConfigured RT User Priority Range: 0 through %d\n", ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri); } diff --git a/usr/src/cmd/smserverd/smediad.c b/usr/src/cmd/smserverd/smediad.c index d9512604e0..b69be16694 100644 --- a/usr/src/cmd/smserverd/smediad.c +++ b/usr/src/cmd/smserverd/smediad.c @@ -18,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -2969,7 +2970,9 @@ main(int argc, char **argv) if (pmclose) { (void) pthread_attr_init(&attr); (void) pthread_attr_setscope(&attr, - PTHREAD_SCOPE_SYSTEM | PTHREAD_CREATE_DETACHED); + PTHREAD_SCOPE_SYSTEM); + (void) pthread_attr_setdetachstate(&attr, + PTHREAD_CREATE_DETACHED); if (pthread_create(NULL, &attr, closedown, NULL) != 0) { syslog(LOG_ERR, gettext( "cannot create closedown thread")); @@ -2998,7 +3001,9 @@ main(int argc, char **argv) svcstart_level = get_run_level(); (void) pthread_attr_init(&attr); (void) pthread_attr_setscope(&attr, - PTHREAD_SCOPE_SYSTEM | PTHREAD_CREATE_DETACHED); + PTHREAD_SCOPE_SYSTEM); + (void) pthread_attr_setdetachstate(&attr, + PTHREAD_CREATE_DETACHED); if (pthread_create(NULL, &attr, closedown, NULL) != 0) { syslog(LOG_ERR, gettext( "cannot create closedown thread")); diff --git a/usr/src/head/sched.h b/usr/src/head/sched.h index 5be792bbd3..7460ff82c3 100644 --- a/usr/src/head/sched.h +++ b/usr/src/head/sched.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 1993-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -37,24 +37,21 @@ extern "C" { #endif struct sched_param { - int sched_priority; /* process execution scheduling priority */ - int sched_nicelim; /* nice value limit for SCHED_OTHER policy */ - int sched_nice; /* nice value for SCHED_OTHER policy */ - int sched_pad[6]; /* pad to the same size as pcparms_t of */ - /* sys/priocntl.h */ - /* sizeof(sched_priority) + */ - /* sizeof(pcparms_t.pc_clparms) */ + int sched_priority; /* scheduling priority */ + int sched_pad[8]; }; /* - * POSIX scheduling policies + * POSIX scheduling policies */ -#define SCHED_OTHER 0 -#define SCHED_FIFO 1 /* run to completion */ -#define SCHED_RR 2 /* round-robin */ -#define SCHED_SYS 3 /* sys scheduling class */ -#define SCHED_IA 4 /* interactive class */ -#define _SCHED_NEXT 5 /* first unassigned policy number */ +#define SCHED_OTHER 0 /* traditional time-sharing scheduling class */ +#define SCHED_FIFO 1 /* real-time class: run to completion */ +#define SCHED_RR 2 /* real-time class: round-robin */ +#define SCHED_SYS 3 /* system scheduling class */ +#define SCHED_IA 4 /* interactive time-sharing class */ +#define SCHED_FSS 5 /* fair-share scheduling class */ +#define SCHED_FX 6 /* fixed-priority scheduling class */ +#define _SCHED_NEXT 7 /* first unassigned policy number */ /* * function prototypes diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile index e1d52f7c61..9946d95e00 100644 --- a/usr/src/lib/libc/amd64/Makefile +++ b/usr/src/lib/libc/amd64/Makefile @@ -743,7 +743,6 @@ THREADSOBJS= \ pthr_mutex.o \ pthr_rwlock.o \ pthread.o \ - rtsched.o \ rwlock.o \ scalls.o \ sema.o \ @@ -917,8 +916,6 @@ CFLAGS64 += -xinline= THREAD_DEBUG = $(NOT_RELEASE_BUILD)THREAD_DEBUG = -DTHREAD_DEBUG -CFLAGS64 += $(THREAD_DEBUG) - ALTPICS= $(TRACEOBJS:%=pics/%) $(DYNLIB) := PICS += $(ROOTFS_LIBDIR64)/libc_i18n.a @@ -926,7 +923,7 @@ $(DYNLIB) := BUILD.SO = $(LD) -o $@ -G $(DYNFLAGS) $(PICS) $(ALTPICS) MAPFILES = ../port/mapfile-vers mapfile-vers -CPPFLAGS= -D_REENTRANT 
-D$(MACH64) -D__$(MACH64) \ +CPPFLAGS= -D_REENTRANT -D$(MACH64) -D__$(MACH64) $(THREAD_DEBUG) \ -I. -I$(LIBCBASE)/inc -I../inc $(CPPFLAGS.master) -I/usr/include ASFLAGS= $(AS_PICFLAGS) -P -D__STDC__ -D_ASM $(CPPFLAGS) \ $(amd64_AS_XARCH) @@ -1021,9 +1018,9 @@ TIL= \ pthr_rwlock.o \ pthread.o \ rand.o \ - rtsched.o \ rwlock.o \ scalls.o \ + sched.o \ sema.o \ sigaction.o \ sigev_thread.o \ diff --git a/usr/src/lib/libc/amd64/threads/machdep.c b/usr/src/lib/libc/amd64/threads/machdep.c index d600fb4cd5..cd96dfb1d9 100644 --- a/usr/src/lib/libc/amd64/threads/machdep.c +++ b/usr/src/lib/libc/amd64/threads/machdep.c @@ -18,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -78,11 +79,15 @@ _thr_setup(ulwp_t *self) self->ul_ustack.ss_size = self->ul_stksiz; self->ul_ustack.ss_flags = 0; (void) _private_setustack(&self->ul_ustack); + + update_sched(self); tls_setup(); /* signals have been deferred until now */ sigon(self); + if (self->ul_cancel_pending == 2 && !self->ul_cancel_disabled) + return (NULL); /* cancelled by pthread_create() */ return (self->ul_startpc(self->ul_startarg)); } @@ -148,7 +153,7 @@ __csigsetjmp(sigjmp_buf env, int savemask, gregset_t rs) ucp->uc_stack = self->ul_ustack; else { ucp->uc_stack.ss_sp = - (void *)(self->ul_stktop - self->ul_stksiz); + (void *)(self->ul_stktop - self->ul_stksiz); ucp->uc_stack.ss_size = self->ul_stksiz; ucp->uc_stack.ss_flags = 0; } diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com index c773e4f8a0..93e3a5cb69 100644 --- a/usr/src/lib/libc/i386/Makefile.com +++ b/usr/src/lib/libc/i386/Makefile.com @@ -788,7 +788,6 @@ THREADSOBJS= \ pthr_mutex.o \ pthr_rwlock.o \ pthread.o \ - rtsched.o \ rwlock.o \ scalls.o \ sema.o \ @@ -964,8 +963,6 @@ CFLAGS += $(XINLINE) THREAD_DEBUG = $(NOT_RELEASE_BUILD)THREAD_DEBUG = -DTHREAD_DEBUG -CFLAGS += 
$(THREAD_DEBUG) - ALTPICS= $(TRACEOBJS:%=pics/%) $(DYNLIB) := PICS += $(ROOTFS_LIBDIR)/libc_i18n.a @@ -977,7 +974,7 @@ MAPFILES = ../port/mapfile-vers ../i386/mapfile-vers # EXTN_CPPFLAGS and EXTN_CFLAGS set in enclosing Makefile # CFLAGS += $(EXTN_CFLAGS) -CPPFLAGS= -D_REENTRANT -Di386 $(EXTN_CPPFLAGS) \ +CPPFLAGS= -D_REENTRANT -Di386 $(EXTN_CPPFLAGS) $(THREAD_DEBUG) \ -I$(LIBCBASE)/inc -I../inc $(CPPFLAGS.master) ASFLAGS= $(AS_PICFLAGS) -P -D__STDC__ -D_ASM $(CPPFLAGS) $(i386_AS_XARCH) @@ -1085,9 +1082,9 @@ TIL= \ pthr_rwlock.o \ pthread.o \ rand.o \ - rtsched.o \ rwlock.o \ scalls.o \ + sched.o \ sema.o \ sigaction.o \ sigev_thread.o \ diff --git a/usr/src/lib/libc/i386/threads/machdep.c b/usr/src/lib/libc/i386/threads/machdep.c index 56e7446924..e28e2cd480 100644 --- a/usr/src/lib/libc/i386/threads/machdep.c +++ b/usr/src/lib/libc/i386/threads/machdep.c @@ -18,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -104,11 +105,14 @@ _thr_setup(ulwp_t *self) self->ul_ustack.ss_flags = 0; (void) _private_setustack(&self->ul_ustack); + update_sched(self); tls_setup(); /* signals have been deferred until now */ sigon(self); + if (self->ul_cancel_pending == 2 && !self->ul_cancel_disabled) + return (NULL); /* cancelled by pthread_create() */ return (self->ul_startpc(self->ul_startarg)); } @@ -171,7 +175,7 @@ __csigsetjmp(greg_t cs, greg_t ss, greg_t gs, ucp->uc_stack = self->ul_ustack; else { ucp->uc_stack.ss_sp = - (void *)(self->ul_stktop - self->ul_stksiz); + (void *)(self->ul_stktop - self->ul_stksiz); ucp->uc_stack.ss_size = self->ul_stksiz; ucp->uc_stack.ss_flags = 0; } diff --git a/usr/src/lib/libc/inc/thr_uberdata.h b/usr/src/lib/libc/inc/thr_uberdata.h index 0fc0b652d0..4456b3d3df 100644 --- a/usr/src/lib/libc/inc/thr_uberdata.h +++ b/usr/src/lib/libc/inc/thr_uberdata.h @@ -207,7 +207,6 @@ typedef union { * to be optimized for speed. */ - /* double the default stack size for 64-bit processes */ #ifdef _LP64 #define MINSTACK (8 * 1024) @@ -216,18 +215,10 @@ typedef union { #define MINSTACK (4 * 1024) #define DEFAULTSTACK (1024 * 1024) #endif -#define TSD_NKEYS _POSIX_THREAD_KEYS_MAX - -#define THREAD_MIN_PRIORITY 0 -#define THREAD_MAX_PRIORITY 127 - -#define PRIO_SET 0 /* set priority and policy */ -#define PRIO_SET_PRIO 1 /* set priority only */ -#define PRIO_INHERIT 2 -#define PRIO_DISINHERIT 3 #define MUTEX_TRY 0 #define MUTEX_LOCK 1 +#define MUTEX_NOCEIL 0x40 #if defined(__x86) @@ -359,35 +350,83 @@ typedef struct { /* - * Sleep queues for USYNC_THREAD condvars and mutexes. - * The size and alignment is 64 bytes to reduce cache conflicts. + * Sleep queue root for USYNC_THREAD condvars and mutexes. + * There is a default queue root for each queue head (see below). + * Also, each ulwp_t contains a queue root that can be used + * when the thread is enqueued on the queue, if necessary + * (when more than one wchan hashes to the same queue head). 
+ */ +typedef struct queue_root { + struct queue_root *qr_next; + struct queue_root *qr_prev; + struct ulwp *qr_head; + struct ulwp *qr_tail; + void *qr_wchan; + uint32_t qr_rtcount; + uint32_t qr_qlen; + uint32_t qr_qmax; +} queue_root_t; + +#ifdef _SYSCALL32 +typedef struct queue_root32 { + caddr32_t qr_next; + caddr32_t qr_prev; + caddr32_t qr_head; + caddr32_t qr_tail; + caddr32_t qr_wchan; + uint32_t qr_rtcount; + uint32_t qr_qlen; + uint32_t qr_qmax; +} queue_root32_t; +#endif + +/* + * Sleep queue heads for USYNC_THREAD condvars and mutexes. + * The size and alignment is 128 bytes to reduce cache conflicts. + * Each queue head points to a list of queue roots, defined above. + * Each queue head contains a default queue root for use when only one + * is needed. It is always at the tail of the queue root hash chain. */ typedef union { - uint64_t qh_64[8]; + uint64_t qh_64[16]; struct { mutex_t q_lock; uint8_t q_qcnt; - uint8_t q_pad[7]; - uint64_t q_lockcount; + uint8_t q_type; /* MX or CV */ + uint8_t q_pad1[2]; + uint32_t q_lockcount; uint32_t q_qlen; uint32_t q_qmax; - struct ulwp *q_head; - struct ulwp *q_tail; + void *q_wchan; /* valid only while locked */ + struct queue_root *q_root; /* valid only while locked */ + struct queue_root *q_hlist; +#if !defined(_LP64) + caddr_t q_pad2[3]; +#endif + queue_root_t q_def_root; + uint32_t q_hlen; + uint32_t q_hmax; } qh_qh; } queue_head_t; #define qh_lock qh_qh.q_lock #define qh_qcnt qh_qh.q_qcnt +#define qh_type qh_qh.q_type +#if defined(THREAD_DEBUG) #define qh_lockcount qh_qh.q_lockcount #define qh_qlen qh_qh.q_qlen #define qh_qmax qh_qh.q_qmax -#define qh_head qh_qh.q_head -#define qh_tail qh_qh.q_tail - -/* queue types passed to queue_lock() and enqueue() */ +#endif +#define qh_wchan qh_qh.q_wchan +#define qh_root qh_qh.q_root +#define qh_hlist qh_qh.q_hlist +#define qh_def_root qh_qh.q_def_root +#define qh_hlen qh_qh.q_hlen +#define qh_hmax qh_qh.q_hmax + +/* queue types passed to queue_lock() */ #define MX 0 
#define CV 1 -#define FIFOQ 0x10 /* or'ing with FIFOQ asks for FIFO queueing */ #define QHASHSHIFT 9 /* number of hashing bits */ #define QHASHSIZE (1 << QHASHSHIFT) /* power of 2 (1<<9 == 512) */ #define QUEUE_HASH(wchan, type) ((uint_t) \ @@ -397,17 +436,29 @@ typedef union { extern queue_head_t *queue_lock(void *, int); extern void queue_unlock(queue_head_t *); -extern void enqueue(queue_head_t *, struct ulwp *, void *, int); -extern struct ulwp *dequeue(queue_head_t *, void *, int *); -extern struct ulwp *queue_waiter(queue_head_t *, void *); -extern struct ulwp *queue_unlink(queue_head_t *, +extern void enqueue(queue_head_t *, struct ulwp *, int); +extern struct ulwp *dequeue(queue_head_t *, int *); +extern struct ulwp **queue_slot(queue_head_t *, struct ulwp **, int *); +extern struct ulwp *queue_waiter(queue_head_t *); +extern int dequeue_self(queue_head_t *); +extern void queue_unlink(queue_head_t *, struct ulwp **, struct ulwp *); -extern uint8_t dequeue_self(queue_head_t *, void *); extern void unsleep_self(void); extern void spin_lock_set(mutex_t *); extern void spin_lock_clear(mutex_t *); /* + * Scheduling class information structure. + */ +typedef struct { + short pcc_state; + short pcc_policy; + pri_t pcc_primin; + pri_t pcc_primax; + pcinfo_t pcc_info; +} pcclass_t; + +/* * Memory block for chain of owned ceiling mutexes. 
*/ typedef struct mxchain { @@ -491,10 +542,10 @@ typedef struct ulwp { stack_t ul_ustack; /* current stack boundaries */ int ul_ix; /* hash index */ lwpid_t ul_lwpid; /* thread id, aka the lwp id */ - pri_t ul_pri; /* priority known to the library */ - pri_t ul_mappedpri; /* priority known to the application */ + pri_t ul_pri; /* scheduling priority */ + pri_t ul_epri; /* real-time ceiling priority */ char ul_policy; /* scheduling policy */ - char ul_pri_mapped; /* != 0 means ul_mappedpri is valid */ + char ul_cid; /* scheduling class id */ union { struct { char cursig; /* deferred signal number */ @@ -524,8 +575,8 @@ typedef struct ulwp { char ul_cond_wait_defer; /* thread_cond_wait_defer */ char ul_error_detection; /* thread_error_detection */ char ul_async_safe; /* thread_async_safe */ - char ul_pad1; - char ul_save_state; /* bind_guard() interface to ld.so.1 */ + char ul_rt; /* found on an RT queue */ + char ul_rtqueued; /* was RT when queued */ int ul_adaptive_spin; /* thread_adaptive_spin */ int ul_queue_spin; /* thread_queue_spin */ volatile int ul_critical; /* non-zero == in a critical region */ @@ -543,8 +594,8 @@ typedef struct ulwp { int ul_errno; /* per-thread errno */ int *ul_errnop; /* pointer to errno or self->ul_errno */ __cleanup_t *ul_clnup_hdr; /* head of cleanup handlers list */ - uberflags_t *volatile ul_schedctl_called; /* ul_schedctl is set up */ - volatile sc_shared_t *volatile ul_schedctl; /* schedctl data */ + uberflags_t *ul_schedctl_called; /* ul_schedctl is set up */ + volatile sc_shared_t *ul_schedctl; /* schedctl data */ int ul_bindflags; /* bind_guard() interface to ld.so.1 */ uint_t ul_libc_locks; /* count of cancel_safe_mutex_lock()s */ tsd_t *ul_stsd; /* slow TLS for keys >= TSD_NFAST */ @@ -562,8 +613,7 @@ typedef struct ulwp { queue_head_t *ul_sleepq; /* sleep queue thread is waiting on */ mutex_t *ul_cvmutex; /* mutex dropped when waiting on a cv */ mxchain_t *ul_mxchain; /* chain of owned ceiling mutexes */ - pri_t ul_epri; 
/* effective scheduling priority */ - pri_t ul_emappedpri; /* effective mapped priority */ + int ul_save_state; /* bind_guard() interface to ld.so.1 */ uint_t ul_rdlockcnt; /* # entries in ul_readlock array */ /* 0 means there is but a single entry */ union { /* single entry or pointer to array */ @@ -584,6 +634,9 @@ typedef struct ulwp { uint_t ul_spin_lock_spin2; uint_t ul_spin_lock_sleep; uint_t ul_spin_lock_wakeup; + queue_root_t ul_queue_root; /* root of a sleep queue */ + id_t ul_rtclassid; /* real-time class id */ + uint_t ul_pilocks; /* count of PI locks held */ /* the following members *must* be last in the structure */ /* they are discarded when ulwp is replaced on thr_exit() */ sigset_t ul_sigmask; /* thread's current signal mask */ @@ -889,10 +942,10 @@ typedef struct ulwp32 { stack32_t ul_ustack; /* current stack boundaries */ int ul_ix; /* hash index */ lwpid_t ul_lwpid; /* thread id, aka the lwp id */ - pri_t ul_pri; /* priority known to the library */ - pri_t ul_mappedpri; /* priority known to the application */ + pri_t ul_pri; /* scheduling priority */ + pri_t ul_epri; /* real-time ceiling priority */ char ul_policy; /* scheduling policy */ - char ul_pri_mapped; /* != 0 means ul_mappedpri is valid */ + char ul_cid; /* scheduling class id */ union { struct { char cursig; /* deferred signal number */ @@ -922,8 +975,8 @@ typedef struct ulwp32 { char ul_cond_wait_defer; /* thread_cond_wait_defer */ char ul_error_detection; /* thread_error_detection */ char ul_async_safe; /* thread_async_safe */ - char ul_pad1; - char ul_save_state; /* bind_guard() interface to ld.so.1 */ + char ul_rt; /* found on an RT queue */ + char ul_rtqueued; /* was RT when queued */ int ul_adaptive_spin; /* thread_adaptive_spin */ int ul_queue_spin; /* thread_queue_spin */ int ul_critical; /* non-zero == in a critical region */ @@ -960,8 +1013,7 @@ typedef struct ulwp32 { caddr32_t ul_sleepq; /* sleep queue thread is waiting on */ caddr32_t ul_cvmutex; /* mutex dropped when 
waiting on a cv */ caddr32_t ul_mxchain; /* chain of owned ceiling mutexes */ - pri_t ul_epri; /* effective scheduling priority */ - pri_t ul_emappedpri; /* effective mapped priority */ + int ul_save_state; /* bind_guard() interface to ld.so.1 */ uint_t ul_rdlockcnt; /* # entries in ul_readlock array */ /* 0 means there is but a single entry */ union { /* single entry or pointer to array */ @@ -982,6 +1034,9 @@ typedef struct ulwp32 { uint_t ul_spin_lock_spin2; uint_t ul_spin_lock_sleep; uint_t ul_spin_lock_wakeup; + queue_root32_t ul_queue_root; /* root of a sleep queue */ + id_t ul_rtclassid; /* real-time class id */ + uint_t ul_pilocks; /* count of PI locks held */ /* the following members *must* be last in the structure */ /* they are discarded when ulwp is replaced on thr_exit() */ sigset32_t ul_sigmask; /* thread's current signal mask */ @@ -1096,6 +1151,10 @@ extern greg_t stkptr(void); #define __attribute__(string) #endif +/* Fetch the dispatch (kernel) priority of a thread */ +#define real_priority(ulwp) \ + ((ulwp)->ul_schedctl? (ulwp)->ul_schedctl->sc_priority : 0) + /* * Implementation functions. Not visible outside of the library itself. 
*/ @@ -1105,8 +1164,8 @@ extern void setgregs(ulwp_t *, gregset_t); extern void thr_panic(const char *); #pragma rarely_called(thr_panic) extern ulwp_t *find_lwp(thread_t); -extern int real_priority(ulwp_t *); extern void finish_init(void); +extern void update_sched(ulwp_t *); extern void queue_alloc(void); extern void tsd_exit(void); extern void tsd_free(ulwp_t *); @@ -1356,6 +1415,8 @@ extern int __mutex_lock(mutex_t *); extern int __mutex_trylock(mutex_t *); extern int __mutex_unlock(mutex_t *); extern int mutex_is_held(mutex_t *); +extern int mutex_lock_internal(mutex_t *, timespec_t *, int); +extern int mutex_unlock_internal(mutex_t *, int); extern int _cond_init(cond_t *, int, void *); extern int _cond_signal(cond_t *); @@ -1385,8 +1446,7 @@ extern int _thr_continue(thread_t); extern int _thr_create(void *, size_t, void *(*)(void *), void *, long, thread_t *); extern int _thrp_create(void *, size_t, void *(*)(void *), void *, long, - thread_t *, pri_t, int, size_t); -extern int _thr_getprio(thread_t, int *); + thread_t *, size_t); extern int _thr_getspecific(thread_key_t, void **); extern int _thr_join(thread_t, thread_t *, void **); extern int _thr_keycreate(thread_key_t *, PFrV); @@ -1407,13 +1467,15 @@ extern void _thr_terminate(void *); extern void _thr_exit(void *); extern void _thrp_exit(void); +extern const pcclass_t *get_info_by_class(id_t); +extern const pcclass_t *get_info_by_policy(int); +extern void _membar_producer(void); +extern void _membar_consumer(void); extern const thrattr_t *def_thrattr(void); -extern int _thread_setschedparam_main(pthread_t, int, - const struct sched_param *, int); -extern int _validate_rt_prio(int, int); -extern int _thrp_setlwpprio(lwpid_t, int, int); -extern pri_t map_rtpri_to_gp(pri_t); -extern int get_info_by_policy(int); +extern id_t setparam(idtype_t, id_t, int, int); +extern id_t setprio(idtype_t, id_t, int, int *); +extern id_t getparam(idtype_t, id_t, int *, struct sched_param *); +extern long 
_private_priocntl(idtype_t, id_t, int, void *); /* * System call wrappers (direct interfaces to the kernel) diff --git a/usr/src/lib/libc/port/gen/priocntl.c b/usr/src/lib/libc/port/gen/priocntl.c index 36bd4ddb38..71aae38c63 100644 --- a/usr/src/lib/libc/port/gen/priocntl.c +++ b/usr/src/lib/libc/port/gen/priocntl.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -29,20 +29,19 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ - -#include "synonyms.h" -#include <sys/types.h> -#include <sys/procset.h> -#include <sys/priocntl.h> -#include <stdarg.h> -#include <errno.h> +#include "synonyms.h" +#include <sys/types.h> +#include <sys/procset.h> +#include <sys/priocntl.h> +#include <stdarg.h> +#include <errno.h> /* - * The declaration of __priocntlset() and __priocntl() was in prior releases - * in <sys/priocntl.h>. They are used to define PC_VERSION at compile time, - * based on the contents of the header file. This behavior is now changed. - * Old binaries call __priocntl() and __priocntlset() instead priocntl() and - * priocntlset(). New binaries call priocntl() and priocntlset(). + * The declarations of __priocntlset() and __priocntl() were in prior releases + * in <sys/priocntl.h>. They are used to define PC_VERSION at compile time, + * based on the contents of the header file. 
This behavior is now changed. + * Old binaries call __priocntl() and __priocntlset() instead of priocntl() + * and priocntlset(). New binaries call priocntl() and priocntlset(). */ /* @@ -50,12 +49,6 @@ */ extern long __priocntlset(int, procset_t *, int, caddr_t, ...); -/* - * prototype declaration - */ -long __priocntl(int, idtype_t, id_t, int, caddr_t); - - static int pc_vaargs2parms(va_list valist, pc_vaparms_t *vp); long @@ -68,6 +61,23 @@ __priocntl(int pc_version, idtype_t idtype, id_t id, int cmd, caddr_t arg) return (__priocntlset(pc_version, &procset, cmd, arg, 0)); } +/* + * Internally to libc, we call this function rather than priocntl() + * when the cmd is not PC_GETXPARMS or PC_SETXPARMS. We do this + * for the sake of calling common code in various places. One of + * these places is in spawn() and spawnp(), where we must not call + * any function that is exported from libc while in the child of vfork(). + */ +long +_private_priocntl(idtype_t idtype, id_t id, int cmd, void *arg) +{ + extern long _private__priocntlset(int, procset_t *, int, caddr_t, ...); + procset_t procset; + + setprocset(&procset, POP_AND, idtype, id, P_ALL, 0); + return (_private__priocntlset(PC_VERSION, &procset, cmd, arg, 0)); +} + /*VARARGS3*/ long diff --git a/usr/src/lib/libc/port/gen/setpriority.c b/usr/src/lib/libc/port/gen/setpriority.c index bb5f2848ee..d737c2ca19 100644 --- a/usr/src/lib/libc/port/gen/setpriority.c +++ b/usr/src/lib/libc/port/gen/setpriority.c @@ -18,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -41,6 +42,7 @@ #include "synonyms.h" #include <string.h> +#include <limits.h> #include <sys/types.h> #include <sys/time.h> #include <sys/resource.h> @@ -138,7 +140,7 @@ getpriority(int which, id_t who) pcnice.pc_val = 0; pcnice.pc_op = PC_GETNICE; - if (priocntl(idtype, id, PC_DONICE, (caddr_t)&pcnice) == -1) + if (priocntl(idtype, id, PC_DONICE, &pcnice) == -1) return (-1); else return (pcnice.pc_val); @@ -172,50 +174,41 @@ setpriority(int which, id_t who, int prio) else id = who; - if (prio > 19) - prio = 19; - else if (prio < -20) - prio = -20; + if (prio > NZERO - 1) + prio = NZERO - 1; + else if (prio < -NZERO) + prio = -NZERO; pcnice.pc_val = prio; pcnice.pc_op = PC_SETNICE; - ret = priocntl(idtype, id, PC_DONICE, (caddr_t)&pcnice); + ret = priocntl(idtype, id, PC_DONICE, &pcnice); if (ret != 0 && errno == EPERM) { - int incr; - int tmp; pcnice_t gpcnice = { 0, PC_GETNICE }; - priv_set_t *pset; + priv_set_t *pset = NULL; /* * The priocntl PC_DONICE subcommand returns EPERM if we lack * sufficient privileges to carry out the operation, but - * setpriority(3C) needs to return EACCES. We can't just change - * EPERM to EACCES, because there are other conditions which - * legitimately cause EPERM (such as an euid/ruid mismatch + * setpriority(3C) may need to return EACCES. We can't just + * change EPERM to EACCES, because there are other conditions + * which legitimately cause EPERM (such as an euid/ruid mismatch * between the current process and the target.). - */ - if ((tmp = priocntl(idtype, id, PC_DONICE, - (caddr_t)&gpcnice)) != 0) - return (tmp); - - incr = prio - gpcnice.pc_val; - - if ((pset = priv_allocset()) == NULL || - getppriv(PRIV_EFFECTIVE, pset) != 0) - return (-1); - - /* + * * setpriority(3C) must return EACCES if we lack the privilege * checked for below and we are trying to increase the process * priority (by lowering the numeric value of its priority). 
*/ - if ((incr < 0 || incr > 2 * NZERO) && - !priv_ismember(pset, "proc_priocntl")) - errno = EACCES; - - priv_freeset(pset); + if (priocntl(idtype, id, PC_DONICE, &gpcnice) == 0 && + prio < gpcnice.pc_val) { + if ((pset = priv_allocset()) != NULL && + getppriv(PRIV_EFFECTIVE, pset) == 0 && + !priv_ismember(pset, "proc_priocntl")) + errno = EACCES; + if (pset != NULL) + priv_freeset(pset); + } } return (ret); diff --git a/usr/src/lib/libc/port/rt/sched.c b/usr/src/lib/libc/port/rt/sched.c index cfa7259a96..701e07f894 100644 --- a/usr/src/lib/libc/port/rt/sched.c +++ b/usr/src/lib/libc/port/rt/sched.c @@ -20,245 +20,314 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include "synonyms.h" -#include "mtlib.h" -#include <sys/types.h> +#include "thr_uberdata.h" #include <sched.h> -#include <errno.h> -#include <limits.h> -#include <unistd.h> -#include <sys/priocntl.h> -#include <sys/rtpriocntl.h> #include <sys/tspriocntl.h> -#include <sys/rt.h> -#include <sys/ts.h> -#include <thread.h> -#include <string.h> -#include <stdlib.h> -#include "rtsched.h" +#include <sys/rtpriocntl.h> +#include <sys/fxpriocntl.h> /* - * The following variables are used for caching information + * The following array is used for caching information * for priocntl scheduling classes. */ -struct pcclass ts_class; -struct pcclass rt_class; -struct pcclass ia_class; -struct pcclass sys_class; +static pcclass_t sched_class[] = { + {0, SCHED_OTHER, 0, 0, {-1, "TS", 0}}, + {0, SCHED_FIFO, 0, 0, {-1, "RT", 0}}, + {0, SCHED_RR, 0, 0, {-1, "RT", 0}}, + {0, SCHED_SYS, 0, 0, {0, "SYS", 0}}, + {0, SCHED_IA, 0, 0, {-1, "IA", 0}}, + {0, SCHED_FSS, 0, 0, {-1, "FSS", 0}}, + {0, SCHED_FX, 0, 0, {-1, "FX", 0}}, + /* + * Allow unknown (to us) scheduling classes. 
+ * The kernel allows space for exactly 10 scheduling classes + * (see the definitions of 'sclass' and 'nclass' in the kernel). + * We need that number of available slots here. + * If the kernel space is changed, this has to change too. + */ + {0, -1, 0, 0, {-1, "", 0}}, + {0, -1, 0, 0, {-1, "", 0}}, + {0, -1, 0, 0, {-1, "", 0}}, + {0, -1, 0, 0, {-1, "", 0}}, + {0, -1, 0, 0, {-1, "", 0}}, + {0, -1, 0, 0, {-1, "", 0}}, + {0, -1, 0, 0, {-1, "", 0}}, + {0, -1, 0, 0, {-1, "", 0}}, + {0, -1, 0, 0, {-1, "", 0}}, + {0, -1, 0, 0, {-1, "", 0}}, +}; + +#define NPOLICY (sizeof (sched_class) / sizeof (pcclass_t)) + +#if _SCHED_NEXT != SCHED_FX + 1 +#error "fatal: _SCHED_NEXT != SCHED_FX + 1" +#endif + +static mutex_t class_lock = DEFAULTMUTEX; /* protects sched_class[] */ -static rtdpent_t *rt_dptbl; /* RT class parameter table */ - -typedef struct { /* type definition for generic class-specific parameters */ - int pc_clparms[PC_CLINFOSZ]; -} pc_clparms_t; +/* + * Helper function for get_info_by_policy(), below. + * Don't let a manufactured policy number duplicate + * the class of one of our base policy numbers. + */ +static int +is_base_class(const char *clname) +{ + const pcclass_t *pccp; + int policy; -static int map_gp_to_rtpri(pri_t); + for (policy = 0, pccp = sched_class; + policy < _SCHED_NEXT; + policy++, pccp++) { + if (strcmp(clname, pccp->pcc_info.pc_clname) == 0) + return (1); + } + return (0); +} /* - * cache priocntl information on scheduling classes by policy + * Cache priocntl information on scheduling class by policy. 
*/ -int +const pcclass_t * get_info_by_policy(int policy) { - char *pccname; - struct pcclass *pccp; + pcclass_t *pccp = &sched_class[policy]; + pcpri_t pcpri; + pri_t prio; + int base = 0; - if (policy < 0) { + if ((uint_t)policy >= NPOLICY || pccp->pcc_state < 0) { errno = EINVAL; - return (-1); + return (NULL); + } + + if (pccp->pcc_state > 0) + return (pccp); + + lmutex_lock(&class_lock); + + /* get class info (the system class is known to have class-id == 0) */ + if (pccp->pcc_policy == -1) { + /* policy number not defined in <sched.h> */ + ASSERT(policy >= _SCHED_NEXT); + pccp->pcc_info.pc_cid = policy - _SCHED_NEXT; + if (_private_priocntl(0, 0, PC_GETCLINFO, &pccp->pcc_info) + == -1 || + (base = is_base_class(pccp->pcc_info.pc_clname)) != 0) { + pccp->pcc_info.pc_clname[0] = '\0'; + pccp->pcc_info.pc_cid = -1; + /* + * If we duplicated a base class, permanently + * disable this policy entry. Else allow for + * dynamic loading of scheduling classes. + */ + if (base) { + _membar_producer(); + pccp->pcc_state = -1; + } + errno = EINVAL; + lmutex_unlock(&class_lock); + return (NULL); + } + pccp->pcc_policy = policy; + } else if (policy != SCHED_SYS && + _private_priocntl(0, 0, PC_GETCID, &pccp->pcc_info) == -1) { + _membar_producer(); + pccp->pcc_state = -1; + errno = EINVAL; + lmutex_unlock(&class_lock); + return (NULL); } switch (policy) { - case SCHED_FIFO: - case SCHED_RR: - pccp = &rt_class; - pccname = "RT"; - break; case SCHED_OTHER: - pccp = &ts_class; - pccname = "TS"; - break; - case SCHED_SYS: - pccp = &sys_class; - pccname = "sys"; - break; - case SCHED_IA: - pccp = &ia_class; - pccname = "IA"; + prio = ((tsinfo_t *)pccp->pcc_info.pc_clinfo)->ts_maxupri; + pccp->pcc_primin = -prio; + pccp->pcc_primax = prio; break; - default: - return (policy); - } - if (pccp->pcc_state != 0) { - if (pccp->pcc_state < 0) - errno = ENOSYS; - return (pccp->pcc_state); - } - - /* get class's info */ - (void) strcpy(pccp->pcc_info.pc_clname, pccname); - if (policy == 
SCHED_SYS) - pccp->pcc_info.pc_cid = 0; - else if (priocntl(P_PID, 0, PC_GETCID, (caddr_t)&(pccp->pcc_info)) < 0) - return (-1); - - if (policy == SCHED_FIFO || policy == SCHED_RR) { - pcadmin_t pcadmin; - rtadmin_t rtadmin; - size_t rtdpsize; - - /* get RT class dispatch table in rt_dptbl */ - pcadmin.pc_cid = rt_class.pcc_info.pc_cid; - pcadmin.pc_cladmin = (caddr_t)&rtadmin; - rtadmin.rt_cmd = RT_GETDPSIZE; - if (priocntl(P_PID, 0, PC_ADMIN, (caddr_t)&pcadmin) < 0) - return (-1); - rtdpsize = (size_t)(rtadmin.rt_ndpents * sizeof (rtdpent_t)); - if (rt_dptbl == NULL && - (rt_dptbl = lmalloc(rtdpsize)) == NULL) { - errno = EAGAIN; - return (-1); - } - rtadmin.rt_dpents = rt_dptbl; - rtadmin.rt_cmd = RT_GETDPTBL; - if (priocntl(P_PID, 0, PC_ADMIN, (caddr_t)&pcadmin) < 0) - return (-1); + case SCHED_FIFO: + case SCHED_RR: + prio = ((rtinfo_t *)pccp->pcc_info.pc_clinfo)->rt_maxpri; pccp->pcc_primin = 0; - pccp->pcc_primax = ((rtinfo_t *)rt_class.pcc_info.pc_clinfo)-> - rt_maxpri; - } else if (policy == SCHED_OTHER) { - pri_t prio; - - prio = ((tsinfo_t *)ts_class.pcc_info.pc_clinfo)->ts_maxupri/3; - pccp->pcc_primin = -prio; pccp->pcc_primax = prio; - } else { - /* non-RT scheduling class */ - pcpri_t pcpri; - + break; + default: /* - * get class's global priority's min, max, and - * translate them into RT priority level (index) via rt_dptbl. + * All other policy numbers, including policy numbers + * not defined in <sched.h>. 
*/ pcpri.pc_cid = pccp->pcc_info.pc_cid; - if (priocntl(0, 0, PC_GETPRIRANGE, (caddr_t)&pcpri) < 0) - return (-1); - pccp->pcc_primax = map_gp_to_rtpri(pcpri.pc_clpmax); - pccp->pcc_primin = map_gp_to_rtpri(pcpri.pc_clpmin); + if (_private_priocntl(0, 0, PC_GETPRIRANGE, &pcpri) == 0) { + pccp->pcc_primin = pcpri.pc_clpmin; + pccp->pcc_primax = pcpri.pc_clpmax; + } + break; } + _membar_producer(); pccp->pcc_state = 1; - return (1); + lmutex_unlock(&class_lock); + return (pccp); } -/* - * Translate global scheduling priority to RT class's user priority. - * Use the gp values in the rt_dptbl to do a reverse mapping - * of a given gpri value relative to the index range of rt_dptbl. - */ -static int -map_gp_to_rtpri(pri_t gpri) +const pcclass_t * +get_info_by_class(id_t classid) { - rtdpent_t *rtdp; - pri_t pri; + pcinfo_t pcinfo; + pcclass_t *pccp; + int policy; - /* need RT class info before we can translate priorities */ - if (rt_dptbl == NULL && get_info_by_policy(SCHED_FIFO) < 0) - return (-1); + if (classid < 0) { + errno = EINVAL; + return (NULL); + } - if (gpri <= rt_dptbl[rt_class.pcc_primin].rt_globpri) { - pri = gpri - rt_dptbl[rt_class.pcc_primin].rt_globpri + \ - rt_class.pcc_primin; - } else if (gpri >= rt_dptbl[rt_class.pcc_primax].rt_globpri) { - pri = gpri - rt_dptbl[rt_class.pcc_primax].rt_globpri + \ - rt_class.pcc_primax; - } else { - pri = rt_class.pcc_primin + 1; - for (rtdp = rt_dptbl+1; rtdp->rt_globpri < gpri; ++rtdp, ++pri) - ; - if (rtdp->rt_globpri > gpri) - --pri; + /* determine if we already know this classid */ + for (policy = 0, pccp = sched_class; + policy < NPOLICY; + policy++, pccp++) { + if (pccp->pcc_state > 0 && pccp->pcc_info.pc_cid == classid) + return (pccp); } - return (pri); + pcinfo.pc_cid = classid; + if (_private_priocntl(0, 0, PC_GETCLINFO, &pcinfo) == -1) { + if (classid == 0) /* no kernel info for sys class */ + return (get_info_by_policy(SCHED_SYS)); + return (NULL); + } + + for (policy = 0, pccp = sched_class; + policy 
< NPOLICY; + policy++, pccp++) { + if (pccp->pcc_state == 0 && + strcmp(pcinfo.pc_clname, pccp->pcc_info.pc_clname) == 0) + return (get_info_by_policy(pccp->pcc_policy)); + } + + /* + * We have encountered an unknown (to us) scheduling class. + * Manufacture a policy number for it. Hopefully we still + * have room in the sched_class[] table. + */ + policy = _SCHED_NEXT + classid; + if (policy >= NPOLICY) { + errno = EINVAL; + return (NULL); + } + lmutex_lock(&class_lock); + pccp = &sched_class[policy]; + pccp->pcc_policy = policy; + (void) strlcpy(pccp->pcc_info.pc_clname, pcinfo.pc_clname, PC_CLNMSZ); + lmutex_unlock(&class_lock); + return (get_info_by_policy(pccp->pcc_policy)); } /* - * Translate RT class's user priority to global scheduling priority. + * Helper function: get process or lwp current scheduling policy. */ -pri_t -map_rtpri_to_gp(pri_t pri) +static const pcclass_t * +get_parms(idtype_t idtype, id_t id, pcparms_t *pcparmp) { - rtdpent_t *rtdp; - pri_t gpri; - - if (rt_class.pcc_state == 0) - (void) get_info_by_policy(SCHED_FIFO); - - /* First case is the default case, other two are seldomly taken */ - if (pri <= rt_dptbl[rt_class.pcc_primin].rt_globpri) { - gpri = pri + rt_dptbl[rt_class.pcc_primin].rt_globpri - - rt_class.pcc_primin; - } else if (pri >= rt_dptbl[rt_class.pcc_primax].rt_globpri) { - gpri = pri + rt_dptbl[rt_class.pcc_primax].rt_globpri - - rt_class.pcc_primax; - } else { - gpri = rt_dptbl[rt_class.pcc_primin].rt_globpri + 1; - for (rtdp = rt_dptbl+1; rtdp->rt_globpri < pri; ++rtdp, ++gpri) - ; - if (rtdp->rt_globpri > pri) - --gpri; - } - return (gpri); + pcparmp->pc_cid = PC_CLNULL; + if (_private_priocntl(idtype, id, PC_GETPARMS, pcparmp) == -1) + return (NULL); + return (get_info_by_class(pcparmp->pc_cid)); } +/* + * Helper function for setprio() and setparam(), below. 
+ */ static int -get_info_by_class(id_t classid) +set_priority(idtype_t idtype, id_t id, int policy, int prio, + pcparms_t *pcparmp, int settq) { - pcinfo_t pcinfo; + int rv; - /* determine if we already know this classid */ - if (rt_class.pcc_state > 0 && rt_class.pcc_info.pc_cid == classid) - return (1); - if (ts_class.pcc_state > 0 && ts_class.pcc_info.pc_cid == classid) - return (1); - if (sys_class.pcc_state > 0 && sys_class.pcc_info.pc_cid == classid) - return (1); - if (ia_class.pcc_state > 0 && ia_class.pcc_info.pc_cid == classid) - return (1); + switch (policy) { + case SCHED_OTHER: + { + tsparms_t *tsp = (tsparms_t *)pcparmp->pc_clparms; + tsp->ts_uprilim = prio; + tsp->ts_upri = prio; + break; + } + case SCHED_FIFO: + case SCHED_RR: + { + rtparms_t *rtp = (rtparms_t *)pcparmp->pc_clparms; + rtp->rt_tqnsecs = settq? + (policy == SCHED_FIFO? RT_TQINF : RT_TQDEF) : + RT_NOCHANGE; + rtp->rt_pri = prio; + break; + } + default: + { + /* + * Class-independent method for setting the priority. + */ + pcprio_t pcprio; + + pcprio.pc_op = PC_SETPRIO; + pcprio.pc_cid = pcparmp->pc_cid; + pcprio.pc_val = prio; + do { + rv = _private_priocntl(idtype, id, PC_DOPRIO, &pcprio); + } while (rv == -1 && errno == ENOMEM); + return (rv); + } + } - pcinfo.pc_cid = classid; - if (priocntl(0, 0, PC_GETCLINFO, (caddr_t)&pcinfo) < 0) { - if (classid == 0) /* no kernel info for sys class */ - return (get_info_by_policy(SCHED_SYS)); + do { + rv = _private_priocntl(idtype, id, PC_SETPARMS, pcparmp); + } while (rv == -1 && errno == ENOMEM); + return (rv); +} + +/* + * Utility function, private to libc, used by sched_setparam() + * and posix_spawn(). Because it is called by the vfork() child of + * posix_spawn(), we must not call any functions exported from libc. 
+ */ +id_t +setprio(idtype_t idtype, id_t id, int prio, int *policyp) +{ + pcparms_t pcparm; + int policy; + const pcclass_t *pccp; + + if ((pccp = get_parms(idtype, id, &pcparm)) == NULL) + return (-1); + if (prio < pccp->pcc_primin || prio > pccp->pcc_primax) { + errno = EINVAL; return (-1); } - if (rt_class.pcc_state == 0 && strcmp(pcinfo.pc_clname, "RT") == 0) - return (get_info_by_policy(SCHED_FIFO)); - if (ts_class.pcc_state == 0 && strcmp(pcinfo.pc_clname, "TS") == 0) - return (get_info_by_policy(SCHED_OTHER)); - if (ia_class.pcc_state == 0 && strcmp(pcinfo.pc_clname, "IA") == 0) - return (get_info_by_policy(SCHED_IA)); + policy = pccp->pcc_policy; + if (policyp != NULL && + (policy == SCHED_FIFO || policy == SCHED_RR)) { + rtparms_t *rtp = (rtparms_t *)pcparm.pc_clparms; + policy = (rtp->rt_tqnsecs == RT_TQINF? SCHED_FIFO : SCHED_RR); + } - return (1); + if (set_priority(idtype, id, policy, prio, &pcparm, 0) == -1) + return (-1); + if (policyp != NULL) + *policyp = policy; + return (pccp->pcc_info.pc_cid); } int sched_setparam(pid_t pid, const struct sched_param *param) { - pri_t prio = param->sched_priority; - pcparms_t pcparm; - tsparms_t *tsp; - tsinfo_t *tsi; - int scale; - if (pid < 0) { errno = ESRCH; return (-1); @@ -266,48 +335,66 @@ sched_setparam(pid_t pid, const struct sched_param *param) if (pid == 0) pid = P_MYID; - /* get process's current scheduling policy */ - pcparm.pc_cid = PC_CLNULL; - if (priocntl(P_PID, pid, PC_GETPARMS, (caddr_t)&pcparm) == -1) + if (setprio(P_PID, pid, param->sched_priority, NULL) == -1) return (-1); - if (get_info_by_class(pcparm.pc_cid) < 0) + return (0); +} + +id_t +getparam(idtype_t idtype, id_t id, int *policyp, struct sched_param *param) +{ + pcparms_t pcparm; + const pcclass_t *pccp; + int policy; + int priority; + + if ((pccp = get_parms(idtype, id, &pcparm)) == NULL) return (-1); - if (pcparm.pc_cid == rt_class.pcc_info.pc_cid) { - /* SCHED_FIFO or SCHED_RR policy */ - if (prio < rt_class.pcc_primin || prio > 
rt_class.pcc_primax) { - errno = EINVAL; - return (-1); - } - ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE; - ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; - } else if (pcparm.pc_cid == ts_class.pcc_info.pc_cid) { - /* SCHED_OTHER policy */ - tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo; - scale = tsi->ts_maxupri; - tsp = (tsparms_t *)pcparm.pc_clparms; - tsp->ts_uprilim = tsp->ts_upri = -(scale * prio) / 20; - } else { + switch (policy = pccp->pcc_policy) { + case SCHED_OTHER: + { + tsparms_t *tsp = (tsparms_t *)pcparm.pc_clparms; + priority = tsp->ts_upri; + break; + } + case SCHED_FIFO: + case SCHED_RR: + { + rtparms_t *rtp = (rtparms_t *)pcparm.pc_clparms; + priority = rtp->rt_pri; + policy = (rtp->rt_tqnsecs == RT_TQINF? SCHED_FIFO : SCHED_RR); + break; + } + default: + { /* - * policy is not defined by POSIX.4. - * just pass parameter data through to priocntl. - * param should contain an image of class-specific parameters - * (after the sched_priority member). + * Class-independent method for getting the priority. 
*/ - *((pc_clparms_t *)pcparm.pc_clparms) = - *((pc_clparms_t *)(&(param->sched_priority)+1)); + pcprio_t pcprio; + + pcprio.pc_op = PC_GETPRIO; + pcprio.pc_cid = 0; + pcprio.pc_val = 0; + if (_private_priocntl(idtype, id, PC_DOPRIO, &pcprio) == 0) + priority = pcprio.pc_val; + else + priority = 0; + break; + } } - return ((int)priocntl(P_PID, pid, PC_SETPARMS, (caddr_t)&pcparm)); + *policyp = policy; + (void) memset(param, 0, sizeof (*param)); + param->sched_priority = priority; + + return (pcparm.pc_cid); } int sched_getparam(pid_t pid, struct sched_param *param) { - pcparms_t pcparm; - pri_t prio; - int scale; - tsinfo_t *tsi; + int policy; if (pid < 0) { errno = ESRCH; @@ -316,49 +403,40 @@ sched_getparam(pid_t pid, struct sched_param *param) if (pid == 0) pid = P_MYID; - pcparm.pc_cid = PC_CLNULL; - if (priocntl(P_PID, pid, PC_GETPARMS, (caddr_t)&pcparm) == -1) - return (-1); - if (get_info_by_class(pcparm.pc_cid) < 0) + if (getparam(P_PID, pid, &policy, param) == -1) return (-1); + return (0); +} - if (pcparm.pc_cid == rt_class.pcc_info.pc_cid) { - param->sched_priority = - ((rtparms_t *)pcparm.pc_clparms)->rt_pri; - } else if (pcparm.pc_cid == ts_class.pcc_info.pc_cid) { - param->sched_nicelim = - ((tsparms_t *)pcparm.pc_clparms)->ts_uprilim; - prio = param->sched_nice = - ((tsparms_t *)pcparm.pc_clparms)->ts_upri; - tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo; - scale = tsi->ts_maxupri; - if (scale == 0) - param->sched_priority = 0; - else - param->sched_priority = -(prio * 20) / scale; - } else { - /* - * policy is not defined by POSIX.4 - * just return a copy of pcparams_t image in param. - */ - *((pc_clparms_t *)(&(param->sched_priority)+1)) = - *((pc_clparms_t *)pcparm.pc_clparms); - param->sched_priority = - sched_get_priority_min((int)(pcparm.pc_cid + _SCHED_NEXT)); +/* + * Utility function, private to libc, used by sched_setscheduler() + * and posix_spawn(). 
Because it is called by the vfork() child of + * posix_spawn(), we must not call any functions exported from libc. + */ +id_t +setparam(idtype_t idtype, id_t id, int policy, int prio) +{ + pcparms_t pcparm; + const pcclass_t *pccp; + + if (policy == SCHED_SYS || + (pccp = get_info_by_policy(policy)) == NULL || + prio < pccp->pcc_primin || prio > pccp->pcc_primax) { + errno = EINVAL; + return (-1); } - return (0); + pcparm.pc_cid = pccp->pcc_info.pc_cid; + if (set_priority(idtype, id, policy, prio, &pcparm, 1) == -1) + return (-1); + return (pccp->pcc_info.pc_cid); } int sched_setscheduler(pid_t pid, int policy, const struct sched_param *param) { pri_t prio = param->sched_priority; - pcparms_t pcparm; int oldpolicy; - tsinfo_t *tsi; - tsparms_t *tsp; - int scale; if ((oldpolicy = sched_getscheduler(pid)) < 0) return (-1); @@ -366,56 +444,7 @@ sched_setscheduler(pid_t pid, int policy, const struct sched_param *param) if (pid == 0) pid = P_MYID; - if (get_info_by_policy(policy) < 0) { - errno = EINVAL; - return (-1); - } - - switch (policy) { - case SCHED_FIFO: - case SCHED_RR: - if (prio < rt_class.pcc_primin || prio > rt_class.pcc_primax) { - errno = EINVAL; - return (-1); - } - pcparm.pc_cid = rt_class.pcc_info.pc_cid; - ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; - ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = - (policy == SCHED_RR ? RT_TQDEF : RT_TQINF); - break; - - case SCHED_OTHER: - pcparm.pc_cid = ts_class.pcc_info.pc_cid; - tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo; - scale = tsi->ts_maxupri; - tsp = (tsparms_t *)pcparm.pc_clparms; - tsp->ts_uprilim = tsp->ts_upri = -(scale * prio) / 20; - break; - - default: - switch (policy) { - case SCHED_SYS: - pcparm.pc_cid = sys_class.pcc_info.pc_cid; - break; - case SCHED_IA: - pcparm.pc_cid = ia_class.pcc_info.pc_cid; - break; - default: - pcparm.pc_cid = policy - _SCHED_NEXT; - break; - } - /* - * policy is not defined by POSIX.4. - * just pass parameter data through to priocntl. 
- * param should contain an image of class-specific parameters - * (after the sched_priority member). - */ - *((pc_clparms_t *)pcparm.pc_clparms) = - *((pc_clparms_t *)&(param->sched_priority)+1); - } - - /* setting scheduling policy & parameters for the process */ - if (priocntl(P_PID, pid, PC_SETPARMS, (caddr_t)&pcparm) == -1) + if (setparam(P_PID, pid, policy, prio) == -1) return (-1); return (oldpolicy); @@ -425,6 +454,7 @@ int sched_getscheduler(pid_t pid) { pcparms_t pcparm; + const pcclass_t *pccp; int policy; if (pid < 0) { @@ -434,28 +464,13 @@ sched_getscheduler(pid_t pid) if (pid == 0) pid = P_MYID; - /* get scheduling policy & parameters for the process */ - pcparm.pc_cid = PC_CLNULL; - if (priocntl(P_PID, pid, PC_GETPARMS, (caddr_t)&pcparm) == -1) - return (-1); - if (get_info_by_class(pcparm.pc_cid) < 0) + if ((pccp = get_parms(P_PID, pid, &pcparm)) == NULL) return (-1); - if (pcparm.pc_cid == rt_class.pcc_info.pc_cid) - policy = ((((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs == - RT_TQINF ? SCHED_FIFO : SCHED_RR)); - else if (pcparm.pc_cid == ts_class.pcc_info.pc_cid) - policy = SCHED_OTHER; - else if (pcparm.pc_cid == sys_class.pcc_info.pc_cid) - policy = SCHED_SYS; - else if (pcparm.pc_cid == ia_class.pcc_info.pc_cid) - policy = SCHED_IA; - else { - /* - * policy is not defined by POSIX.4 - * return a unique dot4 policy id. - */ - policy = (int)(_SCHED_NEXT + pcparm.pc_cid); + if ((policy = pccp->pcc_policy) == SCHED_FIFO || policy == SCHED_RR) { + policy = + (((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs == RT_TQINF? 
+ SCHED_FIFO : SCHED_RR); } return (policy); @@ -471,25 +486,10 @@ sched_yield(void) int sched_get_priority_max(int policy) { - pcpri_t pcpri; - - if (get_info_by_policy(policy) < 0) - return (-1); - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return (rt_class.pcc_primax); - else if (policy == SCHED_OTHER) - return (ts_class.pcc_primax); - else if (policy == SCHED_SYS) - return (sys_class.pcc_primax); - else if (policy == SCHED_IA) - return (ia_class.pcc_primax); - else { /* policy not in POSIX.4 */ - pcpri.pc_cid = policy - _SCHED_NEXT; - if (priocntl(0, 0, PC_GETPRIRANGE, (caddr_t)&pcpri) == 0) - return (map_gp_to_rtpri(pcpri.pc_clpmax)); - } + const pcclass_t *pccp; + if ((pccp = get_info_by_policy(policy)) != NULL) + return (pccp->pcc_primax); errno = EINVAL; return (-1); } @@ -497,25 +497,10 @@ sched_get_priority_max(int policy) int sched_get_priority_min(int policy) { - pcpri_t pcpri; - - if (get_info_by_policy(policy) < 0) - return (-1); - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return (rt_class.pcc_primin); - else if (policy == SCHED_OTHER) - return (ts_class.pcc_primin); - else if (policy == SCHED_SYS) - return (sys_class.pcc_primin); - else if (policy == SCHED_IA) - return (ia_class.pcc_primin); - else { /* policy not in POSIX.4 */ - pcpri.pc_cid = policy - _SCHED_NEXT; - if (priocntl(0, 0, PC_GETPRIRANGE, (caddr_t)&pcpri) == 0) - return (map_gp_to_rtpri(pcpri.pc_clpmin)); - } + const pcclass_t *pccp; + if ((pccp = get_info_by_policy(policy)) != NULL) + return (pccp->pcc_primin); errno = EINVAL; return (-1); } @@ -524,6 +509,7 @@ int sched_rr_get_interval(pid_t pid, timespec_t *interval) { pcparms_t pcparm; + const pcclass_t *pccp; if (pid < 0) { errno = ESRCH; @@ -532,22 +518,119 @@ sched_rr_get_interval(pid_t pid, timespec_t *interval) if (pid == 0) pid = P_MYID; - if (get_info_by_policy(SCHED_RR) < 0) + if ((pccp = get_parms(P_PID, pid, &pcparm)) == NULL) return (-1); - pcparm.pc_cid = PC_CLNULL; - if (priocntl(P_PID, pid, 
PC_GETPARMS, (caddr_t)&pcparm) == -1) - return (-1); + /* + * At the moment, we have no class-independent method to fetch + * the process/lwp time quantum. Since SUSv3 does not restrict + * this operation to the real-time class, we return an indefinite + * quantum (tv_sec == 0 and tv_nsec == 0) for scheduling policies + * for which this information isn't available. + */ + interval->tv_sec = 0; + interval->tv_nsec = 0; + + switch (pccp->pcc_policy) { + case SCHED_FIFO: + case SCHED_RR: + { + rtparms_t *rtp = (rtparms_t *)pcparm.pc_clparms; + if (rtp->rt_tqnsecs != RT_TQINF) { + interval->tv_sec = rtp->rt_tqsecs; + interval->tv_nsec = rtp->rt_tqnsecs; + } + } + break; + case SCHED_FX: + { + fxparms_t *fxp = (fxparms_t *)pcparm.pc_clparms; + if (fxp->fx_tqnsecs != FX_TQINF) { + interval->tv_sec = fxp->fx_tqsecs; + interval->tv_nsec = fxp->fx_tqnsecs; + } + } + break; + } + + return (0); +} - if (pcparm.pc_cid == rt_class.pcc_info.pc_cid && - (((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs != RT_TQINF)) { - /* SCHED_RR */ - interval->tv_sec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqsecs; - interval->tv_nsec = - ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs; - return (0); +/* + * Initialize or update ul_policy, ul_cid, and ul_pri. + */ +void +update_sched(ulwp_t *self) +{ + volatile sc_shared_t *scp; + pcparms_t pcparm; + pcprio_t pcprio; + const pcclass_t *pccp; + int priority; + int policy; + + ASSERT(self == curthread); + + enter_critical(self); + + if ((scp = self->ul_schedctl) == NULL && + (scp = setup_schedctl()) == NULL) { /* can't happen? 
*/ + if (self->ul_policy < 0) { + self->ul_cid = 0; + self->ul_pri = 0; + _membar_producer(); + self->ul_policy = SCHED_OTHER; + } + exit_critical(self); + return; } - errno = EINVAL; - return (-1); + if (self->ul_policy >= 0 && + self->ul_cid == scp->sc_cid && + (self->ul_pri == scp->sc_cpri || + (self->ul_epri > 0 && self->ul_epri == scp->sc_cpri))) { + exit_critical(self); + return; + } + + pccp = get_parms(P_LWPID, P_MYID, &pcparm); + if (pccp == NULL) { /* can't happen? */ + self->ul_cid = scp->sc_cid; + self->ul_pri = scp->sc_cpri; + _membar_producer(); + self->ul_policy = SCHED_OTHER; + exit_critical(self); + return; + } + + switch (policy = pccp->pcc_policy) { + case SCHED_OTHER: + priority = ((tsparms_t *)pcparm.pc_clparms)->ts_upri; + break; + case SCHED_FIFO: + case SCHED_RR: + priority = ((rtparms_t *)pcparm.pc_clparms)->rt_pri; + policy = + ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs == RT_TQINF? + SCHED_FIFO : SCHED_RR; + break; + default: + /* + * Class-independent method for getting the priority. + */ + pcprio.pc_op = PC_GETPRIO; + pcprio.pc_cid = 0; + pcprio.pc_val = 0; + if (_private_priocntl(P_LWPID, P_MYID, PC_DOPRIO, &pcprio) == 0) + priority = pcprio.pc_val; + else + priority = 0; + } + + self->ul_cid = pcparm.pc_cid; + self->ul_pri = priority; + _membar_producer(); + self->ul_policy = policy; + + exit_critical(self); } diff --git a/usr/src/lib/libc/port/threads/pthr_attr.c b/usr/src/lib/libc/port/threads/pthr_attr.c index bcae664e13..dc7056c067 100644 --- a/usr/src/lib/libc/port/threads/pthr_attr.c +++ b/usr/src/lib/libc/port/threads/pthr_attr.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -45,7 +45,7 @@ def_thrattr(void) PTHREAD_SCOPE_PROCESS, /* scope */ 0, /* prio */ SCHED_OTHER, /* policy */ - PTHREAD_EXPLICIT_SCHED, /* inherit */ + PTHREAD_INHERIT_SCHED, /* inherit */ 0 /* guardsize */ }; if (thrattr.guardsize == 0) @@ -94,7 +94,7 @@ _pthread_attr_clone(pthread_attr_t *attr, const pthread_attr_t *old_attr) { thrattr_t *ap; const thrattr_t *old_ap = - old_attr? old_attr->__pthread_attrp : def_thrattr(); + old_attr? old_attr->__pthread_attrp : def_thrattr(); if (old_ap == NULL) return (EINVAL); @@ -336,8 +336,7 @@ _pthread_attr_getinheritsched(const pthread_attr_t *attr, int *inherit) } /* - * pthread_attr_setschedpolicy: sets the scheduling policy to SCHED_RR, - * SCHED_FIFO or SCHED_OTHER. + * pthread_attr_setschedpolicy: sets the scheduling policy. */ #pragma weak pthread_attr_setschedpolicy = _pthread_attr_setschedpolicy int @@ -346,9 +345,7 @@ _pthread_attr_setschedpolicy(pthread_attr_t *attr, int policy) thrattr_t *ap; if (attr != NULL && (ap = attr->__pthread_attrp) != NULL && - (policy == SCHED_OTHER || - policy == SCHED_FIFO || - policy == SCHED_RR)) { + policy != SCHED_SYS && get_info_by_policy(policy) != NULL) { ap->policy = policy; return (0); } @@ -382,23 +379,13 @@ _pthread_attr_setschedparam(pthread_attr_t *attr, const struct sched_param *param) { thrattr_t *ap; - int policy; - int pri; - if (attr == NULL || (ap = attr->__pthread_attrp) == NULL) - return (EINVAL); - - policy = ap->policy; - pri = param->sched_priority; - if (policy == SCHED_OTHER) { - if ((pri < THREAD_MIN_PRIORITY || pri > THREAD_MAX_PRIORITY) && - _validate_rt_prio(policy, pri)) - return (EINVAL); - } else if (_validate_rt_prio(policy, pri)) { - return (EINVAL); + if (attr != NULL && (ap = attr->__pthread_attrp) != NULL && + param != NULL) { + ap->prio = param->sched_priority; + return (0); } - ap->prio = pri; - return (0); + return (EINVAL); } /* diff --git a/usr/src/lib/libc/port/threads/pthr_mutex.c b/usr/src/lib/libc/port/threads/pthr_mutex.c index 
3eabd3de61..b49d5fb3d0 100644 --- a/usr/src/lib/libc/port/threads/pthr_mutex.c +++ b/usr/src/lib/libc/port/threads/pthr_mutex.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -109,10 +109,11 @@ _pthread_mutexattr_getpshared(const pthread_mutexattr_t *attr, int *pshared) int _pthread_mutexattr_setprioceiling(pthread_mutexattr_t *attr, int prioceiling) { + const pcclass_t *pccp = get_info_by_policy(SCHED_FIFO); mattr_t *ap; if (attr == NULL || (ap = attr->__pthread_mutexattrp) == NULL || - _validate_rt_prio(SCHED_FIFO, prioceiling)) + prioceiling < pccp->pcc_primin || prioceiling > pccp->pcc_primax) return (EINVAL); ap->prioceiling = prioceiling; return (0); @@ -238,23 +239,29 @@ _pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr) /* * pthread_mutex_setprioceiling: sets the prioceiling. + * From the SUSv3 (POSIX) specification for pthread_mutex_setprioceiling(): + * The process of locking the mutex need not + * adhere to the priority protect protocol. + * We pass the MUTEX_NOCEIL flag to mutex_lock_internal() so that + * a non-realtime thread can successfully execute this operation. 
*/ #pragma weak pthread_mutex_setprioceiling = _pthread_mutex_setprioceiling int _pthread_mutex_setprioceiling(pthread_mutex_t *mutex, int ceil, int *oceil) { mutex_t *mp = (mutex_t *)mutex; + const pcclass_t *pccp = get_info_by_policy(SCHED_FIFO); int error; if (!(mp->mutex_type & PTHREAD_PRIO_PROTECT) || - _validate_rt_prio(SCHED_FIFO, ceil) != 0) + ceil < pccp->pcc_primin || ceil > pccp->pcc_primax) return (EINVAL); - error = _private_mutex_lock(mp); + error = mutex_lock_internal(mp, NULL, MUTEX_LOCK | MUTEX_NOCEIL); if (error == 0) { if (oceil) *oceil = mp->mutex_ceiling; - mp->mutex_ceiling = (uint8_t)ceil; - error = _private_mutex_unlock(mp); + mp->mutex_ceiling = ceil; + error = mutex_unlock_internal(mp, 0); } return (error); } diff --git a/usr/src/lib/libc/port/threads/pthread.c b/usr/src/lib/libc/port/threads/pthread.c index baaadae7bd..10a4330faa 100644 --- a/usr/src/lib/libc/port/threads/pthread.c +++ b/usr/src/lib/libc/port/threads/pthread.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,6 +45,41 @@ typedef struct __once { #define once_flag oflag.pad32_flag[1] +static int +_thr_setparam(pthread_t tid, int policy, int prio) +{ + ulwp_t *ulwp; + id_t cid; + int error = 0; + + if ((ulwp = find_lwp(tid)) == NULL) { + error = ESRCH; + } else { + if (policy == ulwp->ul_policy && + (policy == SCHED_FIFO || policy == SCHED_RR) && + ulwp->ul_cid == ulwp->ul_rtclassid && + ulwp->ul_epri != 0) { + /* + * Don't change the ceiling priority, + * just the base priority. 
+ */ + if (prio > ulwp->ul_epri) + error = EPERM; + else + ulwp->ul_pri = prio; + } else if ((cid = setparam(P_LWPID, tid, policy, prio)) == -1) { + error = errno; + } else { + ulwp->ul_cid = cid; + ulwp->ul_pri = prio; + _membar_producer(); + ulwp->ul_policy = policy; + } + ulwp_unlock(ulwp, curthread->ul_uberdata); + } + return (error); +} + /* * pthread_create: creates a thread in the current process. * calls common _thrp_create() after copying the attributes. @@ -55,67 +90,56 @@ _pthread_create(pthread_t *thread, const pthread_attr_t *attr, void * (*start_routine)(void *), void *arg) { ulwp_t *self = curthread; - uberdata_t *udp = self->ul_uberdata; const thrattr_t *ap = attr? attr->__pthread_attrp : def_thrattr(); + const pcclass_t *pccp; long flag; pthread_t tid; - int policy; - pri_t priority; int error; - int mapped = 0; - int mappedpri; - int rt = 0; + + update_sched(self); if (ap == NULL) return (EINVAL); - if (ap->inherit == PTHREAD_INHERIT_SCHED) { - policy = self->ul_policy; - priority = self->ul_pri; - mapped = self->ul_pri_mapped; - mappedpri = self->ul_mappedpri; - } else { - policy = ap->policy; - priority = ap->prio; - if (policy == SCHED_OTHER) { - if (priority < THREAD_MIN_PRIORITY || - priority > THREAD_MAX_PRIORITY) { - if (_validate_rt_prio(policy, priority)) - return (EINVAL); - mapped = 1; - mappedpri = priority; - priority = map_rtpri_to_gp(priority); - ASSERT(priority >= THREAD_MIN_PRIORITY && - priority <= THREAD_MAX_PRIORITY); - } - } else if (policy == SCHED_FIFO || policy == SCHED_RR) { - if (_validate_rt_prio(policy, priority)) - return (EINVAL); - if (_private_geteuid() == 0) - rt = 1; - } else { - return (EINVAL); - } - } + /* validate explicit scheduling attributes */ + if (ap->inherit == PTHREAD_EXPLICIT_SCHED && + (ap->policy == SCHED_SYS || + (pccp = get_info_by_policy(ap->policy)) == NULL || + ap->prio < pccp->pcc_primin || ap->prio > pccp->pcc_primax)) + return (EINVAL); flag = ap->scope | ap->detachstate | ap->daemonstate | 
THR_SUSPENDED; error = _thrp_create(ap->stkaddr, ap->stksize, start_routine, arg, - flag, &tid, priority, policy, ap->guardsize); + flag, &tid, ap->guardsize); if (error == 0) { - int prio_err; - - if (mapped) { + if (ap->inherit == PTHREAD_EXPLICIT_SCHED && + (ap->policy != self->ul_policy || + ap->prio != (self->ul_epri? self->ul_epri : self->ul_pri))) + /* + * The SUSv3 specification requires pthread_create() + * to fail with EPERM if it cannot set the scheduling + * policy and parameters on the new thread. + */ + error = _thr_setparam(tid, ap->policy, ap->prio); + if (error) { + /* + * We couldn't determine this error before + * actually creating the thread. To recover, + * mark the thread detached and cancel it. + * It is as though it was never created. + */ ulwp_t *ulwp = find_lwp(tid); - ulwp->ul_pri_mapped = 1; - ulwp->ul_mappedpri = mappedpri; - ulwp_unlock(ulwp, udp); - } - - if (rt && (prio_err = _thrp_setlwpprio(tid, policy, priority))) - return (prio_err); - - if (thread) + if (ulwp->ul_detached == 0) { + ulwp->ul_detached = 1; + ulwp->ul_usropts |= THR_DETACHED; + (void) __lwp_detach(tid); + } + ulwp->ul_cancel_pending = 2; /* cancelled on creation */ + ulwp->ul_cancel_disabled = 0; + ulwp_unlock(ulwp, self->ul_uberdata); + } else if (thread) { *thread = tid; + } (void) _thr_continue(tid); } @@ -166,135 +190,51 @@ _pthread_equal(pthread_t t1, pthread_t t2) } /* - * pthread_getschedparam: gets the sched parameters in a struct. + * pthread_getschedparam: get the thread's sched parameters. 
*/ #pragma weak pthread_getschedparam = _pthread_getschedparam int _pthread_getschedparam(pthread_t tid, int *policy, struct sched_param *param) { - uberdata_t *udp = curthread->ul_uberdata; ulwp_t *ulwp; + id_t cid; int error = 0; - if (param == NULL || policy == NULL) - error = EINVAL; - else if ((ulwp = find_lwp(tid)) == NULL) + if ((ulwp = find_lwp(tid)) == NULL) { error = ESRCH; - else { - if (ulwp->ul_pri_mapped) - param->sched_priority = ulwp->ul_mappedpri; - else + } else { + cid = getparam(P_LWPID, ulwp->ul_lwpid, policy, param); + if (cid == -1) { + error = errno; + } else if (*policy == ulwp->ul_policy && cid == ulwp->ul_cid && + (*policy == SCHED_FIFO || *policy == SCHED_RR)) { + /* + * Return the defined priority, not the effective + * priority from priority ceiling mutexes. + */ param->sched_priority = ulwp->ul_pri; - *policy = ulwp->ul_policy; - ulwp_unlock(ulwp, udp); + } else { + ulwp->ul_cid = cid; + ulwp->ul_pri = param->sched_priority; + _membar_producer(); + ulwp->ul_policy = *policy; + } + ulwp_unlock(ulwp, curthread->ul_uberdata); } return (error); } -/* - * Besides the obvious arguments, the inheritflag needs to be explained: - * If set to PRIO_SET or PRIO_SET_PRIO, it does the normal, expected work - * of setting thread's assigned scheduling parameters and policy. - * If set to PRIO_INHERIT, it sets the thread's effective priority values - * (t_epri, t_empappedpri), and does not update the assigned priority values - * (t_pri, t_mappedpri). If set to PRIO_DISINHERIT, it clears the thread's - * effective priority values, and reverts the thread, if necessary, back - * to the assigned priority values. 
- */ +#pragma weak thr_getprio = _thr_getprio int -_thread_setschedparam_main(pthread_t tid, int policy, - const struct sched_param *param, int inheritflag) +_thr_getprio(thread_t tid, int *priority) { - uberdata_t *udp = curthread->ul_uberdata; - ulwp_t *ulwp; - int error = 0; - int prio; - int opolicy; - int mappedprio; - int mapped = 0; - pri_t *mappedprip; - - if (param == NULL) - return (EINVAL); - if ((ulwp = find_lwp(tid)) == NULL) - return (ESRCH); - prio = param->sched_priority; - opolicy = ulwp->ul_policy; - if (inheritflag == PRIO_SET_PRIO) { /* don't change policy */ - policy = opolicy; - inheritflag = PRIO_SET; - } - ASSERT(inheritflag == PRIO_SET || opolicy == policy); - if (inheritflag == PRIO_DISINHERIT) { - ulwp->ul_emappedpri = 0; - ulwp->ul_epri = 0; - prio = ulwp->ul_pri; /* ignore prio in sched_param */ - } - if (policy == SCHED_OTHER) { - /* - * Set thread's policy to OTHER - */ - if (prio < THREAD_MIN_PRIORITY || prio > THREAD_MAX_PRIORITY) { - if (_validate_rt_prio(policy, prio)) { - error = EINVAL; - goto out; - } - mapped = 1; - mappedprio = prio; - prio = map_rtpri_to_gp(prio); - ASSERT(prio >= THREAD_MIN_PRIORITY && - prio <= THREAD_MAX_PRIORITY); - } - /* - * Thread changing from FIFO/RR to OTHER - */ - if (opolicy == SCHED_FIFO || opolicy == SCHED_RR) { - if ((error = _thrp_setlwpprio(tid, policy, prio)) != 0) - goto out; - } - if (inheritflag != PRIO_DISINHERIT) { - if (inheritflag == PRIO_INHERIT) - mappedprip = &ulwp->ul_emappedpri; - else - mappedprip = &ulwp->ul_mappedpri; - if (mapped) { - ulwp->ul_pri_mapped = 1; - *mappedprip = mappedprio; - } else { - ulwp->ul_pri_mapped = 0; - *mappedprip = 0; - } - } - ulwp->ul_policy = policy; - if (inheritflag == PRIO_INHERIT) - ulwp->ul_epri = prio; - else - ulwp->ul_pri = prio; - } else if (policy == SCHED_FIFO || policy == SCHED_RR) { - if (_validate_rt_prio(policy, prio)) - error = EINVAL; - else { - int prio_err; - - if (_private_geteuid() == 0 && - (prio_err = _thrp_setlwpprio(tid, 
policy, prio))) { - error = prio_err; - goto out; - } + struct sched_param param; + int policy; + int error; - ulwp->ul_policy = policy; - if (inheritflag == PRIO_INHERIT) - ulwp->ul_epri = prio; - else - ulwp->ul_pri = prio; - } - } else { - error = EINVAL; - } - -out: - ulwp_unlock(ulwp, udp); + if ((error = _pthread_getschedparam(tid, &policy, ¶m)) == 0) + *priority = param.sched_priority; return (error); } @@ -306,5 +246,26 @@ int _pthread_setschedparam(pthread_t tid, int policy, const struct sched_param *param) { - return (_thread_setschedparam_main(tid, policy, param, PRIO_SET)); + return (_thr_setparam(tid, policy, param->sched_priority)); +} + +#pragma weak thr_setprio = _thr_setprio +#pragma weak pthread_setschedprio = _thr_setprio +#pragma weak _pthread_setschedprio = _thr_setprio +int +_thr_setprio(thread_t tid, int prio) +{ + struct sched_param param; + int policy; + int error; + + /* + * _pthread_getschedparam() has the side-effect of setting + * the target thread's ul_policy, ul_pri and ul_cid correctly. + */ + if ((error = _pthread_getschedparam(tid, &policy, ¶m)) != 0) + return (error); + if (param.sched_priority == prio) /* no change */ + return (0); + return (_thr_setparam(tid, policy, prio)); } diff --git a/usr/src/lib/libc/port/threads/rwlock.c b/usr/src/lib/libc/port/threads/rwlock.c index 0f58b3a230..5770049bb1 100644 --- a/usr/src/lib/libc/port/threads/rwlock.c +++ b/usr/src/lib/libc/port/threads/rwlock.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -105,7 +105,7 @@ rwl_entry(rwlock_t *rwlp) */ readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t)); (void) _memcpy(readlockp, self->ul_readlock.array, - nlocks * sizeof (readlock_t)); + nlocks * sizeof (readlock_t)); lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t)); self->ul_readlock.array = readlockp; self->ul_rdlockcnt *= 2; @@ -254,7 +254,7 @@ read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag) { volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; uint32_t mask = ignore_waiters_flag? - URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED); + URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED); uint32_t readers; ulwp_t *self = curthread; @@ -298,8 +298,8 @@ write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag) { volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; uint32_t mask = ignore_waiters_flag? - (URW_WRITE_LOCKED | URW_READERS_MASK) : - (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK); + (URW_WRITE_LOCKED | URW_READERS_MASK) : + (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK); ulwp_t *self = curthread; uint32_t readers; @@ -347,12 +347,12 @@ rw_queue_release(queue_head_t *qp, rwlock_t *rwlp) volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; uint32_t readers; uint32_t writers; - int nlwpid = 0; - int maxlwps = MAXLWPS; - ulwp_t *self; ulwp_t **ulwpp; ulwp_t *ulwp; - ulwp_t *prev = NULL; + ulwp_t *prev; + int nlwpid = 0; + int more; + int maxlwps = MAXLWPS; lwpid_t buffer[MAXLWPS]; lwpid_t *lwpid = buffer; @@ -366,9 +366,9 @@ rw_queue_release(queue_head_t *qp, rwlock_t *rwlp) writers = 0; /* - * Walk the list of waiters and prepare to wake up as - * many readers as we encounter before encountering - * a writer. If the first thread on the list is a + * Examine the queue of waiters in priority order and prepare + * to wake up as many readers as we encounter before encountering + * a writer. 
If the highest priority thread on the queue is a * writer, stop there and wake it up. * * We keep track of lwpids that are to be unparked in lwpid[]. @@ -383,13 +383,9 @@ rw_queue_release(queue_head_t *qp, rwlock_t *rwlp) * alloc_lwpids() to allocate a bigger buffer using the mmap() * system call directly since that path acquires no locks. */ - ulwpp = &qp->qh_head; - while ((ulwp = *ulwpp) != NULL) { - if (ulwp->ul_wchan != rwlp) { - prev = ulwp; - ulwpp = &ulwp->ul_link; - continue; - } + while ((ulwpp = queue_slot(qp, &prev, &more)) != NULL) { + ulwp = *ulwpp; + ASSERT(ulwp->ul_wchan == rwlp); if (ulwp->ul_writer) { if (writers != 0 || readers != 0) break; @@ -403,15 +399,17 @@ rw_queue_release(queue_head_t *qp, rwlock_t *rwlp) if (nlwpid == maxlwps) lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); } - (void) queue_unlink(qp, ulwpp, prev); + queue_unlink(qp, ulwpp, prev); + ulwp->ul_sleepq = NULL; + ulwp->ul_wchan = NULL; lwpid[nlwpid++] = ulwp->ul_lwpid; } - if (ulwp == NULL) + if (ulwpp == NULL) atomic_and_32(rwstate, ~URW_HAS_WAITERS); if (nlwpid == 0) { queue_unlock(qp); } else { - self = curthread; + ulwp_t *self = curthread; no_preempt(self); queue_unlock(qp); if (nlwpid == 1) @@ -440,7 +438,6 @@ shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr) { volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; mutex_t *mp = &rwlp->mutex; - /* LINTED set but not used */ uint32_t readers; int try_flag; int error; @@ -517,6 +514,7 @@ rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr) queue_head_t *qp; ulwp_t *ulwp; int try_flag; + int ignore_waiters_flag; int error = 0; try_flag = (rd_wr & TRY_FLAG); @@ -528,15 +526,18 @@ rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr) } qp = queue_lock(rwlp, MX); -retry: + /* initial attempt to acquire the lock fails if there are waiters */ + ignore_waiters_flag = 0; while (error == 0) { if (rd_wr == READ_LOCK) { - if (read_lock_try(rwlp, 0)) - goto out; + if (read_lock_try(rwlp, 
ignore_waiters_flag)) + break; } else { - if (write_lock_try(rwlp, 0)) - goto out; + if (write_lock_try(rwlp, ignore_waiters_flag)) + break; } + /* subsequent attempts do not fail due to waiters */ + ignore_waiters_flag = 1; atomic_or_32(rwstate, URW_HAS_WAITERS); readers = *rwstate; ASSERT_CONSISTENT_STATE(readers); @@ -544,10 +545,15 @@ retry: (rd_wr == WRITE_LOCK && (readers & URW_READERS_MASK) != 0)) /* EMPTY */; /* somebody holds the lock */ - else if ((ulwp = queue_waiter(qp, rwlp)) == NULL) { + else if ((ulwp = queue_waiter(qp)) == NULL) { atomic_and_32(rwstate, ~URW_HAS_WAITERS); - break; /* no queued waiters */ + continue; /* no queued waiters, try again */ } else { + /* + * Do a priority check on the queued waiter (the + * highest priority thread on the queue) to see + * if we should defer to him or just grab the lock. + */ int our_pri = real_priority(self); int his_pri = real_priority(ulwp); @@ -557,7 +563,7 @@ retry: * a higher priority than ours. */ if (his_pri <= our_pri) - break; + continue; /* try again */ } else { /* * We defer to a queued thread that has @@ -566,7 +572,7 @@ retry: */ if (his_pri < our_pri || (his_pri == our_pri && !ulwp->ul_writer)) - break; + continue; /* try again */ } } /* @@ -578,33 +584,21 @@ retry: break; } /* - * Enqueue writers ahead of readers of the - * same priority. + * Enqueue writers ahead of readers. 
*/ self->ul_writer = rd_wr; /* *must* be 0 or 1 */ - enqueue(qp, self, rwlp, MX); + enqueue(qp, self, 0); set_parking_flag(self, 1); queue_unlock(qp); if ((error = __lwp_park(tsp, 0)) == EINTR) - error = 0; - self->ul_writer = 0; + error = ignore_waiters_flag = 0; set_parking_flag(self, 0); qp = queue_lock(rwlp, MX); - if (self->ul_sleepq && dequeue_self(qp, rwlp) == 0) + if (self->ul_sleepq && dequeue_self(qp) == 0) atomic_and_32(rwstate, ~URW_HAS_WAITERS); + self->ul_writer = 0; } - if (error == 0) { - if (rd_wr == READ_LOCK) { - if (!read_lock_try(rwlp, 1)) - goto retry; - } else { - if (!write_lock_try(rwlp, 1)) - goto retry; - } - } - -out: queue_unlock(qp); if (!try_flag) { diff --git a/usr/src/lib/libc/port/threads/sema.c b/usr/src/lib/libc/port/threads/sema.c index f2894a6df7..1378facf49 100644 --- a/usr/src/lib/libc/port/threads/sema.c +++ b/usr/src/lib/libc/port/threads/sema.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -123,7 +123,6 @@ sema_wait_impl(sema_t *sp, timespec_t *tsp) } else { /* multithreaded or blocking */ queue_head_t *qp; ulwp_t *ulwp; - int more; lwpid_t lwpid = 0; qp = queue_lock(lsp, CV); @@ -132,7 +131,7 @@ sema_wait_impl(sema_t *sp, timespec_t *tsp) * SUSV3 requires FIFO queueing for semaphores, * at least for SCHED_FIFO and SCHED_RR scheduling. 
*/ - enqueue(qp, self, lsp, CV | FIFOQ); + enqueue(qp, self, 1); lsp->sema_waiters = 1; set_parking_flag(self, 1); queue_unlock(qp); @@ -148,18 +147,17 @@ sema_wait_impl(sema_t *sp, timespec_t *tsp) set_parking_flag(self, 0); qp = queue_lock(lsp, CV); if (self->ul_sleepq) /* timeout or spurious wakeup */ - lsp->sema_waiters = dequeue_self(qp, lsp); + lsp->sema_waiters = dequeue_self(qp); } if (error == 0) lsp->count--; if (lsp->count != 0 && lsp->sema_waiters) { - if ((ulwp = dequeue(qp, lsp, &more)) == NULL) - lsp->sema_waiters = 0; - else { + int more; + if ((ulwp = dequeue(qp, &more)) != NULL) { no_preempt(self); lwpid = ulwp->ul_lwpid; - lsp->sema_waiters = (more? 1 : 0); } + lsp->sema_waiters = more; } queue_unlock(qp); if (lwpid) { @@ -245,20 +243,18 @@ _sema_trywait(sema_t *sp) } else { /* multithreaded */ queue_head_t *qp; ulwp_t *ulwp; - int more; lwpid_t lwpid = 0; qp = queue_lock(lsp, CV); if (lsp->count == 0) error = EBUSY; else if (--lsp->count != 0 && lsp->sema_waiters) { - if ((ulwp = dequeue(qp, lsp, &more)) == NULL) - lsp->sema_waiters = 0; - else { + int more; + if ((ulwp = dequeue(qp, &more)) != NULL) { no_preempt(self); lwpid = ulwp->ul_lwpid; - lsp->sema_waiters = (more? 1 : 0); } + lsp->sema_waiters = more; } queue_unlock(qp); if (lwpid) { @@ -314,20 +310,18 @@ _sema_post(sema_t *sp) } else { /* multithreaded */ queue_head_t *qp; ulwp_t *ulwp; - int more; lwpid_t lwpid = 0; qp = queue_lock(lsp, CV); if (lsp->count >= _semvaluemax) error = EOVERFLOW; else if (lsp->count++ == 0 && lsp->sema_waiters) { - if ((ulwp = dequeue(qp, lsp, &more)) == NULL) - lsp->sema_waiters = 0; - else { + int more; + if ((ulwp = dequeue(qp, &more)) != NULL) { no_preempt(self); lwpid = ulwp->ul_lwpid; - lsp->sema_waiters = (more? 
1 : 0); } + lsp->sema_waiters = more; } queue_unlock(qp); if (lwpid) { diff --git a/usr/src/lib/libc/port/threads/spawn.c b/usr/src/lib/libc/port/threads/spawn.c index 2e81ab0634..5c0d505a24 100644 --- a/usr/src/lib/libc/port/threads/spawn.c +++ b/usr/src/lib/libc/port/threads/spawn.c @@ -30,14 +30,9 @@ #include "thr_uberdata.h" #include <sys/libc_kernel.h> #include <sys/procset.h> -#include <sys/rtpriocntl.h> -#include <sys/tspriocntl.h> #include <sys/fork.h> -#include <sys/rt.h> -#include <sys/ts.h> #include <alloca.h> #include <spawn.h> -#include "rtsched.h" #define ALL_POSIX_SPAWN_FLAGS \ (POSIX_SPAWN_RESETIDS | \ @@ -50,8 +45,8 @@ POSIX_SPAWN_WAITPID_NP) typedef struct { - short sa_psflags; /* POSIX_SPAWN_* flags */ - pri_t sa_priority; + int sa_psflags; /* POSIX_SPAWN_* flags */ + int sa_priority; int sa_schedpolicy; pid_t sa_pgroup; sigset_t sa_sigdefault; @@ -70,8 +65,6 @@ typedef struct file_attr { int fa_newfiledes; /* new file descriptor for dup2() */ } file_attr_t; -extern struct pcclass ts_class, rt_class; - extern pid_t _vforkx(int); #pragma unknown_control_flow(_vforkx) extern void *_private_memset(void *, int, size_t); @@ -89,95 +82,6 @@ extern uid_t _private_getuid(void); extern uid_t _private_geteuid(void); extern void _private_exit(int); -/* - * We call this function rather than priocntl() because we must not call - * any function that is exported from libc while in the child of vfork(). - * Also, we are not using PC_GETXPARMS or PC_SETXPARMS so we can use - * the simple call to __priocntlset() rather than the varargs version. 
- */ -static long -_private_priocntl(idtype_t idtype, id_t id, int cmd, caddr_t arg) -{ - extern long _private__priocntlset(int, procset_t *, int, caddr_t, ...); - procset_t procset; - - setprocset(&procset, POP_AND, idtype, id, P_ALL, 0); - return (_private__priocntlset(PC_VERSION, &procset, cmd, arg, 0)); -} - -/* - * The following two functions are blatently stolen from - * sched_setscheduler() and sched_setparam() in librt. - * This would be a lot easier if librt were folded into libc. - */ -static int -setscheduler(int policy, pri_t prio) -{ - pcparms_t pcparm; - tsinfo_t *tsi; - tsparms_t *tsp; - int scale; - - switch (policy) { - case SCHED_FIFO: - case SCHED_RR: - if (prio < rt_class.pcc_primin || prio > rt_class.pcc_primax) { - errno = EINVAL; - return (-1); - } - pcparm.pc_cid = rt_class.pcc_info.pc_cid; - ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; - ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = - (policy == SCHED_RR ? RT_TQDEF : RT_TQINF); - break; - - case SCHED_OTHER: - pcparm.pc_cid = ts_class.pcc_info.pc_cid; - tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo; - scale = tsi->ts_maxupri; - tsp = (tsparms_t *)pcparm.pc_clparms; - tsp->ts_uprilim = tsp->ts_upri = -(scale * prio) / 20; - break; - - default: - errno = EINVAL; - return (-1); - } - - return (_private_priocntl(P_PID, P_MYID, - PC_SETPARMS, (caddr_t)&pcparm)); -} - -static int -setparam(pcparms_t *pcparmp, pri_t prio) -{ - tsparms_t *tsp; - tsinfo_t *tsi; - int scale; - - if (pcparmp->pc_cid == rt_class.pcc_info.pc_cid) { - /* SCHED_FIFO or SCHED_RR policy */ - if (prio < rt_class.pcc_primin || prio > rt_class.pcc_primax) { - errno = EINVAL; - return (-1); - } - ((rtparms_t *)pcparmp->pc_clparms)->rt_tqnsecs = RT_NOCHANGE; - ((rtparms_t *)pcparmp->pc_clparms)->rt_pri = prio; - } else if (pcparmp->pc_cid == ts_class.pcc_info.pc_cid) { - /* SCHED_OTHER policy */ - tsi = (tsinfo_t *)ts_class.pcc_info.pc_clinfo; - scale = tsi->ts_maxupri; - tsp = (tsparms_t *)pcparmp->pc_clparms; - 
tsp->ts_uprilim = tsp->ts_upri = -(scale * prio) / 20; - } else { - errno = EINVAL; - return (-1); - } - - return (_private_priocntl(P_PID, P_MYID, - PC_SETPARMS, (caddr_t)pcparmp)); -} - static int perform_flag_actions(spawn_attr_t *sap) { @@ -209,20 +113,11 @@ perform_flag_actions(spawn_attr_t *sap) } if (sap->sa_psflags & POSIX_SPAWN_SETSCHEDULER) { - if (setscheduler(sap->sa_schedpolicy, sap->sa_priority) != 0) + if (setparam(P_LWPID, P_MYID, + sap->sa_schedpolicy, sap->sa_priority) == -1) return (errno); } else if (sap->sa_psflags & POSIX_SPAWN_SETSCHEDPARAM) { - /* - * Get the process's current scheduling parameters, - * then modify to set the new priority. - */ - pcparms_t pcparm; - - pcparm.pc_cid = PC_CLNULL; - if (_private_priocntl(P_PID, P_MYID, - PC_GETPARMS, (caddr_t)&pcparm) == -1) - return (errno); - if (setparam(&pcparm, sap->sa_priority) != 0) + if (setprio(P_LWPID, P_MYID, sap->sa_priority, NULL) == -1) return (errno); } @@ -682,17 +577,6 @@ _posix_spawnattr_setflags( (flags & ~ALL_POSIX_SPAWN_FLAGS)) return (EINVAL); - if (flags & (POSIX_SPAWN_SETSCHEDPARAM | POSIX_SPAWN_SETSCHEDULER)) { - /* - * Populate ts_class and rt_class. - * We will need them in the child of vfork(). - */ - if (rt_class.pcc_state == 0) - (void) get_info_by_policy(SCHED_FIFO); - if (ts_class.pcc_state == 0) - (void) get_info_by_policy(SCHED_OTHER); - } - sap->sa_psflags = flags; return (0); } @@ -789,17 +673,15 @@ _posix_spawnattr_setschedpolicy( { spawn_attr_t *sap = attr->__spawn_attrp; - if (sap == NULL) + if (sap == NULL || schedpolicy == SCHED_SYS) return (EINVAL); - switch (schedpolicy) { - case SCHED_OTHER: - case SCHED_FIFO: - case SCHED_RR: - break; - default: - return (EINVAL); - } + /* + * Cache the policy information for later use + * by the vfork() child of posix_spawn(). 
+ */ + if (get_info_by_policy(schedpolicy) == NULL) + return (errno); sap->sa_schedpolicy = schedpolicy; return (0); diff --git a/usr/src/lib/libc/port/threads/synch.c b/usr/src/lib/libc/port/threads/synch.c index b6a5be6634..626252671a 100644 --- a/usr/src/lib/libc/port/threads/synch.c +++ b/usr/src/lib/libc/port/threads/synch.c @@ -30,9 +30,22 @@ #include "lint.h" #include "thr_uberdata.h" +#include <sys/rtpriocntl.h> #include <sys/sdt.h> #include <atomic.h> +#if defined(THREAD_DEBUG) +#define INCR32(x) (((x) != UINT32_MAX)? (x)++ : 0) +#define INCR(x) ((x)++) +#define DECR(x) ((x)--) +#define MAXINCR(m, x) ((m < ++x)? (m = x) : 0) +#else +#define INCR32(x) +#define INCR(x) +#define DECR(x) +#define MAXINCR(m, x) +#endif + /* * This mutex is initialized to be held by lwp#1. * It is used to block a thread that has returned from a mutex_lock() @@ -120,7 +133,9 @@ int __mutex_init(mutex_t *mp, int type, void *arg) { int basetype = (type & ~ALL_ATTRIBUTES); + const pcclass_t *pccp; int error = 0; + int ceil; if (basetype == USYNC_PROCESS_ROBUST) { /* @@ -134,9 +149,14 @@ __mutex_init(mutex_t *mp, int type, void *arg) basetype = USYNC_PROCESS; } - if (!(basetype == USYNC_THREAD || basetype == USYNC_PROCESS) || + if (type & LOCK_PRIO_PROTECT) + pccp = get_info_by_policy(SCHED_FIFO); + if ((basetype != USYNC_THREAD && basetype != USYNC_PROCESS) || (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) - == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) { + == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT) || + ((type & LOCK_PRIO_PROTECT) && + ((ceil = *(int *)arg) < pccp->pcc_primin || + ceil > pccp->pcc_primax))) { error = EINVAL; } else if (type & LOCK_ROBUST) { /* @@ -156,8 +176,7 @@ __mutex_init(mutex_t *mp, int type, void *arg) _atomic_or_16(&mp->mutex_flag, LOCK_INITED); mp->mutex_magic = MUTEX_MAGIC; } else if (type != mp->mutex_type || - ((type & LOCK_PRIO_PROTECT) && - mp->mutex_ceiling != (*(int *)arg))) { + ((type & LOCK_PRIO_PROTECT) && mp->mutex_ceiling != ceil)) { error = 
EINVAL; } else if (__mutex_consistent(mp) != 0) { error = EBUSY; @@ -172,14 +191,15 @@ __mutex_init(mutex_t *mp, int type, void *arg) mp->mutex_magic = MUTEX_MAGIC; } - if (error == 0 && (type & LOCK_PRIO_PROTECT)) - mp->mutex_ceiling = (uint8_t)(*(int *)arg); + if (error == 0 && (type & LOCK_PRIO_PROTECT)) { + mp->mutex_ceiling = ceil; + } return (error); } /* - * Delete mp from list of ceil mutexes owned by curthread. + * Delete mp from list of ceiling mutexes owned by curthread. * Return 1 if the head of the chain was updated. */ int @@ -189,17 +209,20 @@ _ceil_mylist_del(mutex_t *mp) mxchain_t **mcpp; mxchain_t *mcp; - mcpp = &self->ul_mxchain; - while ((*mcpp)->mxchain_mx != mp) - mcpp = &(*mcpp)->mxchain_next; - mcp = *mcpp; - *mcpp = mcp->mxchain_next; - lfree(mcp, sizeof (*mcp)); - return (mcpp == &self->ul_mxchain); + for (mcpp = &self->ul_mxchain; + (mcp = *mcpp) != NULL; + mcpp = &mcp->mxchain_next) { + if (mcp->mxchain_mx == mp) { + *mcpp = mcp->mxchain_next; + lfree(mcp, sizeof (*mcp)); + return (mcpp == &self->ul_mxchain); + } + } + return (0); } /* - * Add mp to head of list of ceil mutexes owned by curthread. + * Add mp to the list of ceiling mutexes owned by curthread. * Return ENOMEM if no memory could be allocated. */ int @@ -217,26 +240,30 @@ _ceil_mylist_add(mutex_t *mp) } /* - * Inherit priority from ceiling. The inheritance impacts the effective - * priority, not the assigned priority. See _thread_setschedparam_main(). + * Helper function for _ceil_prio_inherit() and _ceil_prio_waive(), below. + */ +static void +set_rt_priority(ulwp_t *self, int prio) +{ + pcparms_t pcparm; + + pcparm.pc_cid = self->ul_rtclassid; + ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE; + ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; + (void) _private_priocntl(P_LWPID, self->ul_lwpid, PC_SETPARMS, &pcparm); +} + +/* + * Inherit priority from ceiling. + * This changes the effective priority, not the assigned priority. 
*/ void -_ceil_prio_inherit(int ceil) +_ceil_prio_inherit(int prio) { ulwp_t *self = curthread; - struct sched_param param; - (void) _memset(¶m, 0, sizeof (param)); - param.sched_priority = ceil; - if (_thread_setschedparam_main(self->ul_lwpid, - self->ul_policy, ¶m, PRIO_INHERIT)) { - /* - * Panic since unclear what error code to return. - * If we do return the error codes returned by above - * called routine, update the man page... - */ - thr_panic("_thread_setschedparam_main() fails"); - } + self->ul_epri = prio; + set_rt_priority(self, prio); } /* @@ -248,30 +275,17 @@ void _ceil_prio_waive(void) { ulwp_t *self = curthread; - struct sched_param param; + mxchain_t *mcp = self->ul_mxchain; + int prio; - (void) _memset(¶m, 0, sizeof (param)); - if (self->ul_mxchain == NULL) { - /* - * No ceil locks held. Zero the epri, revert back to ul_pri. - * Since thread's hash lock is not held, one cannot just - * read ul_pri here...do it in the called routine... - */ - param.sched_priority = self->ul_pri; /* ignored */ - if (_thread_setschedparam_main(self->ul_lwpid, - self->ul_policy, ¶m, PRIO_DISINHERIT)) - thr_panic("_thread_setschedparam_main() fails"); + if (mcp == NULL) { + prio = self->ul_pri; + self->ul_epri = 0; } else { - /* - * Set priority to that of the mutex at the head - * of the ceilmutex chain. - */ - param.sched_priority = - self->ul_mxchain->mxchain_mx->mutex_ceiling; - if (_thread_setschedparam_main(self->ul_lwpid, - self->ul_policy, ¶m, PRIO_INHERIT)) - thr_panic("_thread_setschedparam_main() fails"); + prio = mcp->mxchain_mx->mutex_ceiling; + self->ul_epri = prio; } + set_rt_priority(self, prio); } /* @@ -386,8 +400,7 @@ spin_lock_set(mutex_t *mp) /* * Spin for a while, attempting to acquire the lock. 
*/ - if (self->ul_spin_lock_spin != UINT_MAX) - self->ul_spin_lock_spin++; + INCR32(self->ul_spin_lock_spin); if (mutex_queuelock_adaptive(mp) == 0 || set_lock_byte(&mp->mutex_lockw) == 0) { mp->mutex_owner = (uintptr_t)self; @@ -397,8 +410,7 @@ spin_lock_set(mutex_t *mp) * Try harder if we were previously at a no premption level. */ if (self->ul_preempt > 1) { - if (self->ul_spin_lock_spin2 != UINT_MAX) - self->ul_spin_lock_spin2++; + INCR32(self->ul_spin_lock_spin2); if (mutex_queuelock_adaptive(mp) == 0 || set_lock_byte(&mp->mutex_lockw) == 0) { mp->mutex_owner = (uintptr_t)self; @@ -408,8 +420,7 @@ spin_lock_set(mutex_t *mp) /* * Give up and block in the kernel for the mutex. */ - if (self->ul_spin_lock_sleep != UINT_MAX) - self->ul_spin_lock_sleep++; + INCR32(self->ul_spin_lock_sleep); (void) ___lwp_mutex_timedlock(mp, NULL); mp->mutex_owner = (uintptr_t)self; } @@ -422,8 +433,7 @@ spin_lock_clear(mutex_t *mp) mp->mutex_owner = 0; if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { (void) ___lwp_mutex_wakeup(mp, 0); - if (self->ul_spin_lock_wakeup != UINT_MAX) - self->ul_spin_lock_wakeup++; + INCR32(self->ul_spin_lock_wakeup); } preempt(self); } @@ -436,7 +446,7 @@ queue_alloc(void) { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; - mutex_t *mp; + queue_head_t *qp; void *data; int i; @@ -449,11 +459,16 @@ queue_alloc(void) PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0)) == MAP_FAILED) thr_panic("cannot allocate thread queue_head table"); - udp->queue_head = (queue_head_t *)data; - for (i = 0; i < 2 * QHASHSIZE; i++) { - mp = &udp->queue_head[i].qh_lock; - mp->mutex_flag = LOCK_INITED; - mp->mutex_magic = MUTEX_MAGIC; + udp->queue_head = qp = (queue_head_t *)data; + for (i = 0; i < 2 * QHASHSIZE; qp++, i++) { + qp->qh_type = (i < QHASHSIZE)? 
MX : CV; + qp->qh_lock.mutex_flag = LOCK_INITED; + qp->qh_lock.mutex_magic = MUTEX_MAGIC; + qp->qh_hlist = &qp->qh_def_root; +#if defined(THREAD_DEBUG) + qp->qh_hlen = 1; + qp->qh_hmax = 1; +#endif } } @@ -467,31 +482,43 @@ QVERIFY(queue_head_t *qp) { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; + queue_root_t *qrp; ulwp_t *ulwp; ulwp_t *prev; uint_t index; - uint32_t cnt = 0; + uint32_t cnt; char qtype; void *wchan; ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE); ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); - ASSERT((qp->qh_head != NULL && qp->qh_tail != NULL) || - (qp->qh_head == NULL && qp->qh_tail == NULL)); + for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { + cnt++; + ASSERT((qrp->qr_head != NULL && qrp->qr_tail != NULL) || + (qrp->qr_head == NULL && qrp->qr_tail == NULL)); + } + ASSERT(qp->qh_hlen == cnt && qp->qh_hmax >= cnt); + qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV; + ASSERT(qp->qh_type == qtype); if (!thread_queue_verify) return; /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ - qtype = ((qp - udp->queue_head) < QHASHSIZE)? 
MX : CV; - for (prev = NULL, ulwp = qp->qh_head; ulwp != NULL; - prev = ulwp, ulwp = ulwp->ul_link, cnt++) { - ASSERT(ulwp->ul_qtype == qtype); - ASSERT(ulwp->ul_wchan != NULL); - ASSERT(ulwp->ul_sleepq == qp); - wchan = ulwp->ul_wchan; - index = QUEUE_HASH(wchan, qtype); - ASSERT(&udp->queue_head[index] == qp); - } - ASSERT(qp->qh_tail == prev); + for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { + for (prev = NULL, ulwp = qrp->qr_head; ulwp != NULL; + prev = ulwp, ulwp = ulwp->ul_link) { + cnt++; + if (ulwp->ul_writer) + ASSERT(prev == NULL || prev->ul_writer); + ASSERT(ulwp->ul_qtype == qtype); + ASSERT(ulwp->ul_wchan != NULL); + ASSERT(ulwp->ul_sleepq == qp); + wchan = ulwp->ul_wchan; + ASSERT(qrp->qr_wchan == wchan); + index = QUEUE_HASH(wchan, qtype); + ASSERT(&udp->queue_head[index] == qp); + } + ASSERT(qrp->qr_tail == prev); + } ASSERT(qp->qh_qlen == cnt); } @@ -509,6 +536,7 @@ queue_lock(void *wchan, int qtype) { uberdata_t *udp = curthread->ul_uberdata; queue_head_t *qp; + queue_root_t *qrp; ASSERT(qtype == MX || qtype == CV); @@ -522,11 +550,20 @@ queue_lock(void *wchan, int qtype) } qp += QUEUE_HASH(wchan, qtype); spin_lock_set(&qp->qh_lock); - /* - * At once per nanosecond, qh_lockcount will wrap after 512 years. - * Were we to care about this, we could peg the value at UINT64_MAX. 
- */ - qp->qh_lockcount++; + for (qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) + if (qrp->qr_wchan == wchan) + break; + if (qrp == NULL && qp->qh_def_root.qr_head == NULL) { + /* the default queue root is available; use it */ + qrp = &qp->qh_def_root; + qrp->qr_wchan = wchan; + ASSERT(qrp->qr_next == NULL); + ASSERT(qrp->qr_tail == NULL && + qrp->qr_rtcount == 0 && qrp->qr_qlen == 0); + } + qp->qh_wchan = wchan; /* valid until queue_unlock() is called */ + qp->qh_root = qrp; /* valid until queue_unlock() is called */ + INCR32(qp->qh_lockcount); QVERIFY(qp); return (qp); } @@ -549,19 +586,33 @@ queue_unlock(queue_head_t *qp) #define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) void -enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype) +enqueue(queue_head_t *qp, ulwp_t *ulwp, int force_fifo) { + queue_root_t *qrp; ulwp_t **ulwpp; ulwp_t *next; int pri = CMP_PRIO(ulwp); - int force_fifo = (qtype & FIFOQ); - int do_fifo; - qtype &= ~FIFOQ; - ASSERT(qtype == MX || qtype == CV); ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); ASSERT(ulwp->ul_sleepq != qp); + if ((qrp = qp->qh_root) == NULL) { + /* use the thread's queue root for the linkage */ + qrp = &ulwp->ul_queue_root; + qrp->qr_next = qp->qh_hlist; + qrp->qr_prev = NULL; + qrp->qr_head = NULL; + qrp->qr_tail = NULL; + qrp->qr_wchan = qp->qh_wchan; + qrp->qr_rtcount = 0; + qrp->qr_qlen = 0; + qrp->qr_qmax = 0; + qp->qh_hlist->qr_prev = qrp; + qp->qh_hlist = qrp; + qp->qh_root = qrp; + MAXINCR(qp->qh_hmax, qp->qh_hlen); + } + /* * LIFO queue ordering is unfair and can lead to starvation, * but it gives better performance for heavily contended locks. @@ -580,30 +631,28 @@ enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype) * This breaks live lock conditions that occur in applications * that are written assuming (incorrectly) that threads acquire * locks fairly, that is, in roughly round-robin order. - * In any event, the queue is maintained in priority order. 
+ * In any event, the queue is maintained in kernel priority order. * - * If we are given the FIFOQ flag in qtype, fifo queueing is forced. + * If force_fifo is non-zero, fifo queueing is forced. * SUSV3 requires this for semaphores. */ - do_fifo = (force_fifo || - ((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0); - - if (qp->qh_head == NULL) { + if (qrp->qr_head == NULL) { /* * The queue is empty. LIFO/FIFO doesn't matter. */ - ASSERT(qp->qh_tail == NULL); - ulwpp = &qp->qh_head; - } else if (do_fifo) { + ASSERT(qrp->qr_tail == NULL); + ulwpp = &qrp->qr_head; + } else if (force_fifo | + (((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0)) { /* * Enqueue after the last thread whose priority is greater * than or equal to the priority of the thread being queued. * Attempt first to go directly onto the tail of the queue. */ - if (pri <= CMP_PRIO(qp->qh_tail)) - ulwpp = &qp->qh_tail->ul_link; + if (pri <= CMP_PRIO(qrp->qr_tail)) + ulwpp = &qrp->qr_tail->ul_link; else { - for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; + for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; ulwpp = &next->ul_link) if (pri > CMP_PRIO(next)) break; @@ -614,174 +663,262 @@ enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype) * than or equal to the priority of the thread being queued. * Hopefully we can go directly onto the head of the queue. 
*/ - for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; + for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; ulwpp = &next->ul_link) if (pri >= CMP_PRIO(next)) break; } if ((ulwp->ul_link = *ulwpp) == NULL) - qp->qh_tail = ulwp; + qrp->qr_tail = ulwp; *ulwpp = ulwp; ulwp->ul_sleepq = qp; - ulwp->ul_wchan = wchan; - ulwp->ul_qtype = qtype; - if (qp->qh_qmax < ++qp->qh_qlen) - qp->qh_qmax = qp->qh_qlen; + ulwp->ul_wchan = qp->qh_wchan; + ulwp->ul_qtype = qp->qh_type; + if ((ulwp->ul_schedctl != NULL && + ulwp->ul_schedctl->sc_cid == ulwp->ul_rtclassid) | + ulwp->ul_pilocks) { + ulwp->ul_rtqueued = 1; + qrp->qr_rtcount++; + } + MAXINCR(qrp->qr_qmax, qrp->qr_qlen); + MAXINCR(qp->qh_qmax, qp->qh_qlen); } /* - * Return a pointer to the queue slot of the - * highest priority thread on the queue. - * On return, prevp, if not NULL, will contain a pointer - * to the thread's predecessor on the queue + * Helper function for queue_slot() and queue_slot_rt(). + * Try to find a non-suspended thread on the queue. */ static ulwp_t ** -queue_slot(queue_head_t *qp, void *wchan, int *more, ulwp_t **prevp) +queue_slot_runnable(ulwp_t **ulwpp, ulwp_t **prevp, int rt) { - ulwp_t **ulwpp; ulwp_t *ulwp; - ulwp_t *prev = NULL; - ulwp_t **suspp = NULL; - ulwp_t *susprev; - - ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); + ulwp_t **foundpp = NULL; + int priority = -1; + ulwp_t *prev; + int tpri; - /* - * Find a waiter on the sleep queue. - */ - for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; + for (prev = NULL; + (ulwp = *ulwpp) != NULL; prev = ulwp, ulwpp = &ulwp->ul_link) { - if (ulwp->ul_wchan == wchan) { - if (!ulwp->ul_stop) + if (ulwp->ul_stop) /* skip suspended threads */ + continue; + tpri = rt? CMP_PRIO(ulwp) : 0; + if (tpri > priority) { + foundpp = ulwpp; + *prevp = prev; + priority = tpri; + if (!rt) break; - /* - * Try not to return a suspended thread. - * This mimics the old libthread's behavior. 
- */ - if (suspp == NULL) { - suspp = ulwpp; - susprev = prev; - } } } + return (foundpp); +} + +/* + * For real-time, we search the entire queue because the dispatch + * (kernel) priorities may have changed since enqueueing. + */ +static ulwp_t ** +queue_slot_rt(ulwp_t **ulwpp_org, ulwp_t **prevp) +{ + ulwp_t **ulwpp = ulwpp_org; + ulwp_t *ulwp = *ulwpp; + ulwp_t **foundpp = ulwpp; + int priority = CMP_PRIO(ulwp); + ulwp_t *prev; + int tpri; - if (ulwp == NULL && suspp != NULL) { - ulwp = *(ulwpp = suspp); - prev = susprev; - suspp = NULL; + for (prev = ulwp, ulwpp = &ulwp->ul_link; + (ulwp = *ulwpp) != NULL; + prev = ulwp, ulwpp = &ulwp->ul_link) { + tpri = CMP_PRIO(ulwp); + if (tpri > priority) { + foundpp = ulwpp; + *prevp = prev; + priority = tpri; + } } - if (ulwp == NULL) { - if (more != NULL) - *more = 0; - return (NULL); + ulwp = *foundpp; + + /* + * Try not to return a suspended thread. + * This mimics the old libthread's behavior. + */ + if (ulwp->ul_stop && + (ulwpp = queue_slot_runnable(ulwpp_org, prevp, 1)) != NULL) { + foundpp = ulwpp; + ulwp = *foundpp; } + ulwp->ul_rt = 1; + return (foundpp); +} - if (prevp != NULL) - *prevp = prev; - if (more == NULL) - return (ulwpp); +ulwp_t ** +queue_slot(queue_head_t *qp, ulwp_t **prevp, int *more) +{ + queue_root_t *qrp; + ulwp_t **ulwpp; + ulwp_t *ulwp; + int rt; + ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); + + if ((qrp = qp->qh_root) == NULL || (ulwp = qrp->qr_head) == NULL) { + *more = 0; + return (NULL); /* no lwps on the queue */ + } + rt = (qrp->qr_rtcount != 0); + *prevp = NULL; + if (ulwp->ul_link == NULL) { /* only one lwp on the queue */ + *more = 0; + ulwp->ul_rt = rt; + return (&qrp->qr_head); + } + *more = 1; + + if (rt) /* real-time queue */ + return (queue_slot_rt(&qrp->qr_head, prevp)); /* - * Scan the remainder of the queue for another waiter. + * Try not to return a suspended thread. + * This mimics the old libthread's behavior. 
*/ - if (suspp != NULL) { - *more = 1; + if (ulwp->ul_stop && + (ulwpp = queue_slot_runnable(&qrp->qr_head, prevp, 0)) != NULL) { + ulwp = *ulwpp; + ulwp->ul_rt = 0; return (ulwpp); } - for (ulwp = ulwp->ul_link; ulwp != NULL; ulwp = ulwp->ul_link) { - if (ulwp->ul_wchan == wchan) { - *more = 1; - return (ulwpp); - } - } - - *more = 0; - return (ulwpp); + /* + * The common case; just pick the first thread on the queue. + */ + ulwp->ul_rt = 0; + return (&qrp->qr_head); } -ulwp_t * +/* + * Common code for unlinking an lwp from a user-level sleep queue. + */ +void queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev) { - ulwp_t *ulwp; + queue_root_t *qrp = qp->qh_root; + queue_root_t *nqrp; + ulwp_t *ulwp = *ulwpp; + ulwp_t *next; - ulwp = *ulwpp; - *ulwpp = ulwp->ul_link; - ulwp->ul_link = NULL; - if (qp->qh_tail == ulwp) - qp->qh_tail = prev; - qp->qh_qlen--; - ulwp->ul_sleepq = NULL; - ulwp->ul_wchan = NULL; + ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); + ASSERT(qp->qh_wchan != NULL && ulwp->ul_wchan == qp->qh_wchan); - return (ulwp); + DECR(qp->qh_qlen); + DECR(qrp->qr_qlen); + if (ulwp->ul_rtqueued) { + ulwp->ul_rtqueued = 0; + qrp->qr_rtcount--; + } + next = ulwp->ul_link; + *ulwpp = next; + ulwp->ul_link = NULL; + if (qrp->qr_tail == ulwp) + qrp->qr_tail = prev; + if (qrp == &ulwp->ul_queue_root) { + /* + * We can't continue to use the unlinked thread's + * queue root for the linkage. 
+ */ + queue_root_t *qr_next = qrp->qr_next; + queue_root_t *qr_prev = qrp->qr_prev; + + if (qrp->qr_tail) { + /* switch to using the last thread's queue root */ + ASSERT(qrp->qr_qlen != 0); + nqrp = &qrp->qr_tail->ul_queue_root; + *nqrp = *qrp; + if (qr_next) + qr_next->qr_prev = nqrp; + if (qr_prev) + qr_prev->qr_next = nqrp; + else + qp->qh_hlist = nqrp; + qp->qh_root = nqrp; + } else { + /* empty queue root; just delete from the hash list */ + ASSERT(qrp->qr_qlen == 0); + if (qr_next) + qr_next->qr_prev = qr_prev; + if (qr_prev) + qr_prev->qr_next = qr_next; + else + qp->qh_hlist = qr_next; + qp->qh_root = NULL; + DECR(qp->qh_hlen); + } + } } ulwp_t * -dequeue(queue_head_t *qp, void *wchan, int *more) +dequeue(queue_head_t *qp, int *more) { ulwp_t **ulwpp; + ulwp_t *ulwp; ulwp_t *prev; - if ((ulwpp = queue_slot(qp, wchan, more, &prev)) == NULL) + if ((ulwpp = queue_slot(qp, &prev, more)) == NULL) return (NULL); - return (queue_unlink(qp, ulwpp, prev)); + ulwp = *ulwpp; + queue_unlink(qp, ulwpp, prev); + ulwp->ul_sleepq = NULL; + ulwp->ul_wchan = NULL; + return (ulwp); } /* * Return a pointer to the highest priority thread sleeping on wchan. 
*/ ulwp_t * -queue_waiter(queue_head_t *qp, void *wchan) +queue_waiter(queue_head_t *qp) { ulwp_t **ulwpp; + ulwp_t *prev; + int more; - if ((ulwpp = queue_slot(qp, wchan, NULL, NULL)) == NULL) + if ((ulwpp = queue_slot(qp, &prev, &more)) == NULL) return (NULL); return (*ulwpp); } -uint8_t -dequeue_self(queue_head_t *qp, void *wchan) +int +dequeue_self(queue_head_t *qp) { ulwp_t *self = curthread; + queue_root_t *qrp; ulwp_t **ulwpp; ulwp_t *ulwp; - ulwp_t *prev = NULL; + ulwp_t *prev; int found = 0; - int more = 0; ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); /* find self on the sleep queue */ - for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; - prev = ulwp, ulwpp = &ulwp->ul_link) { - if (ulwp == self) { - /* dequeue ourself */ - ASSERT(self->ul_wchan == wchan); - (void) queue_unlink(qp, ulwpp, prev); - self->ul_cvmutex = NULL; - self->ul_cv_wake = 0; - found = 1; - break; + if ((qrp = qp->qh_root) != NULL) { + for (prev = NULL, ulwpp = &qrp->qr_head; + (ulwp = *ulwpp) != NULL; + prev = ulwp, ulwpp = &ulwp->ul_link) { + if (ulwp == self) { + queue_unlink(qp, ulwpp, prev); + self->ul_cvmutex = NULL; + self->ul_sleepq = NULL; + self->ul_wchan = NULL; + found = 1; + break; + } } - if (ulwp->ul_wchan == wchan) - more = 1; } if (!found) thr_panic("dequeue_self(): curthread not found on queue"); - if (more) - return (1); - - /* scan the remainder of the queue for another waiter */ - for (ulwp = *ulwpp; ulwp != NULL; ulwp = ulwp->ul_link) { - if (ulwp->ul_wchan == wchan) - return (1); - } - - return (0); + return ((qrp = qp->qh_root) != NULL && qrp->qr_head != NULL); } /* @@ -807,12 +944,11 @@ unsleep_self(void) * If so, just loop around and try again. * dequeue_self() clears self->ul_sleepq. 
*/ - if (qp == self->ul_sleepq) { - (void) dequeue_self(qp, self->ul_wchan); - self->ul_writer = 0; - } + if (qp == self->ul_sleepq) + (void) dequeue_self(qp); queue_unlock(qp); } + self->ul_writer = 0; self->ul_critical--; } @@ -1423,9 +1559,9 @@ static lwpid_t mutex_wakeup(mutex_t *mp) { lwpid_t lwpid = 0; + int more; queue_head_t *qp; ulwp_t *ulwp; - int more; /* * Dequeue a waiter from the sleep queue. Don't touch the mutex @@ -1433,9 +1569,9 @@ mutex_wakeup(mutex_t *mp) * might have been deallocated or reallocated for another purpose. */ qp = queue_lock(mp, MX); - if ((ulwp = dequeue(qp, mp, &more)) != NULL) { + if ((ulwp = dequeue(qp, &more)) != NULL) { lwpid = ulwp->ul_lwpid; - mp->mutex_waiters = (more? 1 : 0); + mp->mutex_waiters = more; } queue_unlock(qp); return (lwpid); @@ -1448,11 +1584,10 @@ static void mutex_wakeup_all(mutex_t *mp) { queue_head_t *qp; + queue_root_t *qrp; int nlwpid = 0; int maxlwps = MAXLWPS; - ulwp_t **ulwpp; ulwp_t *ulwp; - ulwp_t *prev = NULL; lwpid_t buffer[MAXLWPS]; lwpid_t *lwpid = buffer; @@ -1473,17 +1608,17 @@ mutex_wakeup_all(mutex_t *mp) * system call directly since that path acquires no locks. */ qp = queue_lock(mp, MX); - ulwpp = &qp->qh_head; - while ((ulwp = *ulwpp) != NULL) { - if (ulwp->ul_wchan != mp) { - prev = ulwp; - ulwpp = &ulwp->ul_link; - } else { - if (nlwpid == maxlwps) - lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); - (void) queue_unlink(qp, ulwpp, prev); - lwpid[nlwpid++] = ulwp->ul_lwpid; - } + for (;;) { + if ((qrp = qp->qh_root) == NULL || + (ulwp = qrp->qr_head) == NULL) + break; + ASSERT(ulwp->ul_wchan == mp); + queue_unlink(qp, &qrp->qr_head, NULL); + ulwp->ul_sleepq = NULL; + ulwp->ul_wchan = NULL; + if (nlwpid == maxlwps) + lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); + lwpid[nlwpid++] = ulwp->ul_lwpid; } if (nlwpid == 0) { @@ -1555,17 +1690,6 @@ mutex_unlock_process(mutex_t *mp, int release_all) } } -/* - * Return the real priority of a thread. 
- */ -int -real_priority(ulwp_t *ulwp) -{ - if (ulwp->ul_epri == 0) - return (ulwp->ul_mappedpri? ulwp->ul_mappedpri : ulwp->ul_pri); - return (ulwp->ul_emappedpri? ulwp->ul_emappedpri : ulwp->ul_epri); -} - void stall(void) { @@ -1608,12 +1732,12 @@ mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, * The waiter bit can be set/cleared only while holding the queue lock. */ qp = queue_lock(mp, MX); - enqueue(qp, self, mp, MX); + enqueue(qp, self, 0); mp->mutex_waiters = 1; for (;;) { if (set_lock_byte(&mp->mutex_lockw) == 0) { mp->mutex_owner = (uintptr_t)self; - mp->mutex_waiters = dequeue_self(qp, mp); + mp->mutex_waiters = dequeue_self(qp); break; } set_parking_flag(self, 1); @@ -1635,7 +1759,7 @@ mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, qp = queue_lock(mp, MX); if (self->ul_sleepq == NULL) { if (error) { - mp->mutex_waiters = queue_waiter(qp, mp)? 1 : 0; + mp->mutex_waiters = queue_waiter(qp)? 1 : 0; if (error != EINTR) break; error = 0; @@ -1644,7 +1768,7 @@ mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, mp->mutex_owner = (uintptr_t)self; break; } - enqueue(qp, self, mp, MX); + enqueue(qp, self, 0); mp->mutex_waiters = 1; } ASSERT(self->ul_sleepq == qp && @@ -1652,7 +1776,7 @@ mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, self->ul_wchan == mp); if (error) { if (error != EINTR) { - mp->mutex_waiters = dequeue_self(qp, mp); + mp->mutex_waiters = dequeue_self(qp); break; } error = 0; @@ -1812,7 +1936,7 @@ unregister_locks(void) /* * Returns with mutex_owner set correctly. 
*/ -static int +int mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) { ulwp_t *self = curthread; @@ -1820,9 +1944,11 @@ mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) int mtype = mp->mutex_type; tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); int error = 0; + int noceil = try & MUTEX_NOCEIL; uint8_t ceil; int myprio; + try &= ~MUTEX_NOCEIL; ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); if (!self->ul_schedctl_called) @@ -1838,10 +1964,14 @@ mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) tsp == NULL && mutex_is_held(mp)) lock_error(mp, "mutex_lock", NULL, NULL); - if (mtype & LOCK_PRIO_PROTECT) { + if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) { + update_sched(self); + if (self->ul_cid != self->ul_rtclassid) { + DTRACE_PROBE2(plockstat, mutex__error, mp, EPERM); + return (EPERM); + } ceil = mp->mutex_ceiling; - ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0); - myprio = real_priority(self); + myprio = self->ul_epri? self->ul_epri : self->ul_pri; if (myprio > ceil) { DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL); return (EINVAL); @@ -1871,10 +2001,12 @@ mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) */ switch (error) { case 0: + self->ul_pilocks++; mp->mutex_lockw = LOCKSET; break; case EOWNERDEAD: case ELOCKUNMAPPED: + self->ul_pilocks++; mp->mutex_lockw = LOCKSET; /* FALLTHROUGH */ case ENOTRECOVERABLE: @@ -1906,7 +2038,7 @@ mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) record_begin_hold(msp); break; default: - if (mtype & LOCK_PRIO_PROTECT) { + if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) { (void) _ceil_mylist_del(mp); if (myprio < ceil) _ceil_prio_waive(); @@ -1967,9 +2099,8 @@ static int mutex_lock_impl(mutex_t *mp, timespec_t *tsp) { ulwp_t *self = curthread; - uberdata_t *udp = self->ul_uberdata; + int mtype = mp->mutex_type; uberflags_t *gflags; - int mtype; /* * Optimize the case of USYNC_THREAD, including @@ -1978,8 +2109,8 @@ mutex_lock_impl(mutex_t *mp, timespec_t *tsp) * and the process 
has only a single thread. * (Most likely a traditional single-threaded application.) */ - if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | - udp->uberflags.uf_all) == 0) { + if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | + self->ul_uberdata->uberflags.uf_all) == 0) { /* * Only one thread exists so we don't need an atomic operation. */ @@ -2099,10 +2230,11 @@ __mutex_trylock(mutex_t *mp) { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; + int mtype = mp->mutex_type; uberflags_t *gflags; - int mtype; ASSERT(!curthread->ul_critical || curthread->ul_bindflags); + /* * Optimize the case of USYNC_THREAD, including * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, @@ -2110,7 +2242,7 @@ __mutex_trylock(mutex_t *mp) * and the process has only a single thread. * (Most likely a traditional single-threaded application.) */ - if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | + if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | udp->uberflags.uf_all) == 0) { /* * Only one thread exists so we don't need an atomic operation. @@ -2194,6 +2326,7 @@ mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) /* mp->mutex_ownerpid is cleared by ___lwp_mutex_unlock() */ DTRACE_PROBE2(plockstat, mutex__release, mp, 0); mp->mutex_lockw = LOCKCLEAR; + self->ul_pilocks--; error = ___lwp_mutex_unlock(mp); preempt(self); } else if (mtype & USYNC_PROCESS) { @@ -2223,10 +2356,9 @@ int __mutex_unlock(mutex_t *mp) { ulwp_t *self = curthread; - uberdata_t *udp = self->ul_uberdata; + int mtype = mp->mutex_type; uberflags_t *gflags; lwpid_t lwpid; - int mtype; short el; /* @@ -2236,8 +2368,8 @@ __mutex_unlock(mutex_t *mp) * and the process has only a single thread. * (Most likely a traditional single-threaded application.) 
*/ - if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | - udp->uberflags.uf_all) == 0) { + if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | + self->ul_uberdata->uberflags.uf_all) == 0) { if (mtype) { /* * At this point we know that one or both of the @@ -2872,6 +3004,7 @@ cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) lwpid_t lwpid; int signalled; int error; + int cv_wake; int release_all; /* @@ -2882,10 +3015,10 @@ cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) */ self->ul_sp = stkptr(); qp = queue_lock(cvp, CV); - enqueue(qp, self, cvp, CV); + enqueue(qp, self, 0); cvp->cond_waiters_user = 1; self->ul_cvmutex = mp; - self->ul_cv_wake = (tsp != NULL); + self->ul_cv_wake = cv_wake = (tsp != NULL); self->ul_signalled = 0; if (mp->mutex_flag & LOCK_OWNERDEAD) { mp->mutex_flag &= ~LOCK_OWNERDEAD; @@ -2924,7 +3057,8 @@ cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) * or we may just have gotten a spurious wakeup. */ qp = queue_lock(cvp, CV); - mqp = queue_lock(mp, MX); + if (!cv_wake) + mqp = queue_lock(mp, MX); if (self->ul_sleepq == NULL) break; /* @@ -2933,15 +3067,15 @@ cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) * were interrupted or we timed out (EINTR or ETIME). * Else this is a spurious wakeup; continue the loop. 
*/ - if (self->ul_sleepq == mqp) { /* mutex queue */ + if (!cv_wake && self->ul_sleepq == mqp) { /* mutex queue */ if (error) { - mp->mutex_waiters = dequeue_self(mqp, mp); + mp->mutex_waiters = dequeue_self(mqp); break; } tsp = NULL; /* no more timeout */ } else if (self->ul_sleepq == qp) { /* condvar queue */ if (error) { - cvp->cond_waiters_user = dequeue_self(qp, cvp); + cvp->cond_waiters_user = dequeue_self(qp); break; } /* @@ -2951,18 +3085,21 @@ cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) } else { thr_panic("cond_sleep_queue(): thread not on queue"); } - queue_unlock(mqp); + if (!cv_wake) + queue_unlock(mqp); } self->ul_sp = 0; - ASSERT(self->ul_cvmutex == NULL && self->ul_cv_wake == 0); + self->ul_cv_wake = 0; + ASSERT(self->ul_cvmutex == NULL); ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && self->ul_wchan == NULL); signalled = self->ul_signalled; self->ul_signalled = 0; queue_unlock(qp); - queue_unlock(mqp); + if (!cv_wake) + queue_unlock(mqp); /* * If we were concurrently cond_signal()d and any of: @@ -3034,8 +3171,10 @@ cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) self->ul_wchan = cvp; mp->mutex_owner = 0; /* mp->mutex_ownerpid is cleared by ___lwp_cond_wait() */ - if (mtype & LOCK_PRIO_INHERIT) + if (mtype & LOCK_PRIO_INHERIT) { mp->mutex_lockw = LOCKCLEAR; + self->ul_pilocks--; + } /* * ___lwp_cond_wait() returns immediately with EINTR if * set_parking_flag(self,0) is called on this lwp before it @@ -3356,15 +3495,14 @@ cond_signal_internal(cond_t *cvp) uberdata_t *udp = self->ul_uberdata; tdb_cond_stats_t *csp = COND_STATS(cvp, udp); int error = 0; + int more; + lwpid_t lwpid; queue_head_t *qp; mutex_t *mp; queue_head_t *mqp; ulwp_t **ulwpp; ulwp_t *ulwp; - ulwp_t *prev = NULL; - ulwp_t *next; - ulwp_t **suspp = NULL; - ulwp_t *susprev; + ulwp_t *prev; if (csp) tdb_incr(csp->cond_signal); @@ -3383,43 +3521,13 @@ cond_signal_internal(cond_t *cvp) * is set, just dequeue and unpark him. 
*/ qp = queue_lock(cvp, CV); - for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; - prev = ulwp, ulwpp = &ulwp->ul_link) { - if (ulwp->ul_wchan == cvp) { - if (!ulwp->ul_stop) - break; - /* - * Try not to dequeue a suspended thread. - * This mimics the old libthread's behavior. - */ - if (suspp == NULL) { - suspp = ulwpp; - susprev = prev; - } - } - } - if (ulwp == NULL && suspp != NULL) { - ulwp = *(ulwpp = suspp); - prev = susprev; - suspp = NULL; - } - if (ulwp == NULL) { /* no one on the sleep queue */ - cvp->cond_waiters_user = 0; + ulwpp = queue_slot(qp, &prev, &more); + cvp->cond_waiters_user = more; + if (ulwpp == NULL) { /* no one on the sleep queue */ queue_unlock(qp); return (error); } - /* - * Scan the remainder of the CV queue for another waiter. - */ - if (suspp != NULL) { - next = *suspp; - } else { - for (next = ulwp->ul_link; next != NULL; next = next->ul_link) - if (next->ul_wchan == cvp) - break; - } - if (next == NULL) - cvp->cond_waiters_user = 0; + ulwp = *ulwpp; /* * Inform the thread that he was the recipient of a cond_signal(). @@ -3434,29 +3542,25 @@ cond_signal_internal(cond_t *cvp) * while we move him to the mutex queue so that he can * deal properly with spurious wakeups. 
*/ - *ulwpp = ulwp->ul_link; - ulwp->ul_link = NULL; - if (qp->qh_tail == ulwp) - qp->qh_tail = prev; - qp->qh_qlen--; + queue_unlink(qp, ulwpp, prev); mp = ulwp->ul_cvmutex; /* the mutex he will acquire */ ulwp->ul_cvmutex = NULL; ASSERT(mp != NULL); if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { - lwpid_t lwpid = ulwp->ul_lwpid; - + /* just wake him up */ + lwpid = ulwp->ul_lwpid; no_preempt(self); ulwp->ul_sleepq = NULL; ulwp->ul_wchan = NULL; - ulwp->ul_cv_wake = 0; queue_unlock(qp); (void) __lwp_unpark(lwpid); preempt(self); } else { + /* move him to the mutex queue */ mqp = queue_lock(mp, MX); - enqueue(mqp, ulwp, mp, MX); + enqueue(mqp, ulwp, 0); mp->mutex_waiters = 1; queue_unlock(mqp); queue_unlock(qp); @@ -3525,12 +3629,11 @@ cond_broadcast_internal(cond_t *cvp) tdb_cond_stats_t *csp = COND_STATS(cvp, udp); int error = 0; queue_head_t *qp; + queue_root_t *qrp; mutex_t *mp; mutex_t *mp_cache = NULL; queue_head_t *mqp = NULL; - ulwp_t **ulwpp; ulwp_t *ulwp; - ulwp_t *prev = NULL; int nlwpid = 0; int maxlwps = MAXLWPS; lwpid_t buffer[MAXLWPS]; @@ -3566,36 +3669,31 @@ cond_broadcast_internal(cond_t *cvp) */ qp = queue_lock(cvp, CV); cvp->cond_waiters_user = 0; - ulwpp = &qp->qh_head; - while ((ulwp = *ulwpp) != NULL) { - if (ulwp->ul_wchan != cvp) { - prev = ulwp; - ulwpp = &ulwp->ul_link; - continue; - } - *ulwpp = ulwp->ul_link; - ulwp->ul_link = NULL; - if (qp->qh_tail == ulwp) - qp->qh_tail = prev; - qp->qh_qlen--; + for (;;) { + if ((qrp = qp->qh_root) == NULL || + (ulwp = qrp->qr_head) == NULL) + break; + ASSERT(ulwp->ul_wchan == cvp); + queue_unlink(qp, &qrp->qr_head, NULL); mp = ulwp->ul_cvmutex; /* his mutex */ ulwp->ul_cvmutex = NULL; ASSERT(mp != NULL); if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { + /* just wake him up */ ulwp->ul_sleepq = NULL; ulwp->ul_wchan = NULL; - ulwp->ul_cv_wake = 0; if (nlwpid == maxlwps) lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); lwpid[nlwpid++] = ulwp->ul_lwpid; } else { + /* move him to the mutex queue */ 
if (mp != mp_cache) { mp_cache = mp; if (mqp != NULL) queue_unlock(mqp); mqp = queue_lock(mp, MX); } - enqueue(mqp, ulwp, mp, MX); + enqueue(mqp, ulwp, 0); mp->mutex_waiters = 1; } } @@ -3634,7 +3732,6 @@ assert_no_libc_locks_held(void) { ASSERT(!curthread->ul_critical || curthread->ul_bindflags); } -#endif /* protected by link_lock */ uint64_t spin_lock_spin; @@ -3680,26 +3777,28 @@ dump_queue_statistics(void) return; if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 || - fprintf(stderr, "queue# lockcount max qlen\n") < 0) + fprintf(stderr, "queue# lockcount max qlen max hlen\n") < 0) return; for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) { if (qp->qh_lockcount == 0) continue; spin_lock_total += qp->qh_lockcount; - if (fprintf(stderr, "%5d %12llu%12u\n", qn, - (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) + if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn, + (u_longlong_t)qp->qh_lockcount, + qp->qh_qmax, qp->qh_hmax) < 0) return; } if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 || - fprintf(stderr, "queue# lockcount max qlen\n") < 0) + fprintf(stderr, "queue# lockcount max qlen max hlen\n") < 0) return; for (qn = 0; qn < QHASHSIZE; qn++, qp++) { if (qp->qh_lockcount == 0) continue; spin_lock_total += qp->qh_lockcount; - if (fprintf(stderr, "%5d %12llu%12u\n", qn, - (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) + if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn, + (u_longlong_t)qp->qh_lockcount, + qp->qh_qmax, qp->qh_hmax) < 0) return; } @@ -3714,3 +3813,4 @@ dump_queue_statistics(void) (void) fprintf(stderr, " spin_lock_wakeup = %10llu\n", (u_longlong_t)spin_lock_wakeup); } +#endif diff --git a/usr/src/lib/libc/port/threads/thr.c b/usr/src/lib/libc/port/threads/thr.c index 3f11ad1f2f..c9fda4bfb4 100644 --- a/usr/src/lib/libc/port/threads/thr.c +++ b/usr/src/lib/libc/port/threads/thr.c @@ -544,8 +544,7 @@ find_lwp(thread_t tid) int _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg, - long flags, 
thread_t *new_thread, pri_t priority, int policy, - size_t guardsize) + long flags, thread_t *new_thread, size_t guardsize) { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; @@ -566,8 +565,7 @@ _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg, if (udp->hash_size == 1) finish_init(); - if (((stk || stksize) && stksize < MINSTACK) || - priority < THREAD_MIN_PRIORITY || priority > THREAD_MAX_PRIORITY) + if ((stk || stksize) && stksize < MINSTACK) return (EINVAL); if (stk == NULL) { @@ -606,6 +604,12 @@ _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg, ulwp->ul_queue_spin = self->ul_queue_spin; ulwp->ul_door_noreserve = self->ul_door_noreserve; + /* new thread inherits creating thread's scheduling parameters */ + ulwp->ul_policy = self->ul_policy; + ulwp->ul_pri = (self->ul_epri? self->ul_epri : self->ul_pri); + ulwp->ul_cid = self->ul_cid; + ulwp->ul_rtclassid = self->ul_rtclassid; + ulwp->ul_primarymap = self->ul_primarymap; ulwp->ul_self = ulwp; ulwp->ul_uberdata = udp; @@ -669,8 +673,6 @@ _thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg, ulwp->ul_stop = TSTP_REGULAR; if (flags & THR_SUSPENDED) ulwp->ul_created = 1; - ulwp->ul_policy = policy; - ulwp->ul_pri = priority; lmutex_lock(&udp->link_lock); ulwp->ul_forw = udp->all_lwps; @@ -705,8 +707,7 @@ int _thr_create(void *stk, size_t stksize, void *(*func)(void *), void *arg, long flags, thread_t *new_thread) { - return (_thrp_create(stk, stksize, func, arg, flags, new_thread, - curthread->ul_pri, curthread->ul_policy, 0)); + return (_thrp_create(stk, stksize, func, arg, flags, new_thread, 0)); } /* @@ -793,8 +794,10 @@ _thrp_exit() self->ul_back->ul_forw = self->ul_forw; } self->ul_forw = self->ul_back = NULL; +#if defined(THREAD_DEBUG) /* collect queue lock statistics before marking ourself dead */ record_spin_locks(self); +#endif self->ul_dead = 1; self->ul_pleasestop = 0; if (replace != NULL) { @@ -865,6 +868,7 @@ _thrp_exit() 
thr_panic("_thrp_exit(): _lwp_terminate() returned"); } +#if defined(THREAD_DEBUG) void collect_queue_statistics() { @@ -881,6 +885,7 @@ collect_queue_statistics() lmutex_unlock(&udp->link_lock); } } +#endif void _thr_exit_common(void *status, int unwind) @@ -1156,9 +1161,9 @@ etest(const char *ev) #if defined(THREAD_DEBUG) if ((value = envvar(ev, "QUEUE_VERIFY", 1)) >= 0) thread_queue_verify = value; -#endif if ((value = envvar(ev, "QUEUE_DUMP", 1)) >= 0) thread_queue_dump = value; +#endif if ((value = envvar(ev, "STACK_CACHE", 10000)) >= 0) thread_stack_cache = value; if ((value = envvar(ev, "COND_WAIT_DEFER", 1)) >= 0) @@ -1312,6 +1317,9 @@ libc_init(void) self->ul_lwpid = 1; /* __lwp_self() */ self->ul_main = 1; self->ul_self = self; + self->ul_policy = -1; /* initialize only when needed */ + self->ul_pri = 0; + self->ul_cid = 0; self->ul_uberdata = udp; if (oldself != NULL) { int i; @@ -1389,6 +1397,7 @@ libc_init(void) /* tls_size was zero when oldself was allocated */ lfree(oldself, sizeof (ulwp_t)); } + self->ul_rtclassid = get_info_by_policy(SCHED_FIFO)->pcc_info.pc_cid; mutex_setup(); atfork_init(); signal_init(); @@ -1510,7 +1519,12 @@ finish_init() ASSERT(udp->hash_size == 1); /* - * First allocate the queue_head array if not already allocated. + * Initialize self->ul_policy, self->ul_cid, and self->ul_pri. + */ + update_sched(self); + + /* + * Allocate the queue_head array if not already allocated. */ if (udp->queue_head == NULL) queue_alloc(); @@ -1542,14 +1556,16 @@ finish_init() /* * Arrange to do special things on exit -- * - collect queue statistics from all remaining active threads. + * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set. * - grab assert_lock to ensure that assertion failures * and a core dump take precedence over _exit(). - * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set. * (Functions are called in the reverse order of their registration.) 
*/ - (void) _atexit(dump_queue_statistics); (void) _atexit(grab_assert_lock); +#if defined(THREAD_DEBUG) + (void) _atexit(dump_queue_statistics); (void) _atexit(collect_queue_statistics); +#endif } /* @@ -1575,7 +1591,7 @@ postfork1_child() { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; - mutex_t *mp; + queue_head_t *qp; ulwp_t *next; ulwp_t *ulwp; int i; @@ -1599,13 +1615,18 @@ postfork1_child() USYNC_THREAD | LOCK_RECURSIVE, NULL); /* no one in the child is on a sleep queue; reinitialize */ - if (udp->queue_head) { - (void) _private_memset(udp->queue_head, 0, + if ((qp = udp->queue_head) != NULL) { + (void) _private_memset(qp, 0, 2 * QHASHSIZE * sizeof (queue_head_t)); - for (i = 0; i < 2 * QHASHSIZE; i++) { - mp = &udp->queue_head[i].qh_lock; - mp->mutex_flag = LOCK_INITED; - mp->mutex_magic = MUTEX_MAGIC; + for (i = 0; i < 2 * QHASHSIZE; qp++, i++) { + qp->qh_type = (i < QHASHSIZE)? MX : CV; + qp->qh_lock.mutex_flag = LOCK_INITED; + qp->qh_lock.mutex_magic = MUTEX_MAGIC; + qp->qh_hlist = &qp->qh_def_root; +#if defined(THREAD_DEBUG) + qp->qh_hlen = 1; + qp->qh_hmax = 1; +#endif } } @@ -1666,36 +1687,6 @@ postfork1_child() postfork1_child_aio(); } -#pragma weak thr_setprio = _thr_setprio -#pragma weak pthread_setschedprio = _thr_setprio -#pragma weak _pthread_setschedprio = _thr_setprio -int -_thr_setprio(thread_t tid, int priority) -{ - struct sched_param param; - - (void) _memset(&param, 0, sizeof (param)); - param.sched_priority = priority; - return (_thread_setschedparam_main(tid, 0, &param, PRIO_SET_PRIO)); -} - -#pragma weak thr_getprio = _thr_getprio -int -_thr_getprio(thread_t tid, int *priority) -{ - uberdata_t *udp = curthread->ul_uberdata; - ulwp_t *ulwp; - int error = 0; - - if ((ulwp = find_lwp(tid)) == NULL) - error = ESRCH; - else { - *priority = ulwp->ul_pri; - ulwp_unlock(ulwp, udp); - } - return (error); -} - lwpid_t lwp_self(void) { diff --git a/usr/src/lib/libc/sparc/Makefile b/usr/src/lib/libc/sparc/Makefile index 
a9f1b9642b..5080553482 100644 --- a/usr/src/lib/libc/sparc/Makefile +++ b/usr/src/lib/libc/sparc/Makefile @@ -813,7 +813,6 @@ THREADSOBJS= \ pthr_mutex.o \ pthr_rwlock.o \ pthread.o \ - rtsched.o \ rwlock.o \ scalls.o \ sema.o \ @@ -984,8 +983,6 @@ CFLAGS += -xinline= THREAD_DEBUG = $(NOT_RELEASE_BUILD)THREAD_DEBUG = -DTHREAD_DEBUG -CFLAGS += $(THREAD_DEBUG) - ALTPICS= $(TRACEOBJS:%=pics/%) $(DYNLIB) := PICS += $(ROOTFS_LIBDIR)/libc_i18n.a @@ -994,7 +991,7 @@ $(DYNLIB) := BUILD.SO = $(LD) -o $@ -G $(DYNFLAGS) $(PICS) $(ALTPICS) MAPFILES = ../port/mapfile-vers mapfile-vers CFLAGS += $(EXTN_CFLAGS) -CPPFLAGS= -D_REENTRANT -Dsparc $(EXTN_CPPFLAGS) \ +CPPFLAGS= -D_REENTRANT -Dsparc $(EXTN_CPPFLAGS) $(THREAD_DEBUG) \ -I$(LIBCBASE)/inc -I../inc $(CPPFLAGS.master) ASFLAGS= -K pic -P -D__STDC__ -D_ASM $(CPPFLAGS) $(sparc_AS_XARCH) @@ -1105,9 +1102,9 @@ TIL= \ pthr_rwlock.o \ pthread.o \ rand.o \ - rtsched.o \ rwlock.o \ scalls.o \ + sched.o \ sema.o \ sigaction.o \ sigev_thread.o \ diff --git a/usr/src/lib/libc/sparc/threads/machdep.c b/usr/src/lib/libc/sparc/threads/machdep.c index 5ebb6b324c..0fc62303f8 100644 --- a/usr/src/lib/libc/sparc/threads/machdep.c +++ b/usr/src/lib/libc/sparc/threads/machdep.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -78,11 +78,15 @@ _thr_setup(ulwp_t *self) self->ul_ustack.ss_size = self->ul_stksiz; self->ul_ustack.ss_flags = 0; (void) _private_setustack(&self->ul_ustack); + + update_sched(self); tls_setup(); /* signals have been deferred until now */ sigon(self); + if (self->ul_cancel_pending == 2 && !self->ul_cancel_disabled) + return (NULL); /* cancelled by pthread_create() */ return (self->ul_startpc(self->ul_startarg)); } @@ -164,7 +168,7 @@ __csigsetjmp(sigjmp_buf env, int savemask) bp->sjs_stack = self->ul_ustack; else { bp->sjs_stack.ss_sp = - (void *)(self->ul_stktop - self->ul_stksiz); + (void *)(self->ul_stktop - self->ul_stksiz); bp->sjs_stack.ss_size = self->ul_stksiz; bp->sjs_stack.ss_flags = 0; } diff --git a/usr/src/lib/libc/sparcv9/Makefile b/usr/src/lib/libc/sparcv9/Makefile index 9a201f7b60..9155de7910 100644 --- a/usr/src/lib/libc/sparcv9/Makefile +++ b/usr/src/lib/libc/sparcv9/Makefile @@ -759,7 +759,6 @@ THREADSOBJS= \ pthr_mutex.o \ pthr_rwlock.o \ pthread.o \ - rtsched.o \ rwlock.o \ scalls.o \ sema.o \ @@ -927,8 +926,6 @@ CFLAGS64 += -xinline= THREAD_DEBUG = $(NOT_RELEASE_BUILD)THREAD_DEBUG = -DTHREAD_DEBUG -CFLAGS64 += $(THREAD_DEBUG) - ALTPICS= $(TRACEOBJS:%=pics/%) $(DYNLIB) := PICS += $(ROOTFS_LIBDIR64)/libc_i18n.a @@ -938,7 +935,7 @@ MAPFILES = ../port/mapfile-vers mapfile-vers sparcv9_C_PICFLAGS= -K PIC CFLAGS64 += $(EXTN_CFLAGS) -CPPFLAGS= -D_REENTRANT -Dsparc $(EXTN_CPPFLAGS) \ +CPPFLAGS= -D_REENTRANT -Dsparc $(EXTN_CPPFLAGS) $(THREAD_DEBUG) \ -I$(LIBCBASE)/inc -I../inc $(CPPFLAGS.master) ASFLAGS= -K PIC -P -D__STDC__ -D_ASM -D__sparcv9 $(CPPFLAGS) \ $(sparcv9_AS_XARCH) @@ -1036,9 +1033,9 @@ TIL= \ pthr_rwlock.o \ pthread.o \ rand.o \ - rtsched.o \ rwlock.o \ scalls.o \ + sched.o \ sema.o \ sigaction.o \ sigev_thread.o \ diff --git a/usr/src/lib/libc_db/common/thread_db.c b/usr/src/lib/libc_db/common/thread_db.c index 28b0ff3bf9..b99f0f3047 100644 --- a/usr/src/lib/libc_db/common/thread_db.c +++ b/usr/src/lib/libc_db/common/thread_db.c @@ 
-20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -157,9 +157,9 @@ td_read_uberdata(td_thragent_t *ta_p) return (TD_DBERR); ta_p->primary_map = uberdata.primary_map; ta_p->tdb_eventmask_addr = ta_p->uberdata_addr + - offsetof(uberdata_t, tdb.tdb_ev_global_mask); + offsetof(uberdata_t, tdb.tdb_ev_global_mask); ta_p->tdb_register_sync_addr = ta_p->uberdata_addr + - offsetof(uberdata_t, uberflags.uf_tdb_register_sync); + offsetof(uberdata_t, uberflags.uf_tdb_register_sync); ta_p->hash_table_addr = (psaddr_t)uberdata.thr_hash_table; ta_p->hash_size = uberdata.hash_size; if (ps_pdread(ph_p, (psaddr_t)uberdata.tdb.tdb_events, @@ -177,9 +177,9 @@ td_read_uberdata(td_thragent_t *ta_p) return (TD_DBERR); ta_p->primary_map = uberdata.primary_map; ta_p->tdb_eventmask_addr = ta_p->uberdata_addr + - offsetof(uberdata32_t, tdb.tdb_ev_global_mask); + offsetof(uberdata32_t, tdb.tdb_ev_global_mask); ta_p->tdb_register_sync_addr = ta_p->uberdata_addr + - offsetof(uberdata32_t, uberflags.uf_tdb_register_sync); + offsetof(uberdata32_t, uberflags.uf_tdb_register_sync); ta_p->hash_table_addr = (psaddr_t)uberdata.thr_hash_table; ta_p->hash_size = uberdata.hash_size; if (ps_pdread(ph_p, (psaddr_t)uberdata.tdb.tdb_events, @@ -580,15 +580,15 @@ __td_ta_get_nthreads(td_thragent_t *ta_p, int *nthread_p) if (ta_p->model == PR_MODEL_NATIVE) { nthreads_addr = ta_p->uberdata_addr + - offsetof(uberdata_t, nthreads); + offsetof(uberdata_t, nthreads); nzombies_addr = ta_p->uberdata_addr + - offsetof(uberdata_t, nzombies); + offsetof(uberdata_t, nzombies); } else { #if defined(_LP64) && defined(_SYSCALL32) nthreads_addr = ta_p->uberdata_addr + - offsetof(uberdata32_t, nthreads); + offsetof(uberdata32_t, nthreads); nzombies_addr = ta_p->uberdata_addr + - offsetof(uberdata32_t, nzombies); + offsetof(uberdata32_t, nzombies); #else nthreads_addr = 0; nzombies_addr = 0; 
@@ -673,9 +673,9 @@ __td_ta_map_id2thr(td_thragent_t *ta_p, thread_t tid, data.tid = tid; data.found = 0; return_val = __td_ta_thr_iter(ta_p, - (td_thr_iter_f *)td_mapper_id2thr, (void *)&data, - TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY, - TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS); + (td_thr_iter_f *)td_mapper_id2thr, (void *)&data, + TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY, + TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS); if (return_val == TD_OK) { if (data.found == 0) return_val = TD_NOTHR; @@ -835,10 +835,11 @@ __td_ta_tsd_iter(td_thragent_t *ta_p, td_key_iter_f *cb, void *cbdata_p) int sigequalset(const sigset_t *s1, const sigset_t *s2) { - return (s1->__sigbits[0] == s2->__sigbits[0] && - s1->__sigbits[1] == s2->__sigbits[1] && - s1->__sigbits[2] == s2->__sigbits[2] && - s1->__sigbits[3] == s2->__sigbits[3]); + return ( + s1->__sigbits[0] == s2->__sigbits[0] && + s1->__sigbits[1] == s2->__sigbits[1] && + s1->__sigbits[2] == s2->__sigbits[2] && + s1->__sigbits[3] == s2->__sigbits[3]); } /* @@ -986,9 +987,9 @@ __td_ta_thr_iter(td_thragent_t *ta_p, td_thr_iter_f *cb, next_lwp_addr = (psaddr_t)ulwp.ul_forw; ts_state = ulwp.ul_dead? TD_THR_ZOMBIE : - ulwp.ul_stop? TD_THR_STOPPED : - ulwp.ul_wchan? TD_THR_SLEEP : - TD_THR_ACTIVE; + ulwp.ul_stop? TD_THR_STOPPED : + ulwp.ul_wchan? TD_THR_SLEEP : + TD_THR_ACTIVE; userpri = ulwp.ul_pri; userflags = ulwp.ul_usropts; if (ulwp.ul_dead) @@ -1010,9 +1011,9 @@ __td_ta_thr_iter(td_thragent_t *ta_p, td_thr_iter_f *cb, next_lwp_addr = (psaddr_t)ulwp.ul_forw; ts_state = ulwp.ul_dead? TD_THR_ZOMBIE : - ulwp.ul_stop? TD_THR_STOPPED : - ulwp.ul_wchan? TD_THR_SLEEP : - TD_THR_ACTIVE; + ulwp.ul_stop? TD_THR_STOPPED : + ulwp.ul_wchan? TD_THR_SLEEP : + TD_THR_ACTIVE; userpri = ulwp.ul_pri; userflags = ulwp.ul_usropts; if (ulwp.ul_dead) @@ -1284,11 +1285,11 @@ td_thr2to(td_thragent_t *ta_p, psaddr_t ts_addr, } ti_p->ti_ro_area = ts_addr; ti_p->ti_ro_size = ulwp->ul_replace? 
- REPLACEMENT_SIZE : sizeof (ulwp_t); + REPLACEMENT_SIZE : sizeof (ulwp_t); ti_p->ti_state = ulwp->ul_dead? TD_THR_ZOMBIE : - ulwp->ul_stop? TD_THR_STOPPED : - ulwp->ul_wchan? TD_THR_SLEEP : - TD_THR_ACTIVE; + ulwp->ul_stop? TD_THR_STOPPED : + ulwp->ul_wchan? TD_THR_SLEEP : + TD_THR_ACTIVE; ti_p->ti_db_suspended = 0; ti_p->ti_type = TD_THR_USER; ti_p->ti_sp = ulwp->ul_sp; @@ -1328,11 +1329,11 @@ td_thr2to32(td_thragent_t *ta_p, psaddr_t ts_addr, } ti_p->ti_ro_area = ts_addr; ti_p->ti_ro_size = ulwp->ul_replace? - REPLACEMENT_SIZE32 : sizeof (ulwp32_t); + REPLACEMENT_SIZE32 : sizeof (ulwp32_t); ti_p->ti_state = ulwp->ul_dead? TD_THR_ZOMBIE : - ulwp->ul_stop? TD_THR_STOPPED : - ulwp->ul_wchan? TD_THR_SLEEP : - TD_THR_ACTIVE; + ulwp->ul_stop? TD_THR_STOPPED : + ulwp->ul_wchan? TD_THR_SLEEP : + TD_THR_ACTIVE; ti_p->ti_db_suspended = 0; ti_p->ti_type = TD_THR_USER; ti_p->ti_sp = (uint32_t)ulwp->ul_sp; @@ -1394,7 +1395,7 @@ __td_thr_get_info(td_thrhandle_t *th_p, td_thrinfo_t *ti_p) if (ps_pdread(ph_p, psaddr, &ulwp, sizeof (ulwp)) != PS_OK && ((void) memset(&ulwp, 0, sizeof (ulwp)), ps_pdread(ph_p, psaddr, &ulwp, REPLACEMENT_SIZE32)) != - PS_OK) + PS_OK) return_val = TD_DBERR; else td_thr2to32(ta_p, psaddr, &ulwp, ti_p); @@ -2086,9 +2087,9 @@ __td_thr_validate(const td_thrhandle_t *th_p) searcher_data.addr = th_p->th_unique; return_val = __td_ta_thr_iter(th_p->th_ta_p, - td_searcher, &searcher_data, - TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY, - TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS); + td_searcher, &searcher_data, + TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY, + TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS); if (return_val == TD_OK && searcher_data.status == 0) return_val = TD_NOTHR; @@ -2321,38 +2322,11 @@ __td_thr_tlsbase(td_thrhandle_t *th_p, ulong_t moduleid, psaddr_t *base) * Currently unused by dbx. 
*/ #pragma weak td_thr_setprio = __td_thr_setprio +/* ARGSUSED */ td_err_e __td_thr_setprio(td_thrhandle_t *th_p, int ti_pri) { - struct ps_prochandle *ph_p; - pri_t priority = ti_pri; - td_err_e return_val = TD_OK; - - if (ti_pri < THREAD_MIN_PRIORITY || ti_pri > THREAD_MAX_PRIORITY) - return (TD_ERR); - if ((ph_p = ph_lock_th(th_p, &return_val)) == NULL) - return (return_val); - - if (th_p->th_ta_p->model == PR_MODEL_NATIVE) { - ulwp_t *ulwp = (ulwp_t *)th_p->th_unique; - - if (ps_pdwrite(ph_p, (psaddr_t)&ulwp->ul_pri, - &priority, sizeof (priority)) != PS_OK) - return_val = TD_DBERR; - } else { -#if defined(_LP64) && defined(_SYSCALL32) - ulwp32_t *ulwp = (ulwp32_t *)th_p->th_unique; - - if (ps_pdwrite(ph_p, (psaddr_t)&ulwp->ul_pri, - &priority, sizeof (priority)) != PS_OK) - return_val = TD_DBERR; -#else - return_val = TD_ERR; -#endif /* _SYSCALL32 */ - } - - ph_unlock(th_p->th_ta_p); - return (return_val); + return (TD_NOCAPAB); } /* @@ -2526,7 +2500,7 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p, if (si_p->si_state.mutex_locked) { if (si_p->si_shared_type & USYNC_PROCESS) si_p->si_ownerpid = - generic_so.lock.mutex_ownerpid; + generic_so.lock.mutex_ownerpid; si_p->si_owner.th_ta_p = sh_p->sh_ta_p; si_p->si_owner.th_unique = generic_so.lock.mutex_owner; } @@ -2539,8 +2513,8 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p, sizeof (generic_so.condition.flags.flag)); si_p->si_size = sizeof (generic_so.condition); si_p->si_has_waiters = - (generic_so.condition.cond_waiters_user | - generic_so.condition.cond_waiters_kernel)? 1 : 0; + (generic_so.condition.cond_waiters_user | + generic_so.condition.cond_waiters_kernel)? 
1 : 0; break; case SEMA_MAGIC: if (trunc && ps_pdread(ph_p, sh_p->sh_unique, @@ -2558,7 +2532,7 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p, si_p->si_data = (psaddr_t)generic_so.semaphore.count; break; case RWL_MAGIC: - { + { uint32_t rwstate; if (trunc && ps_pdread(ph_p, sh_p->sh_unique, @@ -2575,10 +2549,10 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p, si_p->si_is_wlock = 1; si_p->si_owner.th_ta_p = sh_p->sh_ta_p; si_p->si_owner.th_unique = - generic_so.rwlock.rwlock_owner; + generic_so.rwlock.rwlock_owner; if (si_p->si_shared_type & USYNC_PROCESS) si_p->si_ownerpid = - generic_so.rwlock.rwlock_ownerpid; + generic_so.rwlock.rwlock_ownerpid; } else { si_p->si_state.nreaders = (rwstate & URW_READERS_MASK); } @@ -2587,7 +2561,7 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p, /* this is useless but the old interface provided it */ si_p->si_data = (psaddr_t)generic_so.rwlock.readers; break; - } + } default: return (TD_BADSH); } @@ -2756,7 +2730,7 @@ __td_sync_get_stats(const td_synchandle_t *sh_p, td_syncstats_t *ss_p) return_val = TD_BADSH; else return_val = read_sync_stats(ta_p, hashaddr, - sh_p->sh_unique, &sync_stats); + sh_p->sh_unique, &sync_stats); if (return_val != TD_OK) goto out; @@ -2766,103 +2740,103 @@ __td_sync_get_stats(const td_synchandle_t *sh_p, td_syncstats_t *ss_p) */ switch (sync_stats.un.type) { case TDB_MUTEX: - { + { td_mutex_stats_t *msp = &ss_p->ss_un.mutex; ss_p->ss_info.si_type = TD_SYNC_MUTEX; ss_p->ss_info.si_size = sizeof (mutex_t); msp->mutex_lock = - sync_stats.un.mutex.mutex_lock; + sync_stats.un.mutex.mutex_lock; msp->mutex_sleep = - sync_stats.un.mutex.mutex_sleep; + sync_stats.un.mutex.mutex_sleep; msp->mutex_sleep_time = - sync_stats.un.mutex.mutex_sleep_time; + sync_stats.un.mutex.mutex_sleep_time; msp->mutex_hold_time = - sync_stats.un.mutex.mutex_hold_time; + sync_stats.un.mutex.mutex_hold_time; msp->mutex_try = - 
sync_stats.un.mutex.mutex_try; + sync_stats.un.mutex.mutex_try; msp->mutex_try_fail = - sync_stats.un.mutex.mutex_try_fail; + sync_stats.un.mutex.mutex_try_fail; if (sync_stats.sync_addr >= ta_p->hash_table_addr && (ix = sync_stats.sync_addr - ta_p->hash_table_addr) < ta_p->hash_size * sizeof (thr_hash_table_t)) msp->mutex_internal = - ix / sizeof (thr_hash_table_t) + 1; + ix / sizeof (thr_hash_table_t) + 1; break; - } + } case TDB_COND: - { + { td_cond_stats_t *csp = &ss_p->ss_un.cond; ss_p->ss_info.si_type = TD_SYNC_COND; ss_p->ss_info.si_size = sizeof (cond_t); csp->cond_wait = - sync_stats.un.cond.cond_wait; + sync_stats.un.cond.cond_wait; csp->cond_timedwait = - sync_stats.un.cond.cond_timedwait; + sync_stats.un.cond.cond_timedwait; csp->cond_wait_sleep_time = - sync_stats.un.cond.cond_wait_sleep_time; + sync_stats.un.cond.cond_wait_sleep_time; csp->cond_timedwait_sleep_time = - sync_stats.un.cond.cond_timedwait_sleep_time; + sync_stats.un.cond.cond_timedwait_sleep_time; csp->cond_timedwait_timeout = - sync_stats.un.cond.cond_timedwait_timeout; + sync_stats.un.cond.cond_timedwait_timeout; csp->cond_signal = - sync_stats.un.cond.cond_signal; + sync_stats.un.cond.cond_signal; csp->cond_broadcast = - sync_stats.un.cond.cond_broadcast; + sync_stats.un.cond.cond_broadcast; if (sync_stats.sync_addr >= ta_p->hash_table_addr && (ix = sync_stats.sync_addr - ta_p->hash_table_addr) < ta_p->hash_size * sizeof (thr_hash_table_t)) csp->cond_internal = - ix / sizeof (thr_hash_table_t) + 1; + ix / sizeof (thr_hash_table_t) + 1; break; - } + } case TDB_RWLOCK: - { + { td_rwlock_stats_t *rwsp = &ss_p->ss_un.rwlock; ss_p->ss_info.si_type = TD_SYNC_RWLOCK; ss_p->ss_info.si_size = sizeof (rwlock_t); rwsp->rw_rdlock = - sync_stats.un.rwlock.rw_rdlock; + sync_stats.un.rwlock.rw_rdlock; rwsp->rw_rdlock_try = - sync_stats.un.rwlock.rw_rdlock_try; + sync_stats.un.rwlock.rw_rdlock_try; rwsp->rw_rdlock_try_fail = - sync_stats.un.rwlock.rw_rdlock_try_fail; + 
sync_stats.un.rwlock.rw_rdlock_try_fail; rwsp->rw_wrlock = - sync_stats.un.rwlock.rw_wrlock; + sync_stats.un.rwlock.rw_wrlock; rwsp->rw_wrlock_hold_time = - sync_stats.un.rwlock.rw_wrlock_hold_time; + sync_stats.un.rwlock.rw_wrlock_hold_time; rwsp->rw_wrlock_try = - sync_stats.un.rwlock.rw_wrlock_try; + sync_stats.un.rwlock.rw_wrlock_try; rwsp->rw_wrlock_try_fail = - sync_stats.un.rwlock.rw_wrlock_try_fail; + sync_stats.un.rwlock.rw_wrlock_try_fail; break; - } + } case TDB_SEMA: - { + { td_sema_stats_t *ssp = &ss_p->ss_un.sema; ss_p->ss_info.si_type = TD_SYNC_SEMA; ss_p->ss_info.si_size = sizeof (sema_t); ssp->sema_wait = - sync_stats.un.sema.sema_wait; + sync_stats.un.sema.sema_wait; ssp->sema_wait_sleep = - sync_stats.un.sema.sema_wait_sleep; + sync_stats.un.sema.sema_wait_sleep; ssp->sema_wait_sleep_time = - sync_stats.un.sema.sema_wait_sleep_time; + sync_stats.un.sema.sema_wait_sleep_time; ssp->sema_trywait = - sync_stats.un.sema.sema_trywait; + sync_stats.un.sema.sema_trywait; ssp->sema_trywait_fail = - sync_stats.un.sema.sema_trywait_fail; + sync_stats.un.sema.sema_trywait_fail; ssp->sema_post = - sync_stats.un.sema.sema_post; + sync_stats.un.sema.sema_post; ssp->sema_max_count = - sync_stats.un.sema.sema_max_count; + sync_stats.un.sema.sema_max_count; ssp->sema_min_count = - sync_stats.un.sema.sema_min_count; + sync_stats.un.sema.sema_min_count; break; - } + } default: return_val = TD_BADSH; break; @@ -3064,8 +3038,8 @@ __td_sync_waiters(const td_synchandle_t *sh_p, td_thr_iter_f *cb, void *cb_data) wcb.waiter_cb_arg = cb_data; wcb.errcode = TD_OK; return_val = __td_ta_thr_iter(sh_p->sh_ta_p, waiters_cb, &wcb, - TD_THR_SLEEP, TD_THR_LOWEST_PRIORITY, - TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS); + TD_THR_SLEEP, TD_THR_LOWEST_PRIORITY, + TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS); if (return_val != TD_OK) return (return_val); diff --git a/usr/src/lib/libldap5/sources/ldap/prldap/ldappr-threads.c b/usr/src/lib/libldap5/sources/ldap/prldap/ldappr-threads.c index 
b6b8a0e27a..0f362759c7 100644 --- a/usr/src/lib/libldap5/sources/ldap/prldap/ldappr-threads.c +++ b/usr/src/lib/libldap5/sources/ldap/prldap/ldappr-threads.c @@ -1,5 +1,5 @@ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -658,8 +658,16 @@ prldap_nspr_idle_primordial_thread(void *arg) { * Make sure PR_Init finishes before any other thread can continue */ (void) mutex_lock(&nspr_idle_lock); - if (PR_Initialized() == PR_FALSE) + if (PR_Initialized() == PR_FALSE) { + /* + * PR_Init() changes the current thread's + * priority. Save and restore the priority. + */ + int priority; + (void) thr_getprio(thr_self(), &priority); PR_Init(PR_USER_THREAD, PR_PRIORITY_NORMAL, 0); + (void) thr_setprio(thr_self(), priority); + } nspr_pr_init_is_done = 1; (void) cond_signal(&nspr_idle_cond); (void) mutex_unlock(&nspr_idle_lock); @@ -714,8 +722,16 @@ prldap_nspr_init(void) { if (thr_self() == 1) { /* main thread */ - if (PR_Initialized() == PR_FALSE) - PR_Init(PR_USER_THREAD, PR_PRIORITY_NORMAL, 0); + if (PR_Initialized() == PR_FALSE) { + /* + * PR_Init() changes the current thread's + * priority. Save and restore the priority. + */ + int priority; + (void) thr_getprio(thr_self(), &priority); + PR_Init(PR_USER_THREAD, PR_PRIORITY_NORMAL, 0); + (void) thr_setprio(thr_self(), priority); + } nspr_initialized = 1; } else { if (thr_create(NULL, NULL, diff --git a/usr/src/uts/common/disp/class.c b/usr/src/uts/common/disp/class.c index 8e83a839ee..c6cecdb012 100644 --- a/usr/src/uts/common/disp/class.c +++ b/usr/src/uts/common/disp/class.c @@ -18,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -36,6 +37,7 @@ #include <sys/modctl.h> #include <sys/disp.h> #include <sys/sysmacros.h> +#include <sys/schedctl.h> static int getcidbyname_locked(char *, id_t *); @@ -178,7 +180,7 @@ getcidbyname(char *clname, id_t *cidp) * tp into the buffer pointed to by parmsp. */ void -parmsget(kthread_id_t tp, pcparms_t *parmsp) +parmsget(kthread_t *tp, pcparms_t *parmsp) { parmsp->pc_cid = tp->t_cid; CL_PARMSGET(tp, parmsp->pc_clparms); @@ -225,7 +227,7 @@ int parmsout(pcparms_t *parmsp, pc_vaparms_t *vaparmsp) { return (CL_PARMSOUT(&sclass[parmsp->pc_cid], parmsp->pc_clparms, - vaparmsp)); + vaparmsp)); } @@ -238,7 +240,7 @@ parmsout(pcparms_t *parmsp, pc_vaparms_t *vaparmsp) * has the appropriate permissions. */ int -parmsset(pcparms_t *parmsp, kthread_id_t targtp) +parmsset(pcparms_t *parmsp, kthread_t *targtp) { caddr_t clprocp; int error; @@ -310,11 +312,12 @@ parmsset(pcparms_t *parmsp, kthread_id_t targtp) * Not changing class */ error = CL_PARMSSET(targtp, parmsp->pc_clparms, - curthread->t_cid, reqpcredp); + curthread->t_cid, reqpcredp); crfree(reqpcredp); if (error) return (error); } + schedctl_set_cidpri(targtp); return (0); } diff --git a/usr/src/uts/common/disp/fss.c b/usr/src/uts/common/disp/fss.c index e132ff3397..e52a9d89aa 100644 --- a/usr/src/uts/common/disp/fss.c +++ b/usr/src/uts/common/disp/fss.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -224,6 +224,7 @@ static void fss_sleep(kthread_t *); static void fss_tick(kthread_t *); static void fss_wakeup(kthread_t *); static int fss_donice(kthread_t *, cred_t *, int, int *); +static int fss_doprio(kthread_t *, cred_t *, int, int *); static pri_t fss_globpri(kthread_t *); static void fss_yield(kthread_t *); static void fss_nullsys(); @@ -263,7 +264,8 @@ static struct classfuncs fss_classfuncs = { fss_donice, fss_globpri, fss_nullsys, /* set_process_group */ - fss_yield + fss_yield, + fss_doprio, }; int @@ -954,6 +956,7 @@ fss_change_priority(kthread_t *t, fssproc_t *fssproc) new_pri = fssproc->fss_umdpri; ASSERT(new_pri >= 0 && new_pri <= fss_maxglobpri); + t->t_cpri = fssproc->fss_upri; fssproc->fss_flags &= ~FSSRESTORE; if (t == curthread || t->t_state == TS_ONPROC) { /* @@ -1271,11 +1274,14 @@ fss_vaparmsout(void *parmsp, pc_vaparms_t *vaparmsp) return (0); } +/* + * Return the user mode scheduling priority range. + */ static int fss_getclpri(pcpri_t *pcprip) { - pcprip->pc_clpmax = fss_maxumdpri; - pcprip->pc_clpmin = 0; + pcprip->pc_clpmax = fss_maxupri; + pcprip->pc_clpmin = -fss_maxupri; return (0); } @@ -1881,8 +1887,7 @@ fss_swapout(kthread_t *t, int flags) if (INHERITED(t) || (fssproc->fss_flags & FSSKPRI) || (t->t_proc_flag & TP_LWPEXIT) || - (t->t_state & (TS_ZOMB | TS_FREE | TS_STOPPED | - TS_ONPROC | TS_WAIT)) || + (t->t_state & (TS_ZOMB|TS_FREE|TS_STOPPED|TS_ONPROC|TS_WAIT)) || !(t->t_schedflag & TS_LOAD) || !(SWAP_OK(t))) return (-1); @@ -2237,7 +2242,7 @@ fss_tick(kthread_t *t) call_cpu_surrender = B_TRUE; } } else if (t->t_state == TS_ONPROC && - t->t_pri < t->t_disp_queue->disp_maxrunpri) { + t->t_pri < t->t_disp_queue->disp_maxrunpri) { /* * If there is a higher-priority thread which is * waiting for a processor, then thread surrenders @@ -2392,6 +2397,38 @@ fss_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp) } /* + * Increment the priority of the specified thread by incr and + * return the new value in *retvalp. 
+ */ +static int +fss_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp) +{ + int newpri; + fssproc_t *fssproc = FSSPROC(t); + fssparms_t fssparms; + + /* + * If there is no change to priority, just return current setting. + */ + if (incr == 0) { + *retvalp = fssproc->fss_upri; + return (0); + } + + newpri = fssproc->fss_upri + incr; + if (newpri > fss_maxupri || newpri < -fss_maxupri) + return (EINVAL); + + *retvalp = newpri; + fssparms.fss_uprilim = fssparms.fss_upri = newpri; + + /* + * Reset the uprilim and upri values of the thread. + */ + return (fss_parmsset(t, &fssparms, (id_t)0, cr)); +} + +/* * Return the global scheduling priority that would be assigned to a thread * entering the fair-sharing class with the fss_upri. */ @@ -2618,12 +2655,12 @@ fss_changepset(kthread_t *t, void *newcp, fssbuf_t *projbuf, thread_lock(t); if (t->t_state == TS_RUN || t->t_state == TS_ONPROC || t->t_state == TS_WAIT) - fss_inactive(t); + fss_inactive(t); fssproc->fss_proj = fssproj_new; fssproc->fss_fsspri = 0; if (t->t_state == TS_RUN || t->t_state == TS_ONPROC || t->t_state == TS_WAIT) - fss_active(t); + fss_active(t); thread_unlock(t); mutex_exit(&fsspset_new->fssps_lock); diff --git a/usr/src/uts/common/disp/fx.c b/usr/src/uts/common/disp/fx.c index b4899e0edf..08a67f671f 100644 --- a/usr/src/uts/common/disp/fx.c +++ b/usr/src/uts/common/disp/fx.c @@ -18,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -191,6 +192,7 @@ static void fx_sleep(kthread_t *); static void fx_tick(kthread_t *); static void fx_wakeup(kthread_t *); static int fx_donice(kthread_t *, cred_t *, int, int *); +static int fx_doprio(kthread_t *, cred_t *, int, int *); static pri_t fx_globpri(kthread_t *); static void fx_yield(kthread_t *); static void fx_nullsys(); @@ -238,6 +240,7 @@ static struct classfuncs fx_classfuncs = { fx_globpri, fx_nullsys, /* set_process_group */ fx_yield, + fx_doprio, }; @@ -282,7 +285,7 @@ fx_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp) */ for (i = 0; i < FX_CB_LISTS; i++) { fx_cb_plisthead[i].fx_cb_next = fx_cb_plisthead[i].fx_cb_prev = - &fx_cb_plisthead[i]; + &fx_cb_plisthead[i]; } /* @@ -498,8 +501,7 @@ fx_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp, fxpp->fx_uprilim = reqfxuprilim; fxpp->fx_pri = reqfxupri; - fxpp->fx_nice = NZERO - (NZERO * reqfxupri) - / fx_maxupri; + fxpp->fx_nice = NZERO - (NZERO * reqfxupri) / fx_maxupri; if (((fxkparmsp->fx_cflags & FX_DOTQ) == 0) || (fxkparmsp->fx_tqntm == FX_TQDEF)) { @@ -694,14 +696,13 @@ fx_getclinfo(void *infop) /* - * Return the global scheduling priority ranges for the fixed-priority - * class in pcpri_t structure. + * Return the user mode scheduling priority range. */ static int fx_getclpri(pcpri_t *pcprip) { - pcprip->pc_clpmax = fx_dptbl[fx_maxumdpri].fx_globpri; - pcprip->pc_clpmin = fx_dptbl[0].fx_globpri; + pcprip->pc_clpmax = fx_maxupri; + pcprip->pc_clpmin = 0; return (0); } @@ -753,7 +754,7 @@ fx_parmsin(void *parmsp) return (EINVAL); if ((fxparmsp->fx_tqsecs == 0 && fxparmsp->fx_tqnsecs == 0) || - fxparmsp->fx_tqnsecs >= NANOSEC) + fxparmsp->fx_tqnsecs >= NANOSEC) return (EINVAL); cflags = (fxparmsp->fx_upri != FX_NOCHANGE ? 
FX_DOUPRI : 0); @@ -1158,7 +1159,7 @@ fx_preempt(kthread_t *t) clock_t new_quantum = (clock_t)fxpp->fx_pquantum; pri_t newpri = fxpp->fx_pri; FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie, - &new_quantum, &newpri); + &new_quantum, &newpri); FX_ADJUST_QUANTUM(new_quantum); if ((int)new_quantum != fxpp->fx_pquantum) { fxpp->fx_pquantum = (int)new_quantum; @@ -1299,7 +1300,7 @@ fx_tick(kthread_t *t) clock_t new_quantum = (clock_t)fxpp->fx_pquantum; pri_t newpri = fxpp->fx_pri; FX_CB_TICK(FX_CALLB(fxpp), fxpp->fx_cookie, - &new_quantum, &newpri); + &new_quantum, &newpri); FX_ADJUST_QUANTUM(new_quantum); if ((int)new_quantum != fxpp->fx_pquantum) { fxpp->fx_pquantum = (int)new_quantum; @@ -1359,7 +1360,7 @@ fx_tick(kthread_t *t) call_cpu_surrender = B_TRUE; } } else if (t->t_state == TS_ONPROC && - t->t_pri < t->t_disp_queue->disp_maxrunpri) { + t->t_pri < t->t_disp_queue->disp_maxrunpri) { call_cpu_surrender = B_TRUE; } @@ -1398,7 +1399,7 @@ fx_wakeup(kthread_t *t) clock_t new_quantum = (clock_t)fxpp->fx_pquantum; pri_t newpri = fxpp->fx_pri; FX_CB_WAKEUP(FX_CALLB(fxpp), fxpp->fx_cookie, - &new_quantum, &newpri); + &new_quantum, &newpri); FX_ADJUST_QUANTUM(new_quantum); if ((int)new_quantum != fxpp->fx_pquantum) { fxpp->fx_pquantum = (int)new_quantum; @@ -1441,7 +1442,7 @@ fx_yield(kthread_t *t) clock_t new_quantum = (clock_t)fxpp->fx_pquantum; pri_t newpri = fxpp->fx_pri; FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie, - &new_quantum, &newpri); + &new_quantum, &newpri); FX_ADJUST_QUANTUM(new_quantum); if ((int)new_quantum != fxpp->fx_pquantum) { fxpp->fx_pquantum = (int)new_quantum; @@ -1474,7 +1475,6 @@ fx_yield(kthread_t *t) setbackdq(t); } - /* * Increment the nice value of the specified thread by incr and * return the new value in *retvalp. 
@@ -1517,7 +1517,7 @@ fx_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp) newnice = 0; fxkparms.fx_uprilim = fxkparms.fx_upri = - -((newnice - NZERO) * fx_maxupri) / NZERO; + -((newnice - NZERO) * fx_maxupri) / NZERO; fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI; @@ -1546,6 +1546,40 @@ fx_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp) return (0); } +/* + * Increment the priority of the specified thread by incr and + * return the new value in *retvalp. + */ +static int +fx_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp) +{ + int newpri; + fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); + fxkparms_t fxkparms; + + ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock)); + + /* If there's no change to priority, just return current setting */ + if (incr == 0) { + *retvalp = fxpp->fx_pri; + return (0); + } + + newpri = fxpp->fx_pri + incr; + if (newpri > fx_maxupri || newpri < 0) + return (EINVAL); + + *retvalp = newpri; + fxkparms.fx_uprilim = fxkparms.fx_upri = newpri; + fxkparms.fx_tqntm = FX_NOCHANGE; + fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI; + + /* + * Reset the uprilim and upri values of the thread. + */ + return (fx_parmsset(t, (void *)&fxkparms, (id_t)0, cr)); +} + static void fx_change_priority(kthread_t *t, fxproc_t *fxpp) { @@ -1554,6 +1588,7 @@ fx_change_priority(kthread_t *t, fxproc_t *fxpp) ASSERT(THREAD_LOCK_HELD(t)); new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri; ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri); + t->t_cpri = fxpp->fx_pri; if (t == curthread || t->t_state == TS_ONPROC) { /* curthread is always onproc */ cpu_t *cp = t->t_disp_queue->disp_cpu; diff --git a/usr/src/uts/common/disp/priocntl.c b/usr/src/uts/common/disp/priocntl.c index 9197dc815b..ae863472b0 100644 --- a/usr/src/uts/common/disp/priocntl.c +++ b/usr/src/uts/common/disp/priocntl.c @@ -18,15 +18,15 @@ * * CDDL HEADER END */ + /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ - #pragma ident "%Z%%M% %I% %E% SMI" #include <sys/types.h> @@ -52,6 +52,7 @@ #include <sys/uadmin.h> #include <sys/cmn_err.h> #include <sys/policy.h> +#include <sys/schedctl.h> /* * Structure used to pass arguments to the proccmp() function. @@ -62,7 +63,7 @@ struct pcmpargs { id_t *pcmp_cidp; int *pcmp_cntp; - kthread_id_t *pcmp_retthreadp; + kthread_t **pcmp_retthreadp; }; /* @@ -115,9 +116,10 @@ copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap, uio_seg_t seg) #endif static int donice(procset_t *, pcnice_t *); +static int doprio(procset_t *, pcprio_t *); static int proccmp(proc_t *, struct pcmpargs *); static int setparms(proc_t *, struct stprmargs *); -extern int threadcmp(struct pcmpargs *, kthread_id_t); +extern int threadcmp(struct pcmpargs *, kthread_t *); /* * The priocntl system call. @@ -129,6 +131,7 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg, pcinfo_t pcinfo; pcparms_t pcparms; pcnice_t pcnice; + pcprio_t pcprio; pcadmin_t pcadmin; pcpri_t pcpri; procset_t procset; @@ -138,7 +141,7 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg, char clname[PC_CLNMSZ]; char *outstr; int count; - kthread_id_t retthreadp; + kthread_t *retthreadp; proc_t *initpp; int clnullflag; int error = 0; @@ -340,7 +343,7 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg, * call parmsset() (which does the real work). 
*/ if ((procset.p_lidtype != P_LWPID) || - (procset.p_ridtype != P_LWPID)) { + (procset.p_ridtype != P_LWPID)) { error1 = dotoprocs(&procset, setparms, (char *)&stprmargs); } @@ -524,6 +527,7 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg, error = error1; if (error) { if (retthreadp != NULL) + /* CSTYLED */ mutex_exit(&(ttoproc(retthreadp)->p_lock)); ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock)); return (set_errno(error)); @@ -644,7 +648,7 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg, */ mutex_enter(&ualock); error = CL_ADMIN(&sclass[pcadmin.pc_cid], pcadmin.pc_cladmin, - CRED()); + CRED()); mutex_exit(&ualock); break; @@ -678,6 +682,22 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg, } break; + case PC_DOPRIO: + /* + * Get pcprio and procset structures from the user. + */ + if ((*copyinfn)(arg, &pcprio, sizeof (pcprio)) || + (*copyinfn)(psp, &procset, sizeof (procset))) + return (set_errno(EFAULT)); + + error = doprio(&procset, &pcprio); + + if (!error && (pcprio.pc_op == PC_GETPRIO)) { + if ((*copyoutfn)(&pcprio, arg, sizeof (pcprio))) + return (set_errno(EFAULT)); + } + break; + case PC_SETDFLCL: if (secpolicy_dispadm(CRED()) != 0) return (set_errno(EPERM)); @@ -738,7 +758,8 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) static int proccmp(proc_t *pp, struct pcmpargs *argp) { - kthread_id_t tx, ty; + kthread_t *tx; + kthread_t *ty; int last_pri = -1; int tx_pri; int found = 0; @@ -800,9 +821,9 @@ proccmp(proc_t *pp, struct pcmpargs *argp) int -threadcmp(struct pcmpargs *argp, kthread_id_t tp) +threadcmp(struct pcmpargs *argp, kthread_t *tp) { - kthread_id_t tx; + kthread_t *tx; proc_t *pp; ASSERT(MUTEX_HELD(&(ttoproc(tp))->p_lock)); @@ -857,7 +878,7 @@ static int setparms(proc_t *targpp, struct stprmargs *stprmp) { int error = 0; - kthread_id_t t; + kthread_t *t; int err; mutex_enter(&targpp->p_lock); @@ -885,7 +906,7 @@ setparms(proc_t *targpp, struct 
stprmargs *stprmp) int setthreadnice(pcnice_t *pcnice, kthread_t *tp) { - int error = 0; + int error; int nice; int inc; id_t rtcid; @@ -898,9 +919,9 @@ setthreadnice(pcnice_t *pcnice, kthread_t *tp) * must be unaffected by a call to setpriority(). */ error = getcidbyname("RT", &rtcid); - if ((error == 0) && (tp->t_cid == rtcid)) { + if (error == 0 && tp->t_cid == rtcid) { if (pcnice->pc_op == PC_SETNICE) - return (error); + return (0); } if ((error = CL_DONICE(tp, CRED(), 0, &nice)) != 0) @@ -922,6 +943,7 @@ setthreadnice(pcnice_t *pcnice, kthread_t *tp) inc = pcnice->pc_val - nice; error = CL_DONICE(tp, CRED(), inc, &inc); + schedctl_set_cidpri(tp); } return (error); @@ -932,7 +954,7 @@ setprocnice(proc_t *pp, pcnice_t *pcnice) { kthread_t *tp; int retval = 0; - int error = 0; + int error; ASSERT(MUTEX_HELD(&pidlock)); mutex_enter(&pp->p_lock); @@ -1033,3 +1055,170 @@ donice(procset_t *procset, pcnice_t *pcnice) return (err); } + +int +setthreadprio(pcprio_t *pcprio, kthread_t *tp) +{ + int prio = 0; + int incr; + int error; + + ASSERT(MUTEX_HELD(&pidlock)); + ASSERT(MUTEX_HELD(&(ttoproc(tp)->p_lock))); + + if (pcprio->pc_op == PC_SETPRIO && pcprio->pc_cid != tp->t_cid) { + /* + * Target thread must change to new class. + * See comments in parmsset(), from where this code was copied. + */ + void *bufp = NULL; + caddr_t clprocp = (caddr_t)tp->t_cldata; + id_t oldcid = tp->t_cid; + + error = CL_CANEXIT(tp, NULL); + if (error) + return (error); + if (CL_ALLOC(&bufp, pcprio->pc_cid, KM_NOSLEEP) != 0) + return (ENOMEM); + error = CL_ENTERCLASS(tp, pcprio->pc_cid, NULL, CRED(), bufp); + if (error) { + CL_FREE(pcprio->pc_cid, bufp); + return (error); + } + CL_EXITCLASS(oldcid, clprocp); + schedctl_set_cidpri(tp); + } + + if ((error = CL_DOPRIO(tp, CRED(), 0, &prio)) != 0) + return (error); + + if (pcprio->pc_op == PC_GETPRIO) { + /* + * If we are not setting the priority, we should return the + * highest priority pertaining to any of the specified threads. 
+ */ + if (prio > pcprio->pc_val) { + pcprio->pc_cid = tp->t_cid; + pcprio->pc_val = prio; + } + } else if (prio != pcprio->pc_val) { + /* + * Try to change the priority of the thread. + */ + incr = pcprio->pc_val - prio; + error = CL_DOPRIO(tp, CRED(), incr, &prio); + schedctl_set_cidpri(tp); + } + + return (error); +} + +int +setprocprio(proc_t *pp, pcprio_t *pcprio) +{ + kthread_t *tp; + int retval = 0; + int error; + + ASSERT(MUTEX_HELD(&pidlock)); + mutex_enter(&pp->p_lock); + + if ((tp = pp->p_tlist) == NULL) { + mutex_exit(&pp->p_lock); + return (ESRCH); + } + + /* + * Check permissions before changing the prio value. + */ + if (pcprio->pc_op == PC_SETPRIO) { + if (!prochasprocperm(pp, curproc, CRED())) { + mutex_exit(&pp->p_lock); + return (EPERM); + } + } + + do { + error = setthreadprio(pcprio, tp); + if (error) + retval = error; + } while ((tp = tp->t_forw) != pp->p_tlist); + + mutex_exit(&pp->p_lock); + return (retval); +} + +/* + * Set the class and priority of the specified LWP or set of processes. + */ +static int +doprio(procset_t *procset, pcprio_t *pcprio) +{ + int err_proc = 0; + int err_thread = 0; + int err = 0; + + /* + * Sanity check. + */ + if (pcprio->pc_op != PC_GETPRIO && pcprio->pc_op != PC_SETPRIO) + return (EINVAL); + if (pcprio->pc_op == PC_SETPRIO && + (pcprio->pc_cid >= loaded_classes || pcprio->pc_cid < 1)) + return (EINVAL); + + /* + * If it is a PC_GETPRIO operation then set pc_val to the smallest + * possible prio value to help us find the highest priority + * pertaining to any of the specified processes. + */ + if (pcprio->pc_op == PC_GETPRIO) + pcprio->pc_val = SHRT_MIN; + + if (procset->p_lidtype != P_LWPID || + procset->p_ridtype != P_LWPID) + err_proc = dotoprocs(procset, setprocprio, (char *)pcprio); + + if (procset->p_lidtype == P_LWPID || procset->p_ridtype == P_LWPID) { + err_thread = dotolwp(procset, setthreadprio, (char *)pcprio); + /* + * dotolwp() can return with p_lock held. 
This is required + * for the priocntl GETPARMS case. So, here we just release + * the p_lock. + */ + if (MUTEX_HELD(&curproc->p_lock)) + mutex_exit(&curproc->p_lock); + + /* + * If we were called for a single LWP, then ignore ESRCH + * returned by the previous dotoprocs() call. + */ + if (err_proc == ESRCH) + err_proc = 0; + } + + /* + * dotoprocs() ignores the init process if it is in the set, unless + * it was the only process found. We want to make sure init is not + * excluded if we're going PC_GETPRIO operation. + */ + if (pcprio->pc_op == PC_GETPRIO) { + proc_t *initpp; + + mutex_enter(&pidlock); + initpp = prfind(P_INITPID); + if (initpp != NULL && procinset(initpp, procset)) + err = setprocprio(initpp, pcprio); + mutex_exit(&pidlock); + } + + /* + * We're returning the latest error here that we've got back from + * the setthreadprio() or setprocprio(). That is, err_thread and/or + * err_proc can be replaced by err. + */ + if (!err) + err = err_thread ? err_thread : err_proc; + + return (err); +} diff --git a/usr/src/uts/common/disp/rt.c b/usr/src/uts/common/disp/rt.c index 2b60fbe24e..43b42d5298 100644 --- a/usr/src/uts/common/disp/rt.c +++ b/usr/src/uts/common/disp/rt.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,15 +18,15 @@ * * CDDL HEADER END */ + /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ - #pragma ident "%Z%%M% %I% %E% SMI" #include <sys/types.h> @@ -48,6 +47,7 @@ #include <sys/rtpriocntl.h> #include <sys/kmem.h> #include <sys/systm.h> +#include <sys/schedctl.h> #include <sys/errno.h> #include <sys/cpuvar.h> #include <sys/vmsystm.h> @@ -122,6 +122,7 @@ static int rt_vaparmsin(void *, pc_vaparms_t *); static int rt_vaparmsout(void *, pc_vaparms_t *); static int rt_parmsset(kthread_t *, void *, id_t, cred_t *); static int rt_donice(kthread_t *, cred_t *, int, int *); +static int rt_doprio(kthread_t *, cred_t *, int, int *); static void rt_exitclass(void *); static int rt_canexit(kthread_t *, cred_t *); static void rt_forkret(kthread_t *, kthread_t *); @@ -182,6 +183,7 @@ static struct classfuncs rt_classfuncs = { rt_globpri, rt_nullsys, /* set_process_group */ rt_yield, + rt_doprio, }; /* @@ -534,16 +536,16 @@ rt_getclinfo(void *infop) } /* - * Return the global scheduling priority ranges of the realtime - * class in pcpri_t structure. + * Return the user mode scheduling priority range. */ static int rt_getclpri(pcpri_t *pcprip) { - pcprip->pc_clpmax = rt_dptbl[rt_maxpri].rt_globpri; - pcprip->pc_clpmin = rt_dptbl[0].rt_globpri; + pcprip->pc_clpmax = rt_maxpri; + pcprip->pc_clpmin = 0; return (0); } + static void rt_nullsys() { @@ -1041,6 +1043,35 @@ rt_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp) return (EINVAL); } +/* + * Increment the priority of the specified thread by incr and + * return the new value in *retvalp. 
+ */ +static int +rt_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp) +{ + int newpri; + rtproc_t *rtpp = (rtproc_t *)(t->t_cldata); + rtkparms_t rtkparms; + + /* If there's no change to the priority, just return current setting */ + if (incr == 0) { + *retvalp = rtpp->rt_pri; + return (0); + } + + newpri = rtpp->rt_pri + incr; + if (newpri > rt_maxpri || newpri < 0) + return (EINVAL); + + *retvalp = newpri; + rtkparms.rt_pri = newpri; + rtkparms.rt_tqntm = RT_NOCHANGE; + rtkparms.rt_tqsig = 0; + rtkparms.rt_cflags = RT_DOPRI; + return (rt_parmsset(t, &rtkparms, rt_cid, cr)); +} + static int rt_alloc(void **p, int flag) { @@ -1070,6 +1101,7 @@ rt_change_priority(kthread_t *t, rtproc_t *rtpp) new_pri = rt_dptbl[rtpp->rt_pri].rt_globpri; + t->t_cpri = rtpp->rt_pri; if (t == curthread || t->t_state == TS_ONPROC) { cpu_t *cp = t->t_disp_queue->disp_cpu; THREAD_CHANGE_PRI(t, new_pri); diff --git a/usr/src/uts/common/disp/sysclass.c b/usr/src/uts/common/disp/sysclass.c index d48cc3145e..7323a0fc17 100644 --- a/usr/src/uts/common/disp/sysclass.c +++ b/usr/src/uts/common/disp/sysclass.c @@ -20,14 +20,13 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ - #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.12 */ #include <sys/types.h> @@ -59,14 +58,15 @@ pri_t sys_init(id_t, int, classfuncs_t **); static int sys_getclpri(pcpri_t *); -static int sys_fork(kthread_id_t, kthread_id_t, void *); -static int sys_enterclass(kthread_id_t, id_t, void *, cred_t *, void *); -static int sys_canexit(kthread_id_t, cred_t *); +static int sys_fork(kthread_t *, kthread_t *, void *); +static int sys_enterclass(kthread_t *, id_t, void *, cred_t *, void *); +static int sys_canexit(kthread_t *, cred_t *); static int sys_nosys(); -static int sys_donice(kthread_id_t, cred_t *, int, int *); -static void sys_forkret(kthread_id_t, kthread_id_t); +static int sys_donice(kthread_t *, cred_t *, int, int *); +static int sys_doprio(kthread_t *, cred_t *, int, int *); +static void sys_forkret(kthread_t *, kthread_t *); static void sys_nullsys(); -static pri_t sys_swappri(kthread_id_t, int); +static pri_t sys_swappri(kthread_t *, int); static int sys_alloc(void **, int); struct classfuncs sys_classfuncs = { @@ -107,6 +107,7 @@ struct classfuncs sys_classfuncs = { (pri_t (*)())sys_nosys, /* globpri */ sys_nullsys, /* set_process_group */ sys_nullsys, /* yield */ + sys_doprio, } }; @@ -130,14 +131,14 @@ static int sys_getclpri(pcpri_t *pcprip) { pcprip->pc_clpmax = maxclsyspri; - pcprip->pc_clpmin = 0; + pcprip->pc_clpmin = minclsyspri; return (0); } /* ARGSUSED */ static int sys_enterclass(t, cid, parmsp, reqpcredp, bufp) - kthread_id_t t; + kthread_t *t; id_t cid; void *parmsp; cred_t *reqpcredp; @@ -148,7 +149,7 @@ sys_enterclass(t, cid, parmsp, reqpcredp, bufp) /* ARGSUSED */ static int -sys_canexit(kthread_id_t t, cred_t *reqpcredp) +sys_canexit(kthread_t *t, cred_t *reqpcredp) { return (0); } @@ -156,8 +157,8 @@ sys_canexit(kthread_id_t t, cred_t *reqpcredp) /* ARGSUSED */ static int sys_fork(t, ct, bufp) - kthread_id_t t; - kthread_id_t ct; + kthread_t *t; + 
kthread_t *ct; void *bufp; { /* @@ -170,8 +171,8 @@ sys_fork(t, ct, bufp) /* ARGSUSED */ static void sys_forkret(t, ct) - kthread_id_t t; - kthread_id_t ct; + kthread_t *t; + kthread_t *ct; { register proc_t *pp = ttoproc(t); register proc_t *cp = ttoproc(ct); @@ -196,7 +197,7 @@ sys_forkret(t, ct) /* ARGSUSED */ static pri_t sys_swappri(t, flags) - kthread_id_t t; + kthread_t *t; int flags; { return (-1); @@ -216,11 +217,14 @@ sys_nullsys() /* ARGSUSED */ static int -sys_donice(t, cr, incr, retvalp) - kthread_id_t t; - cred_t *cr; - int incr; - int *retvalp; +sys_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp) +{ + return (EINVAL); +} + +/* ARGSUSED */ +static int +sys_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp) { return (EINVAL); } diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c index 928b594602..27e6034f05 100644 --- a/usr/src/uts/common/disp/thread.c +++ b/usr/src/uts/common/disp/thread.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -71,6 +72,7 @@ #include <sys/sdt.h> #include <sys/reboot.h> #include <sys/kdi.h> +#include <sys/schedctl.h> #include <sys/waitq.h> #include <sys/cpucaps.h> #include <sys/kiconv.h> @@ -1760,25 +1762,17 @@ thread_change_epri(kthread_t *t, pri_t disp_pri) state = t->t_state; /* - * If it's not on a queue, change the priority with - * impunity. + * If it's not on a queue, change the priority with impunity. */ if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) { t->t_epri = disp_pri; - if (state == TS_ONPROC) { cpu_t *cp = t->t_disp_queue->disp_cpu; if (t == cp->cpu_dispthread) cp->cpu_dispatch_pri = DISP_PRIO(t); } - return; - } - - /* - * It's either on a sleep queue or a run queue. - */ - if (state == TS_SLEEP) { + } else if (state == TS_SLEEP) { /* * Take the thread out of its sleep queue. * Change the inherited priority. 
@@ -1805,7 +1799,8 @@ thread_change_epri(kthread_t *t, pri_t disp_pri) t->t_epri = disp_pri; setbackdq(t); } -} /* end of thread_change_epri */ + schedctl_set_cidpri(t); +} /* * Function: Change the t_pri field of a thread. @@ -1825,8 +1820,7 @@ thread_change_pri(kthread_t *t, pri_t disp_pri, int front) THREAD_WILLCHANGE_PRI(t, disp_pri); /* - * If it's not on a queue, change the priority with - * impunity. + * If it's not on a queue, change the priority with impunity. */ if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) { t->t_pri = disp_pri; @@ -1837,13 +1831,7 @@ thread_change_pri(kthread_t *t, pri_t disp_pri, int front) if (t == cp->cpu_dispthread) cp->cpu_dispatch_pri = DISP_PRIO(t); } - return (0); - } - - /* - * It's either on a sleep queue or a run queue. - */ - if (state == TS_SLEEP) { + } else if (state == TS_SLEEP) { /* * If the priority has changed, take the thread out of * its sleep queue and change the priority. @@ -1880,5 +1868,6 @@ thread_change_pri(kthread_t *t, pri_t disp_pri, int front) setbackdq(t); } } + schedctl_set_cidpri(t); return (on_rq); } diff --git a/usr/src/uts/common/disp/ts.c b/usr/src/uts/common/disp/ts.c index e071a80ab5..53612cf2bc 100644 --- a/usr/src/uts/common/disp/ts.c +++ b/usr/src/uts/common/disp/ts.c @@ -20,14 +20,13 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ - #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.23 */ #include <sys/types.h> @@ -196,6 +195,7 @@ static int ts_vaparmsout(void *, pc_vaparms_t *); static int ts_parmsset(kthread_t *, void *, id_t, cred_t *); static void ts_exit(kthread_t *); static int ts_donice(kthread_t *, cred_t *, int, int *); +static int ts_doprio(kthread_t *, cred_t *, int, int *); static void ts_exitclass(void *); static int ts_canexit(kthread_t *, cred_t *); static void ts_forkret(kthread_t *, kthread_t *); @@ -221,6 +221,7 @@ static void ts_free(void *); pri_t ia_init(id_t, int, classfuncs_t **); static int ia_getclinfo(void *); +static int ia_getclpri(pcpri_t *); static int ia_parmsin(void *); static int ia_vaparmsin(void *, pc_vaparms_t *); static int ia_vaparmsout(void *, pc_vaparms_t *); @@ -274,6 +275,7 @@ static struct classfuncs ts_classfuncs = { ts_globpri, ts_nullsys, /* set_process_group */ ts_yield, + ts_doprio, }; /* @@ -290,7 +292,7 @@ static struct classfuncs ia_classfuncs = { ts_parmsout, ia_vaparmsin, ia_vaparmsout, - ts_getclpri, + ia_getclpri, ts_alloc, ts_free, @@ -318,6 +320,7 @@ static struct classfuncs ia_classfuncs = { ts_globpri, ia_set_process_group, ts_yield, + ts_doprio, }; @@ -615,8 +618,7 @@ ts_enterclass(kthread_t *t, id_t cid, void *parmsp, tspp->ts_uprilim = reqtsuprilim; tspp->ts_upri = reqtsupri; - tspp->ts_nice = NZERO - (NZERO * reqtsupri) - / ts_maxupri; + tspp->ts_nice = NZERO - (NZERO * reqtsupri) / ts_maxupri; } TS_NEWUMDPRI(tspp); @@ -788,14 +790,22 @@ ia_getclinfo(void *infop) /* - * Return the global scheduling priority ranges for the timesharing - * class in pcpri_t structure. + * Return the user mode scheduling priority range. 
*/ static int ts_getclpri(pcpri_t *pcprip) { - pcprip->pc_clpmax = ts_dptbl[ts_maxumdpri].ts_globpri; - pcprip->pc_clpmin = ts_dptbl[0].ts_globpri; + pcprip->pc_clpmax = ts_maxupri; + pcprip->pc_clpmin = -ts_maxupri; + return (0); +} + + +static int +ia_getclpri(pcpri_t *pcprip) +{ + pcprip->pc_clpmax = ia_maxupri; + pcprip->pc_clpmin = -ia_maxupri; return (0); } @@ -833,7 +843,6 @@ ia_parmsget(kthread_t *t, void *parmsp) iaparmsp->ia_mode = IA_SET_INTERACTIVE; else iaparmsp->ia_mode = IA_INTERACTIVE_OFF; - iaparmsp->ia_nice = tspp->ts_nice; } @@ -1759,7 +1768,7 @@ ts_tick(kthread_t *t) TRACE_2(TR_FAC_DISP, TR_TICK, "tick:tid %p old pri %d", t, oldpri); } else if (t->t_state == TS_ONPROC && - t->t_pri < t->t_disp_queue->disp_maxrunpri) { + t->t_pri < t->t_disp_queue->disp_maxrunpri) { call_cpu_surrender = B_TRUE; } } @@ -2107,7 +2116,7 @@ ts_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp) newnice = 0; tsparms.ts_uprilim = tsparms.ts_upri = - -((newnice - NZERO) * ts_maxupri) / NZERO; + -((newnice - NZERO) * ts_maxupri) / NZERO; /* * Reset the uprilim and upri values of the thread. * Call ts_parmsset even if thread is interactive since we're @@ -2130,6 +2139,38 @@ ts_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp) return (0); } +/* + * Increment the priority of the specified thread by incr and + * return the new value in *retvalp. + */ +static int +ts_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp) +{ + int newpri; + tsproc_t *tspp = (tsproc_t *)(t->t_cldata); + tsparms_t tsparms; + + ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock)); + + /* If there's no change to the priority, just return current setting */ + if (incr == 0) { + *retvalp = tspp->ts_upri; + return (0); + } + + newpri = tspp->ts_upri + incr; + if (newpri > ts_maxupri || newpri < -ts_maxupri) + return (EINVAL); + + *retvalp = newpri; + tsparms.ts_uprilim = tsparms.ts_upri = newpri; + /* + * Reset the uprilim and upri values of the thread. 
+ * Call ts_parmsset even if thread is interactive since we're + * not changing mode. + */ + return (ts_parmsset(t, &tsparms, 0, cr)); +} /* * ia_set_process_group marks foreground processes as interactive @@ -2324,6 +2365,7 @@ ts_change_priority(kthread_t *t, tsproc_t *tspp) new_pri = ts_dptbl[tspp->ts_umdpri].ts_globpri; ASSERT(new_pri >= 0 && new_pri <= ts_maxglobpri); tspp->ts_flags &= ~TSRESTORE; + t->t_cpri = tspp->ts_upri; if (t == curthread || t->t_state == TS_ONPROC) { /* curthread is always onproc */ cpu_t *cp = t->t_disp_queue->disp_cpu; diff --git a/usr/src/uts/common/fs/proc/prcontrol.c b/usr/src/uts/common/fs/proc/prcontrol.c index 227b732fc3..e67ba67e04 100644 --- a/usr/src/uts/common/fs/proc/prcontrol.c +++ b/usr/src/uts/common/fs/proc/prcontrol.c @@ -338,11 +338,11 @@ pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr) break; } - timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0; - if ((error = pr_wait_stop(pnp, timeo)) != 0) - return (error); + timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0; + if ((error = pr_wait_stop(pnp, timeo)) != 0) + return (error); - break; + break; } case PCRUN: /* make lwp or process runnable */ @@ -960,10 +960,8 @@ pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr) } case PCSPRIV: /* set the process privileges */ - { - error = pr_spriv(p, &argp->prpriv, cr); - break; - } + error = pr_spriv(p, &argp->prpriv, cr); + break; case PCSZONE: /* set the process's zoneid */ error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr); @@ -1548,6 +1546,7 @@ pr_nice(proc_t *p, int nice, cred_t *cr) do { ASSERT(!(t->t_proc_flag & TP_LWPEXIT)); err = CL_DONICE(t, cr, nice, (int *)NULL); + schedctl_set_cidpri(t); if (error == 0) error = err; } while ((t = t->t_forw) != p->p_tlist); @@ -1925,7 +1924,7 @@ pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked) pwa->wa_eaddr = (caddr_t)vaddr + size; pwa->wa_flags = (ulong_t)wflags; - error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0) ? 
+ error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)? clear_watched_area(p, pwa) : set_watched_area(p, pwa); if (p == curproc) { diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c index 1162410939..c821d6f964 100644 --- a/usr/src/uts/common/os/exec.c +++ b/usr/src/uts/common/os/exec.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -28,7 +29,6 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ - #include <sys/types.h> #include <sys/param.h> #include <sys/sysmacros.h> @@ -57,6 +57,7 @@ #include <sys/prsystm.h> #include <sys/modctl.h> #include <sys/vmparam.h> +#include <sys/door.h> #include <sys/schedctl.h> #include <sys/utrap.h> #include <sys/systeminfo.h> @@ -1441,12 +1442,10 @@ stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0) return (error); if (args->brandname != NULL && - (error = stk_add(args, args->brandname, - UIO_SYSSPACE)) != 0) + (error = stk_add(args, args->brandname, UIO_SYSSPACE)) != 0) return (error); if (args->emulator != NULL && - (error = stk_add(args, args->emulator, - UIO_SYSSPACE)) != 0) + (error = stk_add(args, args->emulator, UIO_SYSSPACE)) != 0) return (error); } diff --git a/usr/src/uts/common/os/fork.c b/usr/src/uts/common/os/fork.c index 2ed10db14a..74d791ac47 100644 --- a/usr/src/uts/common/os/fork.c +++ b/usr/src/uts/common/os/fork.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -592,6 +592,7 @@ cfork(int isvfork, int isfork1, int flags) * and disappear before CL_FORKRET() is called. 
*/ CL_FORKRET(curthread, cp->p_tlist); + schedctl_set_cidpri(curthread); ASSERT(MUTEX_NOT_HELD(&pidlock)); } @@ -602,8 +603,7 @@ forklwperr: if (avl_numnodes(&p->p_wpage) != 0) { /* restore watchpoints to parent */ as = p->p_as; - AS_LOCK_ENTER(as, &as->a_lock, - RW_WRITER); + AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); as->a_wpage = p->p_wpage; avl_create(&p->p_wpage, wp_compare, sizeof (struct watched_page), diff --git a/usr/src/uts/common/os/lwp.c b/usr/src/uts/common/os/lwp.c index a9f1aa2588..39ca56ac7d 100644 --- a/usr/src/uts/common/os/lwp.c +++ b/usr/src/uts/common/os/lwp.c @@ -410,6 +410,11 @@ grow: if (p != curproc || curthread->t_cid != cid) { err = CL_ENTERCLASS(t, cid, NULL, NULL, bufp); t->t_pri = pri; /* CL_ENTERCLASS may have changed it */ + /* + * We don't call schedctl_set_cidpri(t) here + * because the schedctl data is not yet set + * up for the newly-created lwp. + */ } else { t->t_clfuncs = &(sclass[cid].cl_funcs->thread); err = CL_FORK(curthread, t, bufp); diff --git a/usr/src/uts/common/os/pool.c b/usr/src/uts/common/os/pool.c index 03a2f7121a..80b05f90e0 100644 --- a/usr/src/uts/common/os/pool.c +++ b/usr/src/uts/common/os/pool.c @@ -18,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -44,6 +45,7 @@ #include <sys/atomic.h> #include <sys/zone.h> #include <sys/policy.h> +#include <sys/schedctl.h> /* * RESOURCE POOLS @@ -1217,6 +1219,7 @@ pool_change_class(proc_t *p, id_t cid) ret = CL_ENTERCLASS(t, cid, NULL, NULL, *buf); ASSERT(ret == 0); CL_EXITCLASS(oldcid, cldata); + schedctl_set_cidpri(t); *buf++ = NULL; } } while ((t = t->t_forw) != p->p_tlist); diff --git a/usr/src/uts/common/os/schedctl.c b/usr/src/uts/common/os/schedctl.c index 98c8457523..4a5ccc9944 100644 --- a/usr/src/uts/common/os/schedctl.c +++ b/usr/src/uts/common/os/schedctl.c @@ -119,6 +119,9 @@ schedctl(void) thread_lock(t); /* protect against ts_tick and ts_update */ t->t_schedctl = ssp; t->t_sc_uaddr = uaddr; + ssp->sc_cid = t->t_cid; + ssp->sc_cpri = t->t_cpri; + ssp->sc_priority = DISP_PRIO(t); thread_unlock(t); } @@ -204,7 +207,7 @@ schedctl_proc_cleanup(void) * Called by resume just before switching away from the current thread. * Save new thread state. */ -void +static void schedctl_save(sc_shared_t *ssp) { ssp->sc_state = curthread->t_state; @@ -215,7 +218,7 @@ schedctl_save(sc_shared_t *ssp) * Called by resume after switching to the current thread. * Save new thread state and CPU. */ -void +static void schedctl_restore(sc_shared_t *ssp) { ssp->sc_state = SC_ONPROC; @@ -227,7 +230,7 @@ schedctl_restore(sc_shared_t *ssp) * On fork, remove inherited mappings from the child's address space. * The child's threads must call schedctl() to get new shared mappings. */ -void +static void schedctl_fork(kthread_t *pt, kthread_t *ct) { proc_t *pp = ttoproc(pt); @@ -253,7 +256,7 @@ schedctl_fork(kthread_t *pt, kthread_t *ct) /* * Returns non-zero if the specified thread shouldn't be preempted at this time. - * Called by ts_preempt, ts_tick, and ts_update. + * Called by ts_preempt(), ts_tick(), and ts_update(). */ int schedctl_get_nopreempt(kthread_t *t) @@ -265,7 +268,7 @@ schedctl_get_nopreempt(kthread_t *t) /* * Sets the value of the nopreempt field for the specified thread. 
- * Called by ts_preempt to clear the field on preemption. + * Called by ts_preempt() to clear the field on preemption. */ void schedctl_set_nopreempt(kthread_t *t, short val) @@ -276,10 +279,11 @@ schedctl_set_nopreempt(kthread_t *t, short val) /* - * Sets the value of the yield field for the specified thread. Called by - * ts_preempt and ts_tick to set the field, and ts_yield to clear it. - * The kernel never looks at this field so we don't need a schedctl_get_yield - * function. + * Sets the value of the yield field for the specified thread. + * Called by ts_preempt() and ts_tick() to set the field, and + * ts_yield() to clear it. + * The kernel never looks at this field so we don't need a + * schedctl_get_yield() function. */ void schedctl_set_yield(kthread_t *t, short val) @@ -290,6 +294,24 @@ schedctl_set_yield(kthread_t *t, short val) /* + * Sets the values of the cid and priority fields for the specified thread. + * Called from thread_change_pri(), thread_change_epri(), THREAD_CHANGE_PRI(). + * Called following calls to CL_FORKRET() and CL_ENTERCLASS(). + */ +void +schedctl_set_cidpri(kthread_t *t) +{ + sc_shared_t *tdp = t->t_schedctl; + + if (tdp != NULL) { + tdp->sc_cid = t->t_cid; + tdp->sc_cpri = t->t_cpri; + tdp->sc_priority = DISP_PRIO(t); + } +} + + +/* * Returns non-zero if the specified thread has requested that all * signals be blocked. Called by signal-related code that tests * the signal mask of a thread that may not be the current thread @@ -443,7 +465,7 @@ schedctl_init(void) } -int +static int schedctl_shared_alloc(sc_shared_t **kaddrp, uintptr_t *uaddrp) { proc_t *p = curproc; diff --git a/usr/src/uts/common/sys/class.h b/usr/src/uts/common/sys/class.h index 9988ca3190..9ec496fdbb 100644 --- a/usr/src/uts/common/sys/class.h +++ b/usr/src/uts/common/sys/class.h @@ -18,15 +18,15 @@ * * CDDL HEADER END */ + /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ - #ifndef _SYS_CLASS_H #define _SYS_CLASS_H @@ -72,29 +72,30 @@ typedef struct class_ops { } class_ops_t; typedef struct thread_ops { - int (*cl_enterclass)(kthread_id_t, id_t, void *, cred_t *, void *); + int (*cl_enterclass)(kthread_t *, id_t, void *, cred_t *, void *); void (*cl_exitclass)(void *); - int (*cl_canexit)(kthread_id_t, cred_t *); - int (*cl_fork)(kthread_id_t, kthread_id_t, void *); - void (*cl_forkret)(kthread_id_t, kthread_id_t); - void (*cl_parmsget)(kthread_id_t, void *); - int (*cl_parmsset)(kthread_id_t, void *, id_t, cred_t *); - void (*cl_stop)(kthread_id_t, int, int); - void (*cl_exit)(kthread_id_t); - void (*cl_active)(kthread_id_t); - void (*cl_inactive)(kthread_id_t); - pri_t (*cl_swapin)(kthread_id_t, int); - pri_t (*cl_swapout)(kthread_id_t, int); - void (*cl_trapret)(kthread_id_t); - void (*cl_preempt)(kthread_id_t); - void (*cl_setrun)(kthread_id_t); - void (*cl_sleep)(kthread_id_t); - void (*cl_tick)(kthread_id_t); - void (*cl_wakeup)(kthread_id_t); - int (*cl_donice)(kthread_id_t, cred_t *, int, int *); - pri_t (*cl_globpri)(kthread_id_t); + int (*cl_canexit)(kthread_t *, cred_t *); + int (*cl_fork)(kthread_t *, kthread_t *, void *); + void (*cl_forkret)(kthread_t *, kthread_t *); + void (*cl_parmsget)(kthread_t *, void *); + int (*cl_parmsset)(kthread_t *, void *, id_t, cred_t *); + void (*cl_stop)(kthread_t *, int, int); + void (*cl_exit)(kthread_t *); + void (*cl_active)(kthread_t *); + void (*cl_inactive)(kthread_t *); + pri_t (*cl_swapin)(kthread_t *, int); + pri_t (*cl_swapout)(kthread_t *, int); + void (*cl_trapret)(kthread_t *); + void (*cl_preempt)(kthread_t *); + void (*cl_setrun)(kthread_t *); + void (*cl_sleep)(kthread_t *); + void (*cl_tick)(kthread_t *); + void (*cl_wakeup)(kthread_t *); + int (*cl_donice)(kthread_t *, cred_t *, int, int *); + pri_t (*cl_globpri)(kthread_t *); void (*cl_set_process_group)(pid_t, pid_t, 
pid_t); - void (*cl_yield)(kthread_id_t); + void (*cl_yield)(kthread_t *); + int (*cl_doprio)(kthread_t *, cred_t *, int, int *); } thread_ops_t; typedef struct classfuncs { @@ -134,8 +135,8 @@ extern int getcid(char *, id_t *); extern int getcidbyname(char *, id_t *); extern int parmsin(pcparms_t *, pc_vaparms_t *); extern int parmsout(pcparms_t *, pc_vaparms_t *); -extern int parmsset(pcparms_t *, kthread_id_t); -extern void parmsget(kthread_id_t, pcparms_t *); +extern int parmsset(pcparms_t *, kthread_t *); +extern void parmsget(kthread_t *, pcparms_t *); extern int vaparmsout(char *, pcparms_t *, pc_vaparms_t *, uio_seg_t); #endif @@ -207,6 +208,9 @@ extern int vaparmsout(char *, pcparms_t *, pc_vaparms_t *, uio_seg_t); #define CL_DONICE(t, cr, inc, ret) \ (*(t)->t_clfuncs->cl_donice)(t, cr, inc, ret) +#define CL_DOPRIO(t, cr, inc, ret) \ + (*(t)->t_clfuncs->cl_doprio)(t, cr, inc, ret) + #define CL_GLOBPRI(t) (*(t)->t_clfuncs->cl_globpri)(t) #define CL_SET_PROCESS_GROUP(t, s, b, f) \ diff --git a/usr/src/uts/common/sys/iapriocntl.h b/usr/src/uts/common/sys/iapriocntl.h index 2c97d4d6e5..ad12db44ae 100644 --- a/usr/src/uts/common/sys/iapriocntl.h +++ b/usr/src/uts/common/sys/iapriocntl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,15 +18,15 @@ * * CDDL HEADER END */ -/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ - /* - * Copyright (c) 2001 by Sun Microsystems, Inc. - * All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. */ +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + #ifndef _SYS_IAPRIOCNTL_H #define _SYS_IAPRIOCNTL_H @@ -53,7 +52,6 @@ typedef struct iaparms { pri_t ia_uprilim; /* user priority limit */ pri_t ia_upri; /* user priority */ int ia_mode; /* interactive on/off */ - int ia_nice; /* present nice value */ } iaparms_t; typedef struct iaclass { @@ -71,7 +69,6 @@ typedef struct iainfo { #define IANPROCS 60 #define IA_INTERACTIVE_OFF 0x00 /* thread is not interactive */ #define IA_SET_INTERACTIVE 0x01 /* thread is interactive */ -#define IA_NICED 0x02 /* thread has been niced */ #define IA_BOOST 10 /* value for boost */ /* diff --git a/usr/src/uts/common/sys/priocntl.h b/usr/src/uts/common/sys/priocntl.h index 6475ed0a4c..1f88f47c66 100644 --- a/usr/src/uts/common/sys/priocntl.h +++ b/usr/src/uts/common/sys/priocntl.h @@ -19,14 +19,14 @@ * CDDL HEADER END */ -/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ - /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + #ifndef _SYS_PRIOCNTL_H #define _SYS_PRIOCNTL_H @@ -58,14 +58,15 @@ extern long priocntl(), priocntlset(); #define PC_SETPARMS 2 /* Set scheduling parameters */ #define PC_GETPARMS 3 /* Get scheduling parameters */ #define PC_ADMIN 4 /* Scheduler administration (used by */ - /* dispadmin(1M), not for general use) */ -#define PC_GETPRIRANGE 5 /* Get global priority range for a class */ + /* dispadmin(1M), not for general use) */ +#define PC_GETPRIRANGE 5 /* Get priority range for a class */ /* posix.4 scheduling, not for general use */ #define PC_DONICE 6 /* Set or get nice value */ #define PC_SETXPARMS 7 /* Set extended scheduling parameters */ #define PC_GETXPARMS 8 /* Get extended scheduling parameters */ #define PC_SETDFLCL 9 /* Set default class, not for general use */ #define PC_GETDFLCL 10 /* Get default class, not for general use */ +#define PC_DOPRIO 11 /* Set or get priority, not for general use */ #define PC_CLNULL -1 @@ -76,6 +77,9 @@ extern long priocntl(), priocntlset(); #define PC_GETNICE 0 #define PC_SETNICE 1 +#define PC_GETPRIO 0 +#define PC_SETPRIO 1 + typedef struct pcinfo { id_t pc_cid; /* class id */ char pc_clname[PC_CLNMSZ]; /* class name */ @@ -92,6 +96,12 @@ typedef struct pcnice { int pc_op; /* type of operation, set or get */ } pcnice_t; +typedef struct pcprio { + int pc_op; /* type of operation, set or get */ + id_t pc_cid; /* class id */ + int pc_val; /* priority value */ +} pcprio_t; + /* * The following is used by the priocntl(2) varargs interface (command * codes: PC_SETXPARMS and PC_GETXPARMS). 
@@ -141,8 +151,8 @@ typedef struct { typedef struct pcpri { id_t pc_cid; /* process class */ - pri_t pc_clpmax; /* class global priority max */ - pri_t pc_clpmin; /* class global priority min */ + pri_t pc_clpmax; /* class priority max */ + pri_t pc_clpmin; /* class priority min */ } pcpri_t; /* diff --git a/usr/src/uts/common/sys/schedctl.h b/usr/src/uts/common/sys/schedctl.h index 74a534d48c..010b343669 100644 --- a/usr/src/uts/common/sys/schedctl.h +++ b/usr/src/uts/common/sys/schedctl.h @@ -43,13 +43,6 @@ extern "C" { #include <sys/types.h> #include <sys/processor.h> -#ifdef _KERNEL -#include <sys/mutex.h> -#include <sys/thread.h> -#include <sys/vnode.h> -#include <sys/cpuvar.h> -#include <sys/door.h> -#endif /* _KERNEL */ /* * This "public" portion of the sc_shared data is used by libsched/libc. @@ -69,7 +62,10 @@ typedef struct sc_shared { volatile char sc_sigblock; /* all signals blocked */ volatile uchar_t sc_flgs; /* set only by curthread; see below */ volatile processorid_t sc_cpu; /* last CPU on which LWP ran */ - int sc_pad; + volatile char sc_cid; /* scheduling class id */ + volatile char sc_cpri; /* class priority, -128..127 */ + volatile uchar_t sc_priority; /* dispatch priority, 0..255 */ + char sc_pad; sc_public_t sc_preemptctl; /* preemption control data */ } sc_shared_t; @@ -100,6 +96,7 @@ void schedctl_proc_cleanup(void); int schedctl_get_nopreempt(kthread_t *); void schedctl_set_nopreempt(kthread_t *, short); void schedctl_set_yield(kthread_t *, short); +void schedctl_set_cidpri(kthread_t *); int schedctl_sigblock(kthread_t *); void schedctl_finish_sigblock(kthread_t *); int schedctl_cancel_pending(void); diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h index 7302289ea1..78ded14796 100644 --- a/usr/src/uts/common/sys/thread.h +++ b/usr/src/uts/common/sys/thread.h @@ -121,6 +121,7 @@ typedef struct _kthread { uint_t t_state; /* thread state (protected by thread_lock) */ pri_t t_pri; /* assigned thread priority */ 
pri_t t_epri; /* inherited thread priority */ + pri_t t_cpri; /* thread scheduling class priority */ char t_writer; /* sleeping in lwp_rwlock_lock(RW_WRITE_LOCK) */ label_t t_pcb; /* pcb, save area when switching */ lwpchan_t t_lwpchan; /* reason for blocking */ @@ -584,6 +585,7 @@ caddr_t thread_stk_init(caddr_t); /* init thread stack */ pri_t __new_pri = (pri); \ DTRACE_SCHED2(change__pri, kthread_t *, (t), pri_t, __new_pri); \ (t)->t_pri = __new_pri; \ + schedctl_set_cidpri(t); \ } /* diff --git a/usr/src/uts/common/syscall/nice.c b/usr/src/uts/common/syscall/nice.c index 55db136f7b..056486575c 100644 --- a/usr/src/uts/common/syscall/nice.c +++ b/usr/src/uts/common/syscall/nice.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,10 +18,14 @@ * * CDDL HEADER END */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ -/* Copyright (c) 1994 Sun Microsystems, Inc. 
*/ - #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.15 */ @@ -36,6 +39,7 @@ #include <sys/debug.h> #include <sys/class.h> #include <sys/mutex.h> +#include <sys/schedctl.h> /* * We support the nice system call for compatibility although @@ -49,13 +53,14 @@ nice(int niceness) { int error = 0; int err, retval; - kthread_id_t t; - proc_t *p = curproc; + kthread_t *t; + proc_t *p = curproc; mutex_enter(&p->p_lock); t = p->p_tlist; do { err = CL_DONICE(t, CRED(), niceness, &retval); + schedctl_set_cidpri(t); if (error == 0 && err) error = set_errno(err); } while ((t = t->t_forw) != p->p_tlist); |