author:    raf <none@none>  2007-06-29 13:31:58 -0700
committer: raf <none@none>  2007-06-29 13:31:58 -0700
commit:    883492d5a933deb34cd27521e7f2756773cd27af
tree:      0c80cc8bc2b91d1882235f25cf28ef8ed0e4e6e3 /usr/src
parent:    ec4858345aa8c9134ae2563545c54823cd78b5c8
PSARC 2007/285 robust locks revisited
6296770 process robust mutexes should be much faster
Diffstat (limited to 'usr/src')
30 files changed, 1223 insertions, 867 deletions
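Illustrative usage (not part of the diff): a minimal sketch of the programming model this change delivers, following the cmd/ypcmd/shared/lockmap.c hunks and the new mutex_consistent() prototype in head/synch.h below. A process-shared robust lock is now created with mutex_init(mp, USYNC_PROCESS | LOCK_ROBUST, NULL) instead of the retired USYNC_PROCESS_ROBUST type, and a lock inherited from a dead owner (EOWNERDEAD) is repaired in place with mutex_consistent() rather than by re-running mutex_init(). The robust_init()/robust_lock() helper names are hypothetical, used only for this sketch.

#include <errno.h>
#include <stdio.h>
#include <synch.h>

int
robust_init(mutex_t *mp)
{
	/* the mutex memory must start out all-zero (e.g. freshly mapped) */
	return (mutex_init(mp, USYNC_PROCESS | LOCK_ROBUST, NULL));
}

int
robust_lock(mutex_t *mp)
{
	int rc = mutex_lock(mp);

	if (rc == EOWNERDEAD) {
		/*
		 * The previous owner died while holding the lock.
		 * Repair the data it protects, then mark the lock
		 * consistent again; we already hold it at this point.
		 */
		if ((rc = mutex_consistent(mp)) != 0) {
			(void) fprintf(stderr,
			    "mutex_consistent(): error=%d\n", rc);
			(void) mutex_unlock(mp);
			return (rc);
		}
	}
	return (rc);	/* 0 on success, else an error number */
}

Under the old model the EOWNERDEAD recovery path had to reinitialize the lock with mutex_init(..., USYNC_PROCESS_ROBUST, 0), which is exactly the pattern the lockmap.c hunk replaces.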
diff --git a/usr/src/cmd/mdb/common/modules/libc/libc.c b/usr/src/cmd/mdb/common/modules/libc/libc.c index acfa8ad884..4ebfecc646 100644 --- a/usr/src/cmd/mdb/common/modules/libc/libc.c +++ b/usr/src/cmd/mdb/common/modules/libc/libc.c @@ -608,19 +608,25 @@ d_ulwp(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) prt_addr(ulwp.ul_sleepq, 1), prt_addr(ulwp.ul_cvmutex, 0)); - HD("mxchain epri emappedpri rdlocks"); - mdb_printf(OFFSTR "%s %-10d %-10d %d\n", + HD("mxchain epri emappedpri"); + mdb_printf(OFFSTR "%s %-10d %d\n", OFFSET(ul_mxchain), prt_addr(ulwp.ul_mxchain, 1), ulwp.ul_epri, - ulwp.ul_emappedpri, - ulwp.ul_rdlocks); + ulwp.ul_emappedpri); - HD("rd_rwlock rd_count tpdp"); - mdb_printf(OFFSTR "%s %-21ld %s\n", - OFFSET(ul_readlock.single.rd_rwlock), + HD("rdlockcnt rd_rwlock rd_count"); + mdb_printf(OFFSTR "%-21d %s %d\n", + OFFSET(ul_rdlockcnt), + ulwp.ul_rdlockcnt, prt_addr(ulwp.ul_readlock.single.rd_rwlock, 1), - ulwp.ul_readlock.single.rd_count, + ulwp.ul_readlock.single.rd_count); + + HD("heldlockcnt heldlocks tpdp"); + mdb_printf(OFFSTR "%-21d %s %s\n", + OFFSET(ul_heldlockcnt), + ulwp.ul_heldlockcnt, + prt_addr(ulwp.ul_heldlocks.single, 1), prt_addr(ulwp.ul_tpdp, 0)); HD("siglink s'l'spin s'l'spin2 s'l'sleep s'l'wakeup"); @@ -815,12 +821,17 @@ d_uberdata(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) prt_addr(uberdata.ulwp_freelist, 1), prt_addr(uberdata.ulwp_lastfree, 0)); - HD("ulwp_replace_free ulwp_replace_last atforklist"); - mdb_printf(OFFSTR "%s %s %s\n", + HD("ulwp_replace_free ulwp_replace_last"); + mdb_printf(OFFSTR "%s %s\n", OFFSET(ulwp_replace_free), prt_addr(uberdata.ulwp_replace_free, 1), - prt_addr(uberdata.ulwp_replace_last, 1), - prt_addr(uberdata.atforklist, 0)); + prt_addr(uberdata.ulwp_replace_last, 0)); + + HD("atforklist robustlocks"); + mdb_printf(OFFSTR "%s %s\n", + OFFSET(atforklist), + prt_addr(uberdata.atforklist, 1), + prt_addr(uberdata.robustlocks, 0)); HD("tdb_bootstrap tdb_sync_addr_hash tdb_'count tdb_'fail"); mdb_printf(OFFSTR "%s %s %-10d %d\n", diff --git a/usr/src/cmd/sgs/link_audit/common/bindings.c b/usr/src/cmd/sgs/link_audit/common/bindings.c index 6b3fb55fec..f4133cacdb 100644 --- a/usr/src/cmd/sgs/link_audit/common/bindings.c +++ b/usr/src/cmd/sgs/link_audit/common/bindings.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -140,7 +140,7 @@ remap_buffer(int fd) } static void -grow_buffer() +grow_buffer(void) { int fd; if ((fd = open(buffer_name, O_RDWR)) == -1) { @@ -163,7 +163,7 @@ grow_buffer() } static void -get_new_strbuf() +get_new_strbuf(void) { bt_lock(&bhp->bh_lock); while (bhp->bh_end + STRBLKSIZE > bhp->bh_size) @@ -201,7 +201,7 @@ save_str(const char *str) static unsigned int -get_new_entry() +get_new_entry(void) { unsigned int new_ent; bt_lock(&bhp->bh_lock); @@ -216,14 +216,10 @@ get_new_entry() static void -init_locks() +init_locks(void) { int i; - /* - * NOTE: I should call _lwp_mutex_init() but it doesn't - * yet exist. see bug#1179352 - */ (void) memcpy(&bhp->bh_lock, &sharedmutex, sizeof (lwp_mutex_t)); for (i = 0; i < DEFBKTS; i++) (void) memcpy(&bhp->bh_bkts[i].bb_lock, &sharedmutex, diff --git a/usr/src/cmd/truss/expound.c b/usr/src/cmd/truss/expound.c index 166672b338..479f584561 100644 --- a/usr/src/cmd/truss/expound.c +++ b/usr/src/cmd/truss/expound.c @@ -27,7 +27,6 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ - #pragma ident "%Z%%M% %I% %E% SMI" #define _SYSCALL32 @@ -520,9 +519,10 @@ show_cladm(private_t *pri, int code, int function, long offset) } } -#define ALL_LOCK_TYPES \ - (USYNC_PROCESS|LOCK_ERRORCHECK|LOCK_RECURSIVE|USYNC_PROCESS_ROBUST|\ - LOCK_PRIO_INHERIT|LOCK_PRIO_PROTECT|LOCK_ROBUST_NP) +#define ALL_LOCK_TYPES \ + (USYNC_PROCESS | LOCK_ERRORCHECK | LOCK_RECURSIVE | \ + LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT | LOCK_ROBUST | \ + USYNC_PROCESS_ROBUST) /* return cv and mutex types */ const char * @@ -539,14 +539,14 @@ synch_type(private_t *pri, uint_t type) (void) strcat(str, "|LOCK_ERRORCHECK"); if (type & LOCK_RECURSIVE) (void) strcat(str, "|LOCK_RECURSIVE"); - if (type & USYNC_PROCESS_ROBUST) - (void) strcat(str, "|USYNC_PROCESS_ROBUST"); if (type & LOCK_PRIO_INHERIT) (void) strcat(str, "|LOCK_PRIO_INHERIT"); if (type & LOCK_PRIO_PROTECT) (void) strcat(str, "|LOCK_PRIO_PROTECT"); - if (type & LOCK_ROBUST_NP) - (void) strcat(str, "|LOCK_ROBUST_NP"); + if (type & LOCK_ROBUST) + (void) strcat(str, "|LOCK_ROBUST"); + if (type & USYNC_PROCESS_ROBUST) + (void) strcat(str, "|USYNC_PROCESS_ROBUST"); if ((type &= ~ALL_LOCK_TYPES) != 0) (void) sprintf(str + strlen(str), "|0x%.4X", type); @@ -5023,7 +5023,7 @@ expound(private_t *pri, long r0, int raw) case SYS_lwp_mutex_lock: case SYS_lwp_mutex_unlock: case SYS_lwp_mutex_trylock: - case SYS_lwp_mutex_init: + case SYS_lwp_mutex_register: if (pri->sys_nargs > 0) show_mutex(pri, (long)pri->sys_args[0]); break; diff --git a/usr/src/cmd/truss/systable.c b/usr/src/cmd/truss/systable.c index c11500baf4..b960898928 100644 --- a/usr/src/cmd/truss/systable.c +++ b/usr/src/cmd/truss/systable.c @@ -27,7 +27,6 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ - #pragma ident "%Z%%M% %I% %E% SMI" #include <stdio.h> @@ -391,7 +390,7 @@ const struct systable systable[] = { {"lwp_sigmask", 3, HEX, HEX, SPM, HEX, HEX}, /* 165 */ {"lwp_private", 3, HEX, NOV, DEC, DEC, HEX}, /* 166 */ {"lwp_wait", 2, DEC, NOV, DEC, HEX}, /* 167 */ -{"lwp_mutex_wakeup", 1, DEC, NOV, HEX}, /* 168 */ +{"lwp_mutex_wakeup", 2, DEC, NOV, HEX, DEC}, /* 168 */ {"lwp_mutex_lock", 1, DEC, NOV, HEX}, /* 169 */ {"lwp_cond_wait", 4, DEC, NOV, HEX, HEX, HEX, DEC}, /* 170 */ {"lwp_cond_signal", 1, DEC, NOV, HEX}, /* 171 */ @@ -475,7 +474,7 @@ const struct systable systable[] = { {"ntp_adjtime", 1, DEC, NOV, HEX}, /* 249 */ {"lwp_mutex_unlock", 1, DEC, NOV, HEX}, /* 250 */ {"lwp_mutex_trylock", 1, DEC, NOV, 
HEX}, /* 251 */ -{"lwp_mutex_init", 2, DEC, NOV, HEX, HEX}, /* 252 */ +{"lwp_mutex_register", 1, DEC, NOV, HEX}, /* 252 */ {"cladm", 3, DEC, NOV, CLC, CLF, HEX}, /* 253 */ {"uucopy", 3, DEC, NOV, HEX, HEX, UNS}, /* 254 */ {"umount2", 2, DEC, NOV, STG, MTF}, /* 255 */ diff --git a/usr/src/cmd/ypcmd/shared/lockmap.c b/usr/src/cmd/ypcmd/shared/lockmap.c index afb0389081..17cbe9e6d0 100644 --- a/usr/src/cmd/ypcmd/shared/lockmap.c +++ b/usr/src/cmd/ypcmd/shared/lockmap.c @@ -18,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -131,7 +132,7 @@ init_locks_mem() */ for (iiter = 0; iiter < MAXHASH; iiter++) { if (rc = mutex_init(&(shmlockarray->locknode[iiter]), - USYNC_PROCESS_ROBUST, 0)) { + USYNC_PROCESS | LOCK_ROBUST, 0)) { if (rc == EBUSY) { ebusy_cnt++; } else { @@ -314,14 +315,14 @@ lock_core(int hashval) switch (rc) { case EOWNERDEAD: /* - * Previows lock owner died, resetting lock + * Previous lock owner died, resetting lock * to recover from error. */ - rc = mutex_init(&(shmlockarray->locknode[hashval]), - USYNC_PROCESS_ROBUST, 0); + rc = mutex_consistent( + &(shmlockarray->locknode[hashval])); if (rc != 0) { syslog(LOG_ERR, - "mutex_init(): error=%d", rc); + "mutex_consistent(): error=%d", rc); return (0); } rc = mutex_unlock(&(shmlockarray->locknode[hashval])); diff --git a/usr/src/head/synch.h b/usr/src/head/synch.h index 629570a646..eab9de86a5 100644 --- a/usr/src/head/synch.h +++ b/usr/src/head/synch.h @@ -111,6 +111,7 @@ int cond_signal(cond_t *); int cond_broadcast(cond_t *); int mutex_init(mutex_t *, int, void *); int mutex_destroy(mutex_t *); +int mutex_consistent(mutex_t *); int mutex_lock(mutex_t *); int mutex_trylock(mutex_t *); int mutex_unlock(mutex_t *); @@ -152,6 +153,7 @@ int cond_signal(); int cond_broadcast(); int mutex_init(); int mutex_destroy(); +int mutex_consistent(); int mutex_lock(); int mutex_trylock(); int mutex_unlock(); diff --git a/usr/src/lib/common/inc/c_synonyms.h b/usr/src/lib/common/inc/c_synonyms.h index 77df53840f..a4e17fc171 100644 --- a/usr/src/lib/common/inc/c_synonyms.h +++ b/usr/src/lib/common/inc/c_synonyms.h @@ -506,6 +506,8 @@ extern "C" { #define munlockall _munlockall #define munlock _munlock #define munmap _munmap +#define _mutex_consistent __mutex_consistent +#define mutex_consistent __mutex_consistent #define _mutex_destroy __mutex_destroy #define mutex_destroy __mutex_destroy #define _mutex_held __mutex_held diff --git a/usr/src/lib/libc/amd64/sys/_lwp_mutex_unlock.s b/usr/src/lib/libc/amd64/sys/_lwp_mutex_unlock.s index abf8442dee..b9f30fc2d1 100644 --- a/usr/src/lib/libc/amd64/sys/_lwp_mutex_unlock.s +++ b/usr/src/lib/libc/amd64/sys/_lwp_mutex_unlock.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -44,7 +44,8 @@ xchgl (%rax), %ecx /* clear lock and get old lock into %ecx */ andl $WAITER_MASK, %ecx /* was anyone waiting on it? */ je 1f - SYSTRAP_RVAL1(lwp_mutex_wakeup) + xorl %esi, %esi + SYSTRAP_RVAL1(lwp_mutex_wakeup) /* lwp_mutex_wakeup(mp, 0) */ SYSLWPERR RET 1: diff --git a/usr/src/lib/libc/common/sys/syslwp.s b/usr/src/lib/libc/common/sys/syslwp.s index 1b0cca505f..fda8f2625a 100644 --- a/usr/src/lib/libc/common/sys/syslwp.s +++ b/usr/src/lib/libc/common/sys/syslwp.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -130,7 +130,7 @@ /* * int - * ___lwp_mutex_wakeup(lwp_mutex_t *mp) + * ___lwp_mutex_wakeup(lwp_mutex_t *mp, int) */ ENTRY(___lwp_mutex_wakeup) SYSTRAP_RVAL1(lwp_mutex_wakeup) @@ -238,10 +238,10 @@ /* * int - * ___lwp_mutex_init(lwp_mutex_t *mp, int type) + * ___lwp_mutex_register(lwp_mutex_t *mp) */ - ENTRY(___lwp_mutex_init) - SYSTRAP_RVAL1(lwp_mutex_init) + ENTRY(___lwp_mutex_register) + SYSTRAP_RVAL1(lwp_mutex_register) SYSLWPERR RET - SET_SIZE(___lwp_mutex_init) + SET_SIZE(___lwp_mutex_register) diff --git a/usr/src/lib/libc/i386/sys/_lwp_mutex_unlock.s b/usr/src/lib/libc/i386/sys/_lwp_mutex_unlock.s index 3afafcab1b..543e194d82 100644 --- a/usr/src/lib/libc/i386/sys/_lwp_mutex_unlock.s +++ b/usr/src/lib/libc/i386/sys/_lwp_mutex_unlock.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,7 +44,16 @@ xchgl (%eax), %ecx / clear lock and get old lock into %ecx andl $WAITER_MASK, %ecx / was anyone waiting on it? 
je 1f - SYSTRAP_RVAL1(lwp_mutex_wakeup) + movl 0(%esp), %ecx / prepare to call lwp_mutex_wakeup() + movl 4(%esp), %edx + movl $0, 4(%esp) + movl %edx, 0(%esp) + pushl %ecx + SYSTRAP_RVAL1(lwp_mutex_wakeup) / lwp_mutex_wakeup(mp, 0) + popl %ecx / reconstruct the stack frame + movl 0(%esp), %edx + movl %edx, 4(%esp) + movl %ecx, 0(%esp) SYSLWPERR RET 1: diff --git a/usr/src/lib/libc/inc/synonyms.h b/usr/src/lib/libc/inc/synonyms.h index 81e03fcc82..47fc223452 100644 --- a/usr/src/lib/libc/inc/synonyms.h +++ b/usr/src/lib/libc/inc/synonyms.h @@ -600,6 +600,8 @@ extern "C" { #define munlockall _munlockall #define munlock _munlock #define munmap _munmap +#define _mutex_consistent __mutex_consistent +#define mutex_consistent __mutex_consistent #define _mutex_destroy __mutex_destroy #define mutex_destroy __mutex_destroy #define _mutex_held __mutex_held diff --git a/usr/src/lib/libc/inc/thr_uberdata.h b/usr/src/lib/libc/inc/thr_uberdata.h index 7dbe4d7aab..ce30f25ef1 100644 --- a/usr/src/lib/libc/inc/thr_uberdata.h +++ b/usr/src/lib/libc/inc/thr_uberdata.h @@ -127,8 +127,8 @@ #define MUTEX_OWNER(mp) ((ulwp_t *)(uintptr_t)(mp)->mutex_owner) /* - * Test if a thread owns a USYNC_THREAD mutex. This is inappropriate - * for a process-shared (USYNC_PROCESS | USYNC_PROCESS_ROBUST) mutex. + * Test if a thread owns a process-private (USYNC_THREAD) mutex. + * This is inappropriate for a process-shared (USYNC_PROCESS) mutex. * The 'mp' argument must not have side-effects since it is evaluated twice. */ #define MUTEX_OWNED(mp, thrp) \ @@ -368,9 +368,11 @@ typedef union { #define MX 0 #define CV 1 #define FIFOQ 0x10 /* or'ing with FIFOQ asks for FIFO queueing */ -#define QHASHSIZE 512 -#define QUEUE_HASH(wchan, type) \ - ((uint_t)((((uintptr_t)(wchan) >> 3) ^ ((uintptr_t)(wchan) >> 12)) \ +#define QHASHSHIFT 9 /* number of hashing bits */ +#define QHASHSIZE (1 << QHASHSHIFT) /* power of 2 (1<<9 == 512) */ +#define QUEUE_HASH(wchan, type) ((uint_t) \ + ((((uintptr_t)(wchan) >> 3) \ + ^ ((uintptr_t)(wchan) >> (QHASHSHIFT + 3))) \ & (QHASHSIZE - 1)) + (((type) == MX)? 0 : QHASHSIZE)) extern queue_head_t *queue_lock(void *, int); @@ -542,12 +544,18 @@ typedef struct ulwp { mxchain_t *ul_mxchain; /* chain of owned ceiling mutexes */ pri_t ul_epri; /* effective scheduling priority */ pri_t ul_emappedpri; /* effective mapped priority */ - uint_t ul_rdlocks; /* # of entries in ul_readlock array */ - /* 0 means there is but a single lock */ - union { /* single rwlock or pointer to array */ + uint_t ul_rdlockcnt; /* # entries in ul_readlock array */ + /* 0 means there is but a single entry */ + union { /* single entry or pointer to array */ readlock_t single; readlock_t *array; } ul_readlock; + uint_t ul_heldlockcnt; /* # entries in ul_heldlocks array */ + /* 0 means there is but a single entry */ + union { /* single entry or pointer to array */ + mutex_t *single; + mutex_t **array; + } ul_heldlocks; /* PROBE_SUPPORT begin */ void *ul_tpdp; /* PROBE_SUPPORT end */ @@ -624,6 +632,26 @@ typedef struct atfork { } atfork_t; /* + * Element in the table of registered process robust locks. + * We keep track of these to make sure that we only call + * ___lwp_mutex_register() once for each such lock. + */ +typedef struct robust { + struct robust *robust_next; + mutex_t *robust_lock; +} robust_t; + +/* + * Parameters of the lock registration hash table. 
+ */ +#define LOCKSHIFT 9 /* number of hashing bits */ +#define LOCKHASHSZ (1 << LOCKSHIFT) /* power of 2 (1<<9 == 512) */ +#define LOCK_HASH(addr) (uint_t) \ + ((((uintptr_t)(addr) >> 3) \ + ^ ((uintptr_t)(addr) >> (LOCKSHIFT + 3))) \ + & (LOCKHASHSZ - 1)) + +/* * Make our hot locks reside on private cache lines (64 bytes). * pad_owner and pad_count (aka fork_owner and fork_count) * are used only in fork_lock_enter() and fork_lock_exit() @@ -781,6 +809,7 @@ typedef struct uberdata { ulwp_t *ulwp_replace_free; ulwp_t *ulwp_replace_last; atfork_t *atforklist; /* circular Q for fork handlers */ + robust_t **robustlocks; /* table of registered robust locks */ struct uberdata **tdb_bootstrap; tdb_t tdb; /* thread debug interfaces (for libc_db) */ } uberdata_t; @@ -910,12 +939,18 @@ typedef struct ulwp32 { caddr32_t ul_mxchain; /* chain of owned ceiling mutexes */ pri_t ul_epri; /* effective scheduling priority */ pri_t ul_emappedpri; /* effective mapped priority */ - uint_t ul_rdlocks; /* # of entries in ul_readlock array */ - /* 0 means there is but a single lock */ - union { /* single rwlock or pointer to array */ + uint_t ul_rdlockcnt; /* # entries in ul_readlock array */ + /* 0 means there is but a single entry */ + union { /* single entry or pointer to array */ readlock32_t single; caddr32_t array; } ul_readlock; + uint_t ul_heldlockcnt; /* # entries in ul_heldlocks array */ + /* 0 means there is but a single entry */ + union { /* single entry or pointer to array */ + caddr32_t single; + caddr32_t array; + } ul_heldlocks; /* PROBE_SUPPORT begin */ caddr32_t ul_tpdp; /* PROBE_SUPPORT end */ @@ -974,6 +1009,7 @@ typedef struct uberdata32 { caddr32_t ulwp_replace_free; caddr32_t ulwp_replace_last; caddr32_t atforklist; + caddr32_t robustlocks; caddr32_t tdb_bootstrap; tdb32_t tdb; } uberdata32_t; @@ -1053,6 +1089,8 @@ extern void tls_setup(void); extern void tls_exit(void); extern void tls_free(ulwp_t *); extern void rwl_free(ulwp_t *); +extern void heldlock_exit(void); +extern void heldlock_free(ulwp_t *); extern void sigacthandler(int, siginfo_t *, void *); extern void signal_init(void); extern int sigequalset(const sigset_t *, const sigset_t *); @@ -1075,6 +1113,10 @@ extern void grab_assert_lock(void); extern void dump_queue_statistics(void); extern void collect_queue_statistics(void); extern void record_spin_locks(ulwp_t *); +extern void remember_lock(mutex_t *); +extern void forget_lock(mutex_t *); +extern void register_lock(mutex_t *); +extern void unregister_locks(void); #if defined(__sparc) extern void _flush_windows(void); #else @@ -1083,8 +1125,8 @@ extern void _flush_windows(void); extern void set_curthread(void *); /* - * Utility function used by cond_broadcast() and rw_unlock() - * when waking up many threads (more than MAXLWPS) all at once. + * Utility function used when waking up many threads (more than MAXLWPS) + * all at once. See mutex_wakeup_all(), cond_broadcast(), and rw_unlock(). 
*/ #define MAXLWPS 128 /* max remembered lwpids before overflow */ #define NEWLWPS 2048 /* max remembered lwpids at first overflow */ @@ -1271,20 +1313,17 @@ extern int _private_mutex_unlock(mutex_t *); extern int _mutex_init(mutex_t *, int, void *); extern int _mutex_destroy(mutex_t *); +extern int _mutex_consistent(mutex_t *); extern int _mutex_lock(mutex_t *); extern int _mutex_trylock(mutex_t *); extern int _mutex_unlock(mutex_t *); -extern void _mutex_set_typeattr(mutex_t *, int); extern int __mutex_init(mutex_t *, int, void *); extern int __mutex_destroy(mutex_t *); +extern int __mutex_consistent(mutex_t *); extern int __mutex_lock(mutex_t *); extern int __mutex_trylock(mutex_t *); extern int __mutex_unlock(mutex_t *); extern int mutex_is_held(mutex_t *); -extern int mutex_lock_internal(mutex_t *, timespec_t *, int); -extern int mutex_trylock_adaptive(mutex_t *); -extern int mutex_queuelock_adaptive(mutex_t *); -extern int mutex_lock_impl(mutex_t *mp, timespec_t *tsp); extern int _cond_init(cond_t *, int, void *); extern int _cond_wait(cond_t *, mutex_t *); @@ -1293,8 +1332,6 @@ extern int _cond_reltimedwait(cond_t *, mutex_t *, const timespec_t *); extern int _cond_signal(cond_t *); extern int _cond_broadcast(cond_t *); extern int _cond_destroy(cond_t *); -extern int cond_sleep_queue(cond_t *, mutex_t *, timespec_t *); -extern int cond_sleep_kernel(cond_t *, mutex_t *, timespec_t *); extern int cond_signal_internal(cond_t *); extern int cond_broadcast_internal(cond_t *); @@ -1344,11 +1381,11 @@ extern int get_info_by_policy(int); /* * System call wrappers (direct interfaces to the kernel) */ -extern int ___lwp_mutex_init(mutex_t *, int); +extern int ___lwp_mutex_register(mutex_t *); extern int ___lwp_mutex_trylock(mutex_t *); extern int ___lwp_mutex_timedlock(mutex_t *, timespec_t *); extern int ___lwp_mutex_unlock(mutex_t *); -extern int ___lwp_mutex_wakeup(mutex_t *); +extern int ___lwp_mutex_wakeup(mutex_t *, int); extern int ___lwp_cond_wait(cond_t *, mutex_t *, timespec_t *, int); extern int __lwp_cond_signal(lwp_cond_t *); extern int __lwp_cond_broadcast(lwp_cond_t *); diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers index 932505d279..d32f776934 100644 --- a/usr/src/lib/libc/port/mapfile-vers +++ b/usr/src/lib/libc/port/mapfile-vers @@ -82,6 +82,7 @@ SUNW_1.23 { # SunOS 5.11 (Solaris 11) mq_timedreceive; mq_timedsend; mq_unlink; + mutex_consistent; nanosleep; ntohl; ntohs; @@ -370,7 +371,7 @@ SUNW_1.22 { # SunOS 5.10 (Solaris 10) pthread_condattr_setclock; pthread_mutexattr_getrobust_np; pthread_mutexattr_setrobust_np; - pthread_mutex_consistent_np; + pthread_mutex_consistent_np = NODYNSORT; pthread_mutex_reltimedlock_np; pthread_mutex_timedlock; pthread_rwlock_reltimedrdlock_np; @@ -1724,6 +1725,8 @@ SUNWprivate_1.1 { _msgctl64; __multi_innetgr; _munlockall; + _mutex_consistent = NODYNSORT; + __mutex_consistent = NODYNSORT; _mutex_destroy = NODYNSORT; __mutex_destroy; mutex_held; @@ -1887,7 +1890,7 @@ SUNWprivate_1.1 { _pthread_mutexattr_setpshared; _pthread_mutexattr_setrobust_np; _pthread_mutexattr_settype; - _pthread_mutex_consistent_np; + _pthread_mutex_consistent_np = NODYNSORT; _pthread_mutex_destroy = NODYNSORT; _pthread_mutex_getprioceiling; _pthread_mutex_init; diff --git a/usr/src/lib/libc/port/threads/assfail.c b/usr/src/lib/libc/port/threads/assfail.c index 0ffd9a3219..ac3e6a068f 100644 --- a/usr/src/lib/libc/port/threads/assfail.c +++ b/usr/src/lib/libc/port/threads/assfail.c @@ -216,7 +216,7 @@ lock_error(const mutex_t *mp, 
const char *who, void *cv, const char *msg) /* EMPTY */; else if (mcopy.mutex_lockw == 0) (void) strcat(buf, "\nthe lock is unowned"); - else if (!(mcopy.mutex_type & (USYNC_PROCESS|USYNC_PROCESS_ROBUST))) { + else if (!(mcopy.mutex_type & USYNC_PROCESS)) { (void) strcat(buf, "\nthe lock owner is "); ultos((uint64_t)mcopy.mutex_owner, 16, buf + strlen(buf)); } else { diff --git a/usr/src/lib/libc/port/threads/pthr_mutex.c b/usr/src/lib/libc/port/threads/pthr_mutex.c index 208cce06ac..3eabd3de61 100644 --- a/usr/src/lib/libc/port/threads/pthr_mutex.c +++ b/usr/src/lib/libc/port/threads/pthr_mutex.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 1999-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -66,9 +66,10 @@ _pthread_mutexattr_destroy(pthread_mutexattr_t *attr) } /* - * pthread_mutexattr_setpshared: sets the shared attr to PRIVATE or SHARED. - * This is equivalent to setting USYNC_PROCESS/USYNC_THREAD flag in - * mutex_init(). + * pthread_mutexattr_setpshared: sets the shared attribute + * to PTHREAD_PROCESS_PRIVATE or PTHREAD_PROCESS_SHARED. + * This is equivalent to setting the USYNC_THREAD/USYNC_PROCESS + * flag in mutex_init(). */ #pragma weak pthread_mutexattr_setpshared = _pthread_mutexattr_setpshared int @@ -85,7 +86,7 @@ _pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int pshared) } /* - * pthread_mutexattr_getpshared: gets the shared attr. + * pthread_mutexattr_getpshared: gets the shared attribute. */ #pragma weak pthread_mutexattr_getpshared = _pthread_mutexattr_getpshared int @@ -101,7 +102,7 @@ _pthread_mutexattr_getpshared(const pthread_mutexattr_t *attr, int *pshared) } /* - * pthread_mutexattr_setprioceiling: sets the prioceiling attr. + * pthread_mutexattr_setprioceiling: sets the prioceiling attribute. */ #pragma weak pthread_mutexattr_setprioceiling = \ _pthread_mutexattr_setprioceiling @@ -118,8 +119,7 @@ _pthread_mutexattr_setprioceiling(pthread_mutexattr_t *attr, int prioceiling) } /* - * pthread_mutexattr_getprioceiling: gets the prioceiling attr. - * Currently unsupported. + * pthread_mutexattr_getprioceiling: gets the prioceiling attribute. */ #pragma weak pthread_mutexattr_getprioceiling = \ _pthread_mutexattr_getprioceiling @@ -137,7 +137,6 @@ _pthread_mutexattr_getprioceiling(const pthread_mutexattr_t *attr, int *ceiling) /* * pthread_mutexattr_setprotocol: sets the protocol attribute. - * Currently unsupported. */ #pragma weak pthread_mutexattr_setprotocol = _pthread_mutexattr_setprotocol int @@ -157,7 +156,6 @@ _pthread_mutexattr_setprotocol(pthread_mutexattr_t *attr, int protocol) /* * pthread_mutexattr_getprotocol: gets the protocol attribute. - * Currently unsupported. */ #pragma weak pthread_mutexattr_getprotocol = _pthread_mutexattr_getprotocol int @@ -173,7 +171,8 @@ _pthread_mutexattr_getprotocol(const pthread_mutexattr_t *attr, int *protocol) } /* - * pthread_mutexattr_setrobust_np: sets the robustness attr to ROBUST or STALL. 
+ * pthread_mutexattr_setrobust_np: sets the robustness attribute + * to PTHREAD_MUTEX_ROBUST_NP or PTHREAD_MUTEX_STALL_NP. */ #pragma weak pthread_mutexattr_setrobust_np = \ _pthread_mutexattr_setrobust_np @@ -191,7 +190,7 @@ _pthread_mutexattr_setrobust_np(pthread_mutexattr_t *attr, int robust) } /* - * pthread_mutexattr_getrobust_np: gets the robustness attr. + * pthread_mutexattr_getrobust_np: gets the robustness attribute. */ #pragma weak pthread_mutexattr_getrobust_np = \ _pthread_mutexattr_getrobust_np @@ -208,95 +207,33 @@ _pthread_mutexattr_getrobust_np(const pthread_mutexattr_t *attr, int *robust) } /* - * pthread_mutex_consistent_np: make an inconsistent mutex consistent. - * The mutex must have been made inconsistent due to the last owner of it - * having died. Currently, no validation is done to check if: - * - the caller owns the mutex - * Since this function is supported only for PI/robust locks, to check - * if the caller owns the mutex, one needs to call the kernel. For now, - * such extra validation does not seem necessary. - */ -#pragma weak pthread_mutex_consistent_np = _pthread_mutex_consistent_np -int -_pthread_mutex_consistent_np(pthread_mutex_t *pmp) -{ - mutex_t *mp = (mutex_t *)pmp; - - /* - * Do this only for an inconsistent, initialized, PI, Robust lock. - * For all other cases, return EINVAL. - */ - if ((mp->mutex_type & PTHREAD_PRIO_INHERIT) && - (mp->mutex_type & PTHREAD_MUTEX_ROBUST_NP) && - (mp->mutex_flag & LOCK_INITED) && - (mp->mutex_flag & LOCK_OWNERDEAD)) { - mp->mutex_flag &= ~LOCK_OWNERDEAD; - return (0); - } - return (EINVAL); -} - -/* - * pthread_mutex_init: Initializes the mutex object. It copies the - * pshared attr into type argument and calls mutex_init(). + * pthread_mutex_init: Initializes the mutex object. It copies the + * various attributes into one type argument and calls mutex_init(). */ #pragma weak pthread_mutex_init = _pthread_mutex_init int _pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr) { - mutex_t *mp = (mutex_t *)mutex; + mattr_t *ap; int type; - int pshared; - int protocol; int prioceiling = 0; - int robust; - int error; - mattr_t *ap; + /* + * All of the pshared, type, protocol, robust attributes + * translate to bits in the mutex_type field. + */ if (attr != NULL) { if ((ap = attr->__pthread_mutexattrp) == NULL) return (EINVAL); - pshared = ap->pshared; - type = ap->type; - protocol = ap->protocol; - if (protocol == PTHREAD_PRIO_PROTECT) + type = ap->pshared | ap->type | ap->protocol | ap->robustness; + if (ap->protocol == PTHREAD_PRIO_PROTECT) prioceiling = ap->prioceiling; - robust = ap->robustness; - /* - * Support robust mutexes only for PI locks. - */ - if (robust == PTHREAD_MUTEX_ROBUST_NP && - protocol != PTHREAD_PRIO_INHERIT) - return (EINVAL); } else { - pshared = DEFAULT_TYPE; - type = PTHREAD_MUTEX_DEFAULT; - protocol = PTHREAD_PRIO_NONE; - robust = PTHREAD_MUTEX_STALL_NP; + type = DEFAULT_TYPE | PTHREAD_MUTEX_DEFAULT | + PTHREAD_PRIO_NONE | PTHREAD_MUTEX_STALL_NP; } - error = _private_mutex_init(mp, pshared, NULL); - if (error == 0) { - /* - * Use the same routine to set the protocol, and robustness - * attributes, as that used to set the type attribute, since - * all of these attributes translate to bits in the mutex_type - * field. - * - * Note that robustness is a new bit, not the Solaris robust - * bit - the latter implies USYNC_PROCESS_ROBUST, or - * SHARED,ROBUST together. 
For POSIX, since robustness is an - * orthogonal attribute, both SHARED,ROBUST and PRIVATE,ROBUST - * should be valid combinations for the future. Hence, - * introduce a new bit in the mutex type field. See - * sys/synch.h or pthread.h. In the future, if we ever - * introduce a USYNC_THREAD_ROBUST, the latter could use this - * new bit... - */ - _mutex_set_typeattr(mp, type|protocol|robust); - mp->mutex_ceiling = (uint8_t)prioceiling; - } - return (error); + return (_private_mutex_init((mutex_t *)mutex, type, &prioceiling)); } /* @@ -353,7 +290,7 @@ _pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type) type = LOCK_ERRORCHECK; break; case PTHREAD_MUTEX_RECURSIVE: - type = LOCK_RECURSIVE|LOCK_ERRORCHECK; + type = LOCK_RECURSIVE | LOCK_ERRORCHECK; break; default: return (EINVAL); @@ -364,7 +301,7 @@ _pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type) /* * UNIX98 - * pthread_mutexattr_gettype: gets the type attr. + * pthread_mutexattr_gettype: gets the type attribute. */ #pragma weak pthread_mutexattr_gettype = _pthread_mutexattr_gettype int @@ -383,7 +320,7 @@ _pthread_mutexattr_gettype(const pthread_mutexattr_t *attr, int *typep) case LOCK_ERRORCHECK: type = PTHREAD_MUTEX_ERRORCHECK; break; - case LOCK_RECURSIVE|LOCK_ERRORCHECK: + case LOCK_RECURSIVE | LOCK_ERRORCHECK: type = PTHREAD_MUTEX_RECURSIVE; break; default: diff --git a/usr/src/lib/libc/port/threads/rwlock.c b/usr/src/lib/libc/port/threads/rwlock.c index 408f74489e..0f58b3a230 100644 --- a/usr/src/lib/libc/port/threads/rwlock.c +++ b/usr/src/lib/libc/port/threads/rwlock.c @@ -45,7 +45,7 @@ /* * Find/allocate an entry for rwlp in our array of rwlocks held for reading. * We must be deferring signals for this to be safe. - * Else if we are returning an entry with ul_rdlocks == 0, + * Else if we are returning an entry with ul_rdlockcnt == 0, * it could be reassigned behind our back in a signal handler. */ static readlock_t * @@ -59,7 +59,7 @@ rwl_entry(rwlock_t *rwlp) /* we must be deferring signals */ ASSERT((self->ul_critical + self->ul_sigdefer) != 0); - if ((nlocks = self->ul_rdlocks) != 0) + if ((nlocks = self->ul_rdlockcnt) != 0) readlockp = self->ul_readlock.array; else { nlocks = 1; @@ -81,12 +81,12 @@ rwl_entry(rwlock_t *rwlp) * No entry available. Allocate more space, converting the single * readlock_t entry into an array of readlock_t entries if necessary. */ - if ((nlocks = self->ul_rdlocks) == 0) { + if ((nlocks = self->ul_rdlockcnt) == 0) { /* * Initial allocation of the readlock_t array. * Convert the single entry into an array. */ - self->ul_rdlocks = nlocks = NLOCKS; + self->ul_rdlockcnt = nlocks = NLOCKS; readlockp = lmalloc(nlocks * sizeof (readlock_t)); /* * The single readlock_t becomes the first entry in the array. @@ -108,7 +108,7 @@ rwl_entry(rwlock_t *rwlp) nlocks * sizeof (readlock_t)); lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t)); self->ul_readlock.array = readlockp; - self->ul_rdlocks *= 2; + self->ul_rdlockcnt *= 2; /* * Return the next available entry in the newly allocated array. */ @@ -124,9 +124,9 @@ rwl_free(ulwp_t *ulwp) { uint_t nlocks; - if ((nlocks = ulwp->ul_rdlocks) != 0) + if ((nlocks = ulwp->ul_rdlockcnt) != 0) lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t)); - ulwp->ul_rdlocks = 0; + ulwp->ul_rdlockcnt = 0; ulwp->ul_readlock.single.rd_rwlock = NULL; ulwp->ul_readlock.single.rd_count = 0; } @@ -157,7 +157,7 @@ _rw_read_held(rwlock_t *rwlp) * The lock is held for reading by some thread. * Search our array of rwlocks held for reading for a match. 
*/ - if ((nlocks = self->ul_rdlocks) != 0) + if ((nlocks = self->ul_rdlockcnt) != 0) readlockp = self->ul_readlock.array; else { nlocks = 1; diff --git a/usr/src/lib/libc/port/threads/scalls.c b/usr/src/lib/libc/port/threads/scalls.c index 34540e0d05..29da638f13 100644 --- a/usr/src/lib/libc/port/threads/scalls.c +++ b/usr/src/lib/libc/port/threads/scalls.c @@ -195,6 +195,7 @@ _private_forkx(int flags) self->ul_siginfo.si_signo = 0; udp->pid = _private_getpid(); /* reset the library's data structures to reflect one thread */ + unregister_locks(); postfork1_child(); restore_signals(self); _postfork_child_handler(); @@ -269,6 +270,7 @@ _private_forkallx(int flags) self->ul_cursig = 0; self->ul_siginfo.si_signo = 0; udp->pid = _private_getpid(); + unregister_locks(); continue_fork(1); } else { continue_fork(0); diff --git a/usr/src/lib/libc/port/threads/synch.c b/usr/src/lib/libc/port/threads/synch.c index ec9fabc226..d3f0e70da3 100644 --- a/usr/src/lib/libc/port/threads/synch.c +++ b/usr/src/lib/libc/port/threads/synch.c @@ -34,11 +34,14 @@ /* * This mutex is initialized to be held by lwp#1. * It is used to block a thread that has returned from a mutex_lock() - * of a PTHREAD_PRIO_INHERIT mutex with an unrecoverable error. + * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error. */ mutex_t stall_mutex = DEFAULTMUTEX; static int shared_mutex_held(mutex_t *); +static int mutex_unlock_internal(mutex_t *, int); +static int mutex_queuelock_adaptive(mutex_t *); +static void mutex_wakeup_all(mutex_t *); /* * Lock statistics support functions. @@ -102,16 +105,19 @@ int thread_queue_spin = 1000; */ #define mutex_spinners mutex_ownerpid -void -_mutex_set_typeattr(mutex_t *mp, int attr) -{ - mp->mutex_type |= (uint8_t)attr; -} +#define ALL_ATTRIBUTES \ + (LOCK_RECURSIVE | LOCK_ERRORCHECK | \ + LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT | \ + LOCK_ROBUST) /* - * 'type' can be one of USYNC_THREAD or USYNC_PROCESS, possibly - * augmented by the flags LOCK_RECURSIVE and/or LOCK_ERRORCHECK, - * or it can be USYNC_PROCESS_ROBUST with no extra flags. + * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST, + * augmented by zero or more the flags: + * LOCK_RECURSIVE + * LOCK_ERRORCHECK + * LOCK_PRIO_INHERIT + * LOCK_PRIO_PROTECT + * LOCK_ROBUST */ #pragma weak _private_mutex_init = __mutex_init #pragma weak mutex_init = __mutex_init @@ -120,28 +126,62 @@ _mutex_set_typeattr(mutex_t *mp, int attr) int __mutex_init(mutex_t *mp, int type, void *arg) { - int error; + int basetype = (type & ~ALL_ATTRIBUTES); + int error = 0; + + if (basetype == USYNC_PROCESS_ROBUST) { + /* + * USYNC_PROCESS_ROBUST is a deprecated historical type. + * We change it into (USYNC_PROCESS | LOCK_ROBUST) but + * retain the USYNC_PROCESS_ROBUST flag so we can return + * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST + * mutexes will ever draw ELOCKUNMAPPED). + */ + type |= (USYNC_PROCESS | LOCK_ROBUST); + basetype = USYNC_PROCESS; + } - switch (type & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) { - case USYNC_THREAD: - case USYNC_PROCESS: + if (!(basetype == USYNC_THREAD || basetype == USYNC_PROCESS) || + (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) + == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) { + error = EINVAL; + } else if (type & LOCK_ROBUST) { + /* + * Callers of mutex_init() with the LOCK_ROBUST attribute + * are required to pass an initially all-zero mutex. + * Multiple calls to mutex_init() are allowed; all but + * the first return EBUSY. 
A call to mutex_init() is + * allowed to make an inconsistent robust lock consistent + * (for historical usage, even though the proper interface + * for this is mutex_consistent()). Note that we use + * atomic_or_16() to set the LOCK_INITED flag so as + * not to disturb surrounding bits (LOCK_OWNERDEAD, etc). + */ + extern void _atomic_or_16(volatile uint16_t *, uint16_t); + if (!(mp->mutex_flag & LOCK_INITED)) { + mp->mutex_type = (uint8_t)type; + _atomic_or_16(&mp->mutex_flag, LOCK_INITED); + mp->mutex_magic = MUTEX_MAGIC; + } else if (type != mp->mutex_type || + ((type & LOCK_PRIO_PROTECT) && + mp->mutex_ceiling != (*(int *)arg))) { + error = EINVAL; + } else if (__mutex_consistent(mp) != 0) { + error = EBUSY; + } + /* register a process robust mutex with the kernel */ + if (basetype == USYNC_PROCESS) + register_lock(mp); + } else { (void) _memset(mp, 0, sizeof (*mp)); mp->mutex_type = (uint8_t)type; mp->mutex_flag = LOCK_INITED; - error = 0; - break; - case USYNC_PROCESS_ROBUST: - if (type & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) - error = EINVAL; - else - error = ___lwp_mutex_init(mp, type); - break; - default: - error = EINVAL; - break; - } - if (error == 0) mp->mutex_magic = MUTEX_MAGIC; + } + + if (error == 0 && (type & LOCK_PRIO_PROTECT)) + mp->mutex_ceiling = (uint8_t)(*(int *)arg); + return (error); } @@ -293,7 +333,7 @@ spin_lock_clear(mutex_t *mp) mp->mutex_owner = 0; if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { - (void) ___lwp_mutex_wakeup(mp); + (void) ___lwp_mutex_wakeup(mp, 0); if (self->ul_spin_lock_wakeup != UINT_MAX) self->ul_spin_lock_wakeup++; } @@ -308,6 +348,7 @@ queue_alloc(void) { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; + mutex_t *mp; void *data; int i; @@ -321,8 +362,11 @@ queue_alloc(void) == MAP_FAILED) thr_panic("cannot allocate thread queue_head table"); udp->queue_head = (queue_head_t *)data; - for (i = 0; i < 2 * QHASHSIZE; i++) - udp->queue_head[i].qh_lock.mutex_magic = MUTEX_MAGIC; + for (i = 0; i < 2 * QHASHSIZE; i++) { + mp = &udp->queue_head[i].qh_lock; + mp->mutex_flag = LOCK_INITED; + mp->mutex_magic = MUTEX_MAGIC; + } } #if defined(THREAD_DEBUG) @@ -688,12 +732,14 @@ unsleep_self(void) * Common code for calling the the ___lwp_mutex_timedlock() system call. * Returns with mutex_owner and mutex_ownerpid set correctly. */ -int +static int mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; + int mtype = mp->mutex_type; hrtime_t begin_sleep; + int acquired; int error; self->ul_sp = stkptr(); @@ -711,13 +757,17 @@ mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) DTRACE_PROBE1(plockstat, mutex__block, mp); for (;;) { - if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0) { - DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); - DTRACE_PROBE2(plockstat, mutex__error, mp, error); + /* + * A return value of EOWNERDEAD or ELOCKUNMAPPED + * means we successfully acquired the lock. + */ + if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 && + error != EOWNERDEAD && error != ELOCKUNMAPPED) { + acquired = 0; break; } - if (mp->mutex_type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) { + if (mtype & USYNC_PROCESS) { /* * Defend against forkall(). We may be the child, * in which case we don't actually own the mutex. 
@@ -726,16 +776,13 @@ mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) if (mp->mutex_ownerpid == udp->pid) { mp->mutex_owner = (uintptr_t)self; exit_critical(self); - DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); - DTRACE_PROBE3(plockstat, mutex__acquire, mp, - 0, 0); + acquired = 1; break; } exit_critical(self); } else { mp->mutex_owner = (uintptr_t)self; - DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); - DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + acquired = 1; break; } } @@ -744,6 +791,14 @@ mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) self->ul_wchan = NULL; self->ul_sp = 0; + if (acquired) { + DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + } else { + DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); + DTRACE_PROBE2(plockstat, mutex__error, mp, error); + } + return (error); } @@ -756,18 +811,22 @@ mutex_trylock_kernel(mutex_t *mp) { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; + int mtype = mp->mutex_type; int error; + int acquired; for (;;) { - if ((error = ___lwp_mutex_trylock(mp)) != 0) { - if (error != EBUSY) { - DTRACE_PROBE2(plockstat, mutex__error, mp, - error); - } + /* + * A return value of EOWNERDEAD or ELOCKUNMAPPED + * means we successfully acquired the lock. + */ + if ((error = ___lwp_mutex_trylock(mp)) != 0 && + error != EOWNERDEAD && error != ELOCKUNMAPPED) { + acquired = 0; break; } - if (mp->mutex_type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) { + if (mtype & USYNC_PROCESS) { /* * Defend against forkall(). We may be the child, * in which case we don't actually own the mutex. @@ -776,18 +835,23 @@ mutex_trylock_kernel(mutex_t *mp) if (mp->mutex_ownerpid == udp->pid) { mp->mutex_owner = (uintptr_t)self; exit_critical(self); - DTRACE_PROBE3(plockstat, mutex__acquire, mp, - 0, 0); + acquired = 1; break; } exit_critical(self); } else { mp->mutex_owner = (uintptr_t)self; - DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + acquired = 1; break; } } + if (acquired) { + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + } else if (error != EBUSY) { + DTRACE_PROBE2(plockstat, mutex__error, mp, error); + } + return (error); } @@ -932,31 +996,42 @@ preempt_unpark(ulwp_t *self, lwpid_t lwpid) } /* - * Spin for a while, trying to grab the lock. We know that we - * failed set_lock_byte(&mp->mutex_lockw) once before coming here. + * Spin for a while, trying to grab the lock. * If this fails, return EBUSY and let the caller deal with it. * If this succeeds, return 0 with mutex_owner set to curthread. 
*/ -int +static int mutex_trylock_adaptive(mutex_t *mp) { ulwp_t *self = curthread; + int error = EBUSY; ulwp_t *ulwp; volatile sc_shared_t *scp; volatile uint8_t *lockp; volatile uint64_t *ownerp; - int count, max = self->ul_adaptive_spin; + int count; + int max; - ASSERT(!(mp->mutex_type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST))); + ASSERT(!(mp->mutex_type & USYNC_PROCESS)); - if (max == 0 || (mp->mutex_spinners >= self->ul_max_spinners)) + if (MUTEX_OWNER(mp) == self) return (EBUSY); - lockp = (volatile uint8_t *)&mp->mutex_lockw; - ownerp = (volatile uint64_t *)&mp->mutex_owner; + /* short-cut, not definitive (see below) */ + if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { + ASSERT(mp->mutex_type & LOCK_ROBUST); + DTRACE_PROBE2(plockstat, mutex__error, mp, ENOTRECOVERABLE); + return (ENOTRECOVERABLE); + } + + if ((max = self->ul_adaptive_spin) == 0 || + mp->mutex_spinners >= self->ul_max_spinners) + max = 1; /* try at least once */ DTRACE_PROBE1(plockstat, mutex__spin, mp); + lockp = (volatile uint8_t *)&mp->mutex_lockw; + ownerp = (volatile uint64_t *)&mp->mutex_owner; /* * This spin loop is unfair to lwps that have already dropped into * the kernel to sleep. They will starve on a highly-contended mutex. @@ -968,14 +1043,11 @@ mutex_trylock_adaptive(mutex_t *mp) */ enter_critical(self); /* protects ul_schedctl */ atomic_inc_32(&mp->mutex_spinners); - for (count = 0; count < max; count++) { + for (count = 1; count <= max; count++) { if (*lockp == 0 && set_lock_byte(lockp) == 0) { *ownerp = (uintptr_t)self; - atomic_dec_32(&mp->mutex_spinners); - exit_critical(self); - DTRACE_PROBE2(plockstat, mutex__spun, 1, count); - DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); - return (0); + error = 0; + break; } SMT_PAUSE(); /* @@ -1004,16 +1076,39 @@ mutex_trylock_adaptive(mutex_t *mp) atomic_dec_32(&mp->mutex_spinners); exit_critical(self); - DTRACE_PROBE2(plockstat, mutex__spun, 0, count); + if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { + ASSERT(mp->mutex_type & LOCK_ROBUST); + /* + * We shouldn't own the mutex; clear the lock. + */ + mp->mutex_owner = 0; + if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) + mutex_wakeup_all(mp); + error = ENOTRECOVERABLE; + } - return (EBUSY); + if (error) { + DTRACE_PROBE2(plockstat, mutex__spun, 0, count); + if (error != EBUSY) { + DTRACE_PROBE2(plockstat, mutex__error, mp, error); + } + } else { + DTRACE_PROBE2(plockstat, mutex__spun, 1, count); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); + if (mp->mutex_flag & LOCK_OWNERDEAD) { + ASSERT(mp->mutex_type & LOCK_ROBUST); + error = EOWNERDEAD; + } + } + + return (error); } /* * Same as mutex_trylock_adaptive(), except specifically for queue locks. * The owner field is not set here; the caller (spin_lock_set()) sets it. */ -int +static int mutex_queuelock_adaptive(mutex_t *mp) { ulwp_t *ulwp; @@ -1044,71 +1139,93 @@ mutex_queuelock_adaptive(mutex_t *mp) /* * Like mutex_trylock_adaptive(), but for process-shared mutexes. - * Spin for a while, trying to grab the lock. We know that we - * failed set_lock_byte(&mp->mutex_lockw) once before coming here. + * Spin for a while, trying to grab the lock. * If this fails, return EBUSY and let the caller deal with it. * If this succeeds, return 0 with mutex_owner set to curthread * and mutex_ownerpid set to the current pid. 
*/ -int +static int mutex_trylock_process(mutex_t *mp) { ulwp_t *self = curthread; - uberdata_t *udp = self->ul_uberdata; - int count; + int error = EBUSY; volatile uint8_t *lockp; - volatile uint64_t *ownerp; - volatile int32_t *pidp; - pid_t pid, newpid; - uint64_t owner, newowner; - - if ((count = ncpus) == 0) - count = ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); - count = (count > 1)? self->ul_adaptive_spin : 0; + int count; + int max; - ASSERT((mp->mutex_type & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == - USYNC_PROCESS); + ASSERT(mp->mutex_type & USYNC_PROCESS); - if (count == 0) + if (shared_mutex_held(mp)) return (EBUSY); + /* short-cut, not definitive (see below) */ + if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { + ASSERT(mp->mutex_type & LOCK_ROBUST); + DTRACE_PROBE2(plockstat, mutex__error, mp, ENOTRECOVERABLE); + return (ENOTRECOVERABLE); + } + + if (ncpus == 0) + ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); + max = (ncpus > 1)? self->ul_adaptive_spin : 1; + if (max == 0) + max = 1; /* try at least once */ + + DTRACE_PROBE1(plockstat, mutex__spin, mp); + lockp = (volatile uint8_t *)&mp->mutex_lockw; - ownerp = (volatile uint64_t *)&mp->mutex_owner; - pidp = (volatile int32_t *)&mp->mutex_ownerpid; - owner = *ownerp; - pid = *pidp; /* * This is a process-shared mutex. * We cannot know if the owner is running on a processor. * We just spin and hope that it is on a processor. */ - while (--count >= 0) { - if (*lockp == 0) { - enter_critical(self); - if (set_lock_byte(lockp) == 0) { - *ownerp = (uintptr_t)self; - *pidp = udp->pid; - exit_critical(self); - DTRACE_PROBE3(plockstat, mutex__acquire, mp, - 0, 0); - return (0); - } - exit_critical(self); - } else if ((newowner = *ownerp) == owner && - (newpid = *pidp) == pid) { - SMT_PAUSE(); - continue; + enter_critical(self); + for (count = 1; count <= max; count++) { + if (*lockp == 0 && set_lock_byte(lockp) == 0) { + mp->mutex_owner = (uintptr_t)self; + mp->mutex_ownerpid = self->ul_uberdata->pid; + error = 0; + break; } + SMT_PAUSE(); + } + exit_critical(self); + + if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { + ASSERT(mp->mutex_type & LOCK_ROBUST); /* - * The owner of the lock changed; start the count over again. - * This may be too aggressive; it needs testing. + * We shouldn't own the mutex; clear the lock. */ - owner = newowner; - pid = newpid; - count = self->ul_adaptive_spin; + mp->mutex_owner = 0; + mp->mutex_ownerpid = 0; + if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { + no_preempt(self); + (void) ___lwp_mutex_wakeup(mp, 1); + preempt(self); + } + error = ENOTRECOVERABLE; } - return (EBUSY); + if (error) { + DTRACE_PROBE2(plockstat, mutex__spun, 0, count); + if (error != EBUSY) { + DTRACE_PROBE2(plockstat, mutex__error, mp, error); + } + } else { + DTRACE_PROBE2(plockstat, mutex__spun, 1, count); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); + if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { + ASSERT(mp->mutex_type & LOCK_ROBUST); + if (mp->mutex_flag & LOCK_OWNERDEAD) + error = EOWNERDEAD; + else if (mp->mutex_type & USYNC_PROCESS_ROBUST) + error = ELOCKUNMAPPED; + else + error = EOWNERDEAD; + } + } + + return (error); } /* @@ -1117,7 +1234,7 @@ mutex_trylock_process(mutex_t *mp) * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) * to wake up the specified lwp. */ -lwpid_t +static lwpid_t mutex_wakeup(mutex_t *mp) { lwpid_t lwpid = 0; @@ -1140,11 +1257,73 @@ mutex_wakeup(mutex_t *mp) } /* + * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex. 
+ */ +static void +mutex_wakeup_all(mutex_t *mp) +{ + queue_head_t *qp; + int nlwpid = 0; + int maxlwps = MAXLWPS; + ulwp_t **ulwpp; + ulwp_t *ulwp; + ulwp_t *prev = NULL; + lwpid_t buffer[MAXLWPS]; + lwpid_t *lwpid = buffer; + + /* + * Walk the list of waiters and prepare to wake up all of them. + * The waiters flag has already been cleared from the mutex. + * + * We keep track of lwpids that are to be unparked in lwpid[]. + * __lwp_unpark_all() is called to unpark all of them after + * they have been removed from the sleep queue and the sleep + * queue lock has been dropped. If we run out of space in our + * on-stack buffer, we need to allocate more but we can't call + * lmalloc() because we are holding a queue lock when the overflow + * occurs and lmalloc() acquires a lock. We can't use alloca() + * either because the application may have allocated a small + * stack and we don't want to overrun the stack. So we call + * alloc_lwpids() to allocate a bigger buffer using the mmap() + * system call directly since that path acquires no locks. + */ + qp = queue_lock(mp, MX); + ulwpp = &qp->qh_head; + while ((ulwp = *ulwpp) != NULL) { + if (ulwp->ul_wchan != mp) { + prev = ulwp; + ulwpp = &ulwp->ul_link; + } else { + if (nlwpid == maxlwps) + lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); + (void) queue_unlink(qp, ulwpp, prev); + lwpid[nlwpid++] = ulwp->ul_lwpid; + } + } + mp->mutex_waiters = 0; + + if (nlwpid == 0) { + queue_unlock(qp); + } else { + no_preempt(curthread); + queue_unlock(qp); + if (nlwpid == 1) + (void) __lwp_unpark(lwpid[0]); + else + (void) __lwp_unpark_all(lwpid, nlwpid); + preempt(curthread); + } + + if (lwpid != buffer) + (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); +} + +/* * Spin for a while, testing to see if the lock has been grabbed. * If this fails, call mutex_wakeup() to release a waiter. */ -lwpid_t -mutex_unlock_queue(mutex_t *mp) +static lwpid_t +mutex_unlock_queue(mutex_t *mp, int release_all) { ulwp_t *self = curthread; uint32_t *lockw = &mp->mutex_lockword; @@ -1168,13 +1347,12 @@ mutex_unlock_queue(mutex_t *mp) * any of the adaptive code because the waiter bit has been cleared * and the adaptive code is unreliable in this case. */ - if (!(*lockw & WAITERMASK)) { /* no waiter exists right now */ + if (release_all || !(*lockw & WAITERMASK)) { mp->mutex_owner = 0; DTRACE_PROBE2(plockstat, mutex__release, mp, 0); if (!(atomic_swap_32(lockw, 0) & WAITERMASK)) - return (0); /* still no waiters */ + return (0); /* no waiters */ no_preempt(self); /* ensure a prompt wakeup */ - lwpid = mutex_wakeup(mp); } else { no_preempt(self); /* ensure a prompt wakeup */ lockp = (volatile uint8_t *)&mp->mutex_lockw; @@ -1217,9 +1395,14 @@ mutex_unlock_queue(mutex_t *mp) * Wake up some lwp that is waiting for it. */ mp->mutex_waiters = 0; - lwpid = mutex_wakeup(mp); } + if (release_all) { + mutex_wakeup_all(mp); + lwpid = 0; + } else { + lwpid = mutex_wakeup(mp); + } if (lwpid == 0) preempt(self); return (lwpid); @@ -1229,8 +1412,8 @@ mutex_unlock_queue(mutex_t *mp) * Like mutex_unlock_queue(), but for process-shared mutexes. * We tested the waiters field before calling here and it was non-zero. */ -void -mutex_unlock_process(mutex_t *mp) +static void +mutex_unlock_process(mutex_t *mp, int release_all) { ulwp_t *self = curthread; int count; @@ -1239,14 +1422,14 @@ mutex_unlock_process(mutex_t *mp) /* * See the comments in mutex_unlock_queue(), above. */ - if ((count = ncpus) == 0) - count = ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); - count = (count > 1)? 
self->ul_release_spin : 0; + if (ncpus == 0) + ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); + count = (ncpus > 1)? self->ul_release_spin : 0; no_preempt(self); mp->mutex_owner = 0; mp->mutex_ownerpid = 0; DTRACE_PROBE2(plockstat, mutex__release, mp, 0); - if (count == 0) { + if (release_all || count == 0) { /* clear lock, test waiter */ if (!(atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK)) { /* no waiters now */ @@ -1271,7 +1454,7 @@ mutex_unlock_process(mutex_t *mp) */ mp->mutex_waiters = 0; } - (void) ___lwp_mutex_wakeup(mp); + (void) ___lwp_mutex_wakeup(mp, release_all); preempt(self); } @@ -1296,7 +1479,7 @@ stall(void) /* * Acquire a USYNC_THREAD mutex via user-level sleep queues. * We failed set_lock_byte(&mp->mutex_lockw) before coming here. - * Returns with mutex_owner set correctly. + * If successful, returns with mutex_owner set correctly. */ int mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, @@ -1333,8 +1516,6 @@ mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, for (;;) { if (set_lock_byte(&mp->mutex_lockw) == 0) { mp->mutex_owner = (uintptr_t)self; - DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); - DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); mp->mutex_waiters = dequeue_self(qp, mp); break; } @@ -1357,17 +1538,10 @@ mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, */ qp = queue_lock(mp, MX); if (self->ul_sleepq == NULL) { - if (error) { - DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); - DTRACE_PROBE2(plockstat, mutex__error, mp, - error); + if (error) break; - } if (set_lock_byte(&mp->mutex_lockw) == 0) { mp->mutex_owner = (uintptr_t)self; - DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); - DTRACE_PROBE3(plockstat, mutex__acquire, mp, - 0, 0); break; } enqueue(qp, self, mp, MX); @@ -1378,28 +1552,164 @@ mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, self->ul_wchan == mp); if (error) { mp->mutex_waiters = dequeue_self(qp, mp); - DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); - DTRACE_PROBE2(plockstat, mutex__error, mp, error); break; } } - ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && self->ul_wchan == NULL); self->ul_sp = 0; - queue_unlock(qp); + if (msp) msp->mutex_sleep_time += gethrtime() - begin_sleep; ASSERT(error == 0 || error == EINVAL || error == ETIME); + + if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { + ASSERT(mp->mutex_type & LOCK_ROBUST); + /* + * We shouldn't own the mutex; clear the lock. 
+ */ + mp->mutex_owner = 0; + if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) + mutex_wakeup_all(mp); + error = ENOTRECOVERABLE; + } + + if (error) { + DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); + DTRACE_PROBE2(plockstat, mutex__error, mp, error); + } else { + DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + if (mp->mutex_flag & LOCK_OWNERDEAD) { + ASSERT(mp->mutex_type & LOCK_ROBUST); + error = EOWNERDEAD; + } + } + return (error); } +static int +mutex_recursion(mutex_t *mp, int mtype, int try) +{ + ASSERT(mutex_is_held(mp)); + ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)); + ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); + + if (mtype & LOCK_RECURSIVE) { + if (mp->mutex_rcount == RECURSION_MAX) { + DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN); + return (EAGAIN); + } + mp->mutex_rcount++; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); + return (0); + } + if (try == MUTEX_LOCK) { + DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); + return (EDEADLK); + } + return (EBUSY); +} + +/* + * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so + * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary. + * We use tdb_hash_lock here and in the synch object tracking code in + * the tdb_agent.c file. There is no conflict between these two usages. + */ +void +register_lock(mutex_t *mp) +{ + uberdata_t *udp = curthread->ul_uberdata; + uint_t hash = LOCK_HASH(mp); + robust_t *rlp; + robust_t **rlpp; + robust_t **table; + + if ((table = udp->robustlocks) == NULL) { + lmutex_lock(&udp->tdb_hash_lock); + if ((table = udp->robustlocks) == NULL) { + table = lmalloc(LOCKHASHSZ * sizeof (robust_t *)); + _membar_producer(); + udp->robustlocks = table; + } + lmutex_unlock(&udp->tdb_hash_lock); + } + _membar_consumer(); + + /* + * First search the registered table with no locks held. + * This is safe because the table never shrinks + * and we can only get a false negative. + */ + for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) { + if (rlp->robust_lock == mp) /* already registered */ + return; + } + + /* + * The lock was not found. + * Repeat the operation with tdb_hash_lock held. + */ + lmutex_lock(&udp->tdb_hash_lock); + + for (rlpp = &table[hash]; + (rlp = *rlpp) != NULL; + rlpp = &rlp->robust_next) { + if (rlp->robust_lock == mp) { /* already registered */ + lmutex_unlock(&udp->tdb_hash_lock); + return; + } + } + + /* + * The lock has never been registered. + * Register it now and add it to the table. + */ + (void) ___lwp_mutex_register(mp); + rlp = lmalloc(sizeof (*rlp)); + rlp->robust_lock = mp; + _membar_producer(); + *rlpp = rlp; + + lmutex_unlock(&udp->tdb_hash_lock); +} + +/* + * This is called in the child of fork()/forkall() to start over + * with a clean slate. (Each process must register its own locks.) + * No locks are needed because all other threads are suspended or gone. + */ +void +unregister_locks(void) +{ + uberdata_t *udp = curthread->ul_uberdata; + uint_t hash; + robust_t **table; + robust_t *rlp; + robust_t *next; + + if ((table = udp->robustlocks) != NULL) { + for (hash = 0; hash < LOCKHASHSZ; hash++) { + rlp = table[hash]; + while (rlp != NULL) { + next = rlp->robust_next; + lfree(rlp, sizeof (*rlp)); + rlp = next; + } + } + lfree(table, LOCKHASHSZ * sizeof (robust_t *)); + udp->robustlocks = NULL; + } +} + /* * Returns with mutex_owner set correctly. 
*/ -int +static int mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) { ulwp_t *self = curthread; @@ -1407,6 +1717,8 @@ mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) int mtype = mp->mutex_type; tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); int error = 0; + uint8_t ceil; + int myprio; ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); @@ -1416,184 +1728,86 @@ mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) if (msp && try == MUTEX_TRY) tdb_incr(msp->mutex_try); - if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp)) { - if (mtype & LOCK_RECURSIVE) { - if (mp->mutex_rcount == RECURSION_MAX) { - error = EAGAIN; - } else { - mp->mutex_rcount++; - DTRACE_PROBE3(plockstat, mutex__acquire, mp, - 1, 0); - return (0); - } - } else if (try == MUTEX_TRY) { - return (EBUSY); - } else { - DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); - return (EDEADLK); - } - } + if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp)) + return (mutex_recursion(mp, mtype, try)); if (self->ul_error_detection && try == MUTEX_LOCK && tsp == NULL && mutex_is_held(mp)) lock_error(mp, "mutex_lock", NULL, NULL); - if (mtype & - (USYNC_PROCESS_ROBUST|PTHREAD_PRIO_INHERIT|PTHREAD_PRIO_PROTECT)) { - uint8_t ceil; - int myprio; - - if (mtype & PTHREAD_PRIO_PROTECT) { - ceil = mp->mutex_ceiling; - ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0); - myprio = real_priority(self); - if (myprio > ceil) { - DTRACE_PROBE2(plockstat, mutex__error, mp, - EINVAL); - return (EINVAL); - } - if ((error = _ceil_mylist_add(mp)) != 0) { - DTRACE_PROBE2(plockstat, mutex__error, mp, - error); - return (error); - } - if (myprio < ceil) - _ceil_prio_inherit(ceil); + if (mtype & LOCK_PRIO_PROTECT) { + ceil = mp->mutex_ceiling; + ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0); + myprio = real_priority(self); + if (myprio > ceil) { + DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL); + return (EINVAL); } - - if (mtype & PTHREAD_PRIO_INHERIT) { - /* go straight to the kernel */ - if (try == MUTEX_TRY) - error = mutex_trylock_kernel(mp); - else /* MUTEX_LOCK */ - error = mutex_lock_kernel(mp, tsp, msp); - /* - * The kernel never sets or clears the lock byte - * for PTHREAD_PRIO_INHERIT mutexes. - * Set it here for debugging consistency. - */ - switch (error) { - case 0: - case EOWNERDEAD: - mp->mutex_lockw = LOCKSET; - break; - } - } else if (mtype & USYNC_PROCESS_ROBUST) { - /* go straight to the kernel */ - if (try == MUTEX_TRY) - error = mutex_trylock_kernel(mp); - else /* MUTEX_LOCK */ - error = mutex_lock_kernel(mp, tsp, msp); - } else { /* PTHREAD_PRIO_PROTECT */ - /* - * Try once at user level before going to the kernel. - * If this is a process shared mutex then protect - * against forkall() while setting mp->mutex_ownerpid. 
- */ - if (mtype & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) { - enter_critical(self); - if (set_lock_byte(&mp->mutex_lockw) == 0) { - mp->mutex_owner = (uintptr_t)self; - mp->mutex_ownerpid = udp->pid; - exit_critical(self); - DTRACE_PROBE3(plockstat, - mutex__acquire, mp, 0, 0); - } else { - exit_critical(self); - error = EBUSY; - } - } else { - if (set_lock_byte(&mp->mutex_lockw) == 0) { - mp->mutex_owner = (uintptr_t)self; - DTRACE_PROBE3(plockstat, - mutex__acquire, mp, 0, 0); - } else { - error = EBUSY; - } - } - if (error && try == MUTEX_LOCK) - error = mutex_lock_kernel(mp, tsp, msp); + if ((error = _ceil_mylist_add(mp)) != 0) { + DTRACE_PROBE2(plockstat, mutex__error, mp, error); + return (error); } + if (myprio < ceil) + _ceil_prio_inherit(ceil); + } - if (error) { - if (mtype & PTHREAD_PRIO_INHERIT) { - switch (error) { - case EOWNERDEAD: - case ENOTRECOVERABLE: - if (mtype & PTHREAD_MUTEX_ROBUST_NP) - break; - if (error == EOWNERDEAD) { - /* - * We own the mutex; unlock it. - * It becomes ENOTRECOVERABLE. - * All waiters are waked up. - */ - mp->mutex_owner = 0; - mp->mutex_ownerpid = 0; - DTRACE_PROBE2(plockstat, - mutex__release, mp, 0); - mp->mutex_lockw = LOCKCLEAR; - (void) ___lwp_mutex_unlock(mp); - } - /* FALLTHROUGH */ - case EDEADLK: - if (try == MUTEX_LOCK) - stall(); - error = EBUSY; - break; - } - } - if ((mtype & PTHREAD_PRIO_PROTECT) && - error != EOWNERDEAD) { - (void) _ceil_mylist_del(mp); - if (myprio < ceil) - _ceil_prio_waive(); - } - } - } else if (mtype & USYNC_PROCESS) { + if ((mtype & (USYNC_PROCESS | LOCK_ROBUST)) + == (USYNC_PROCESS | LOCK_ROBUST)) + register_lock(mp); + + if (mtype & LOCK_PRIO_INHERIT) { + /* go straight to the kernel */ + if (try == MUTEX_TRY) + error = mutex_trylock_kernel(mp); + else /* MUTEX_LOCK */ + error = mutex_lock_kernel(mp, tsp, msp); /* - * This is a process shared mutex. Protect against - * forkall() while setting mp->mutex_ownerpid. + * The kernel never sets or clears the lock byte + * for LOCK_PRIO_INHERIT mutexes. + * Set it here for consistency. 
*/ - enter_critical(self); - if (set_lock_byte(&mp->mutex_lockw) == 0) { - mp->mutex_owner = (uintptr_t)self; - mp->mutex_ownerpid = udp->pid; - exit_critical(self); - DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); - } else { - /* try a little harder */ - exit_critical(self); - error = mutex_trylock_process(mp); + switch (error) { + case 0: + mp->mutex_lockw = LOCKSET; + break; + case EOWNERDEAD: + case ELOCKUNMAPPED: + mp->mutex_lockw = LOCKSET; + /* FALLTHROUGH */ + case ENOTRECOVERABLE: + ASSERT(mtype & LOCK_ROBUST); + break; + case EDEADLK: + if (try == MUTEX_LOCK) + stall(); + error = EBUSY; + break; } - if (error && try == MUTEX_LOCK) + } else if (mtype & USYNC_PROCESS) { + error = mutex_trylock_process(mp); + if (error == EBUSY && try == MUTEX_LOCK) error = mutex_lock_kernel(mp, tsp, msp); } else { /* USYNC_THREAD */ - /* try once */ - if (set_lock_byte(&mp->mutex_lockw) == 0) { - mp->mutex_owner = (uintptr_t)self; - DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); - } else { - /* try a little harder if we don't own the mutex */ - error = EBUSY; - if (MUTEX_OWNER(mp) != self) - error = mutex_trylock_adaptive(mp); - if (error && try == MUTEX_LOCK) /* go park */ - error = mutex_lock_queue(self, msp, mp, tsp); - } + error = mutex_trylock_adaptive(mp); + if (error == EBUSY && try == MUTEX_LOCK) + error = mutex_lock_queue(self, msp, mp, tsp); } switch (error) { + case 0: case EOWNERDEAD: case ELOCKUNMAPPED: - mp->mutex_owner = (uintptr_t)self; - DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); - /* FALLTHROUGH */ - case 0: + if (mtype & LOCK_ROBUST) + remember_lock(mp); if (msp) record_begin_hold(msp); break; default: + if (mtype & LOCK_PRIO_PROTECT) { + (void) _ceil_mylist_del(mp); + if (myprio < ceil) + _ceil_prio_waive(); + } if (try == MUTEX_TRY) { if (msp) tdb_incr(msp->mutex_try_fail); @@ -1619,6 +1833,7 @@ fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) * zero, one, or both of the flags LOCK_RECURSIVE and * LOCK_ERRORCHECK are set, and that no other flags are set. 
*/ + ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0); enter_critical(self); if (set_lock_byte(&mp->mutex_lockw) == 0) { mp->mutex_owner = (uintptr_t)self; @@ -1629,23 +1844,11 @@ fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) } exit_critical(self); - if ((mtype & ~USYNC_PROCESS) && shared_mutex_held(mp)) { - if (mtype & LOCK_RECURSIVE) { - if (mp->mutex_rcount == RECURSION_MAX) - return (EAGAIN); - mp->mutex_rcount++; - DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); - return (0); - } - if (try == MUTEX_LOCK) { - DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); - return (EDEADLK); - } - return (EBUSY); - } + if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp)) + return (mutex_recursion(mp, mtype, try)); - /* try a little harder if we don't own the mutex */ - if (!shared_mutex_held(mp) && mutex_trylock_process(mp) == 0) + /* try a little harder */ + if (mutex_trylock_process(mp) == 0) return (0); if (try == MUTEX_LOCK) @@ -1659,16 +1862,6 @@ fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) } static int -slow_lock(ulwp_t *self, mutex_t *mp, timespec_t *tsp) -{ - int error = 0; - - if (MUTEX_OWNER(mp) == self || mutex_trylock_adaptive(mp) != 0) - error = mutex_lock_queue(self, NULL, mp, tsp); - return (error); -} - -int mutex_lock_impl(mutex_t *mp, timespec_t *tsp) { ulwp_t *self = curthread; @@ -1694,21 +1887,8 @@ mutex_lock_impl(mutex_t *mp, timespec_t *tsp) DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); return (0); } - if (mtype && MUTEX_OWNER(mp) == self) { - /* - * LOCK_RECURSIVE, LOCK_ERRORCHECK, or both. - */ - if (mtype & LOCK_RECURSIVE) { - if (mp->mutex_rcount == RECURSION_MAX) - return (EAGAIN); - mp->mutex_rcount++; - DTRACE_PROBE3(plockstat, mutex__acquire, mp, - 1, 0); - return (0); - } - DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); - return (EDEADLK); /* LOCK_ERRORCHECK */ - } + if (mtype && MUTEX_OWNER(mp) == self) + return (mutex_recursion(mp, mtype, MUTEX_LOCK)); /* * We have reached a deadlock, probably because the * process is executing non-async-signal-safe code in @@ -1736,30 +1916,18 @@ mutex_lock_impl(mutex_t *mp, timespec_t *tsp) if ((gflags = self->ul_schedctl_called) != NULL && (gflags->uf_trs_ted | (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { - if (mtype & USYNC_PROCESS) return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK)); - if (set_lock_byte(&mp->mutex_lockw) == 0) { mp->mutex_owner = (uintptr_t)self; DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); return (0); } - - if (mtype && MUTEX_OWNER(mp) == self) { - if (mtype & LOCK_RECURSIVE) { - if (mp->mutex_rcount == RECURSION_MAX) - return (EAGAIN); - mp->mutex_rcount++; - DTRACE_PROBE3(plockstat, mutex__acquire, mp, - 1, 0); - return (0); - } - DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); - return (EDEADLK); /* LOCK_ERRORCHECK */ - } - - return (slow_lock(self, mp, tsp)); + if (mtype && MUTEX_OWNER(mp) == self) + return (mutex_recursion(mp, mtype, MUTEX_LOCK)); + if (mutex_trylock_adaptive(mp) != 0) + return (mutex_lock_queue(self, NULL, mp, tsp)); + return (0); } /* else do it the long way */ @@ -1808,22 +1976,6 @@ _pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime) return (error); } -static int -slow_trylock(mutex_t *mp, ulwp_t *self) -{ - if (MUTEX_OWNER(mp) == self || - mutex_trylock_adaptive(mp) != 0) { - uberdata_t *udp = self->ul_uberdata; - - if (__td_event_report(self, TD_LOCK_TRY, udp)) { - self->ul_td_evbuf.eventnum = TD_LOCK_TRY; - 
tdb_event(TD_LOCK_TRY, udp); - } - return (EBUSY); - } - return (0); -} - #pragma weak _private_mutex_trylock = __mutex_trylock #pragma weak mutex_trylock = __mutex_trylock #pragma weak _mutex_trylock = __mutex_trylock @@ -1856,17 +2008,8 @@ __mutex_trylock(mutex_t *mp) DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); return (0); } - if (mtype && MUTEX_OWNER(mp) == self) { - if (mtype & LOCK_RECURSIVE) { - if (mp->mutex_rcount == RECURSION_MAX) - return (EAGAIN); - mp->mutex_rcount++; - DTRACE_PROBE3(plockstat, mutex__acquire, mp, - 1, 0); - return (0); - } - return (EDEADLK); /* LOCK_ERRORCHECK */ - } + if (mtype && MUTEX_OWNER(mp) == self) + return (mutex_recursion(mp, mtype, MUTEX_TRY)); return (EBUSY); } @@ -1878,29 +2021,23 @@ __mutex_trylock(mutex_t *mp) if ((gflags = self->ul_schedctl_called) != NULL && (gflags->uf_trs_ted | (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { - if (mtype & USYNC_PROCESS) return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); - if (set_lock_byte(&mp->mutex_lockw) == 0) { mp->mutex_owner = (uintptr_t)self; DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); return (0); } - - if (mtype && MUTEX_OWNER(mp) == self) { - if (mtype & LOCK_RECURSIVE) { - if (mp->mutex_rcount == RECURSION_MAX) - return (EAGAIN); - mp->mutex_rcount++; - DTRACE_PROBE3(plockstat, mutex__acquire, mp, - 1, 0); - return (0); + if (mtype && MUTEX_OWNER(mp) == self) + return (mutex_recursion(mp, mtype, MUTEX_TRY)); + if (mutex_trylock_adaptive(mp) != 0) { + if (__td_event_report(self, TD_LOCK_TRY, udp)) { + self->ul_td_evbuf.eventnum = TD_LOCK_TRY; + tdb_event(TD_LOCK_TRY, udp); } - return (EBUSY); /* LOCK_ERRORCHECK */ + return (EBUSY); } - - return (slow_trylock(mp, self)); + return (0); } /* else do it the long way */ @@ -1908,13 +2045,14 @@ __mutex_trylock(mutex_t *mp) } int -mutex_unlock_internal(mutex_t *mp) +mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; int mtype = mp->mutex_type; tdb_mutex_stats_t *msp; - int error; + int error = 0; + int release_all; lwpid_t lwpid; if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp)) @@ -1932,50 +2070,49 @@ mutex_unlock_internal(mutex_t *mp) if ((msp = MUTEX_STATS(mp, udp)) != NULL) (void) record_hold_time(msp); - if (mtype & - (USYNC_PROCESS_ROBUST|PTHREAD_PRIO_INHERIT|PTHREAD_PRIO_PROTECT)) { + if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) && + (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { + ASSERT(mp->mutex_type & LOCK_ROBUST); + mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); + mp->mutex_flag |= LOCK_NOTRECOVERABLE; + } + release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); + + if (mtype & LOCK_PRIO_INHERIT) { no_preempt(self); mp->mutex_owner = 0; mp->mutex_ownerpid = 0; DTRACE_PROBE2(plockstat, mutex__release, mp, 0); - if (mtype & PTHREAD_PRIO_INHERIT) { - mp->mutex_lockw = LOCKCLEAR; - error = ___lwp_mutex_unlock(mp); - } else if (mtype & USYNC_PROCESS_ROBUST) { - error = ___lwp_mutex_unlock(mp); - } else { - if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) - (void) ___lwp_mutex_wakeup(mp); - error = 0; - } - if (mtype & PTHREAD_PRIO_PROTECT) { - if (_ceil_mylist_del(mp)) - _ceil_prio_waive(); - } + mp->mutex_lockw = LOCKCLEAR; + error = ___lwp_mutex_unlock(mp); preempt(self); } else if (mtype & USYNC_PROCESS) { - if (mp->mutex_lockword & WAITERMASK) - mutex_unlock_process(mp); - else { + if (mp->mutex_lockword & WAITERMASK) { + mutex_unlock_process(mp, release_all); + } else { mp->mutex_owner = 
0; mp->mutex_ownerpid = 0; DTRACE_PROBE2(plockstat, mutex__release, mp, 0); if (atomic_swap_32(&mp->mutex_lockword, 0) & - WAITERMASK) { + WAITERMASK) { /* a waiter suddenly appeared */ no_preempt(self); - (void) ___lwp_mutex_wakeup(mp); + (void) ___lwp_mutex_wakeup(mp, release_all); preempt(self); } } - error = 0; } else { /* USYNC_THREAD */ - if ((lwpid = mutex_unlock_queue(mp)) != 0) { + if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) { (void) __lwp_unpark(lwpid); preempt(self); } - error = 0; } + if (mtype & LOCK_ROBUST) + forget_lock(mp); + + if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) + _ceil_prio_waive(); + return (error); } @@ -2046,7 +2183,7 @@ fast_unlock: (void) __lwp_unpark(lwpid); preempt(self); } - } else if ((lwpid = mutex_unlock_queue(mp)) != 0) { + } else if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { (void) __lwp_unpark(lwpid); preempt(self); } @@ -2082,16 +2219,16 @@ fast_unlock: DTRACE_PROBE2(plockstat, mutex__release, mp, 1); return (0); } - if (mp->mutex_lockword & WAITERMASK) - mutex_unlock_process(mp); - else { + if (mp->mutex_lockword & WAITERMASK) { + mutex_unlock_process(mp, 0); + } else { mp->mutex_owner = 0; mp->mutex_ownerpid = 0; DTRACE_PROBE2(plockstat, mutex__release, mp, 0); if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { no_preempt(self); - (void) ___lwp_mutex_wakeup(mp); + (void) ___lwp_mutex_wakeup(mp, 0); preempt(self); } } @@ -2101,7 +2238,7 @@ fast_unlock: /* else do it the long way */ slow_unlock: - return (mutex_unlock_internal(mp)); + return (mutex_unlock_internal(mp, 0)); } /* @@ -2176,7 +2313,7 @@ lmutex_unlock(mutex_t *mp) if (msp) (void) record_hold_time(msp); - if ((lwpid = mutex_unlock_queue(mp)) != 0) { + if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { (void) __lwp_unpark(lwpid); preempt(self); } @@ -2259,39 +2396,23 @@ static int shared_mutex_held(mutex_t *mparg) { /* - * There is an inherent data race in the current ownership design. - * The mutex_owner and mutex_ownerpid fields cannot be set or tested - * atomically as a pair. The original implementation tested each - * field just once. This was exposed to trivial false positives in - * the case of multiple multithreaded processes with thread addresses - * in common. To close the window to an acceptable level we now use a - * sequence of five tests: pid-thr-pid-thr-pid. This ensures that any - * single interruption will still leave one uninterrupted sequence of - * pid-thr-pid tests intact. - * - * It is assumed that all updates are always ordered thr-pid and that - * we have TSO hardware. + * The 'volatile' is necessary to make sure the compiler doesn't + * reorder the tests of the various components of the mutex. + * They must be tested in this order: + * mutex_lockw + * mutex_owner + * mutex_ownerpid + * This relies on the fact that everywhere mutex_lockw is cleared, + * mutex_owner and mutex_ownerpid are cleared before mutex_lockw + * is cleared, and that everywhere mutex_lockw is set, mutex_owner + * and mutex_ownerpid are set after mutex_lockw is set, and that + * mutex_lockw is set or cleared with a memory barrier. 
*/ volatile mutex_t *mp = (volatile mutex_t *)mparg; ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; - if (mp->mutex_ownerpid != udp->pid) - return (0); - - if (!MUTEX_OWNED(mp, self)) - return (0); - - if (mp->mutex_ownerpid != udp->pid) - return (0); - - if (!MUTEX_OWNED(mp, self)) - return (0); - - if (mp->mutex_ownerpid != udp->pid) - return (0); - - return (1); + return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid); } /* @@ -2305,10 +2426,12 @@ shared_mutex_held(mutex_t *mparg) #pragma weak _mutex_held = mutex_is_held #pragma weak __mutex_held = mutex_is_held int -mutex_is_held(mutex_t *mp) +mutex_is_held(mutex_t *mparg) { - if (mp->mutex_type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) - return (shared_mutex_held(mp)); + volatile mutex_t *mp = (volatile mutex_t *)mparg; + + if (mparg->mutex_type & USYNC_PROCESS) + return (shared_mutex_held(mparg)); return (MUTEX_OWNED(mp, curthread)); } @@ -2320,12 +2443,35 @@ mutex_is_held(mutex_t *mp) int __mutex_destroy(mutex_t *mp) { - mp->mutex_magic = 0; - mp->mutex_flag &= ~LOCK_INITED; + if (mp->mutex_type & USYNC_PROCESS) + forget_lock(mp); + (void) _memset(mp, 0, sizeof (*mp)); tdb_sync_obj_deregister(mp); return (0); } +#pragma weak mutex_consistent = __mutex_consistent +#pragma weak _mutex_consistent = __mutex_consistent +#pragma weak pthread_mutex_consistent_np = __mutex_consistent +#pragma weak _pthread_mutex_consistent_np = __mutex_consistent +int +__mutex_consistent(mutex_t *mp) +{ + /* + * Do this only for an inconsistent, initialized robust lock + * that we hold. For all other cases, return EINVAL. + */ + if (mutex_is_held(mp) && + (mp->mutex_type & LOCK_ROBUST) && + (mp->mutex_flag & LOCK_INITED) && + (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { + mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); + mp->mutex_rcount = 0; + return (0); + } + return (EINVAL); +} + /* * Spin locks are separate from ordinary mutexes, * but we use the same data structure for them. @@ -2380,21 +2526,37 @@ _pthread_spin_trylock(pthread_spinlock_t *lock) int _pthread_spin_lock(pthread_spinlock_t *lock) { - volatile uint8_t *lockp = - (volatile uint8_t *)&((mutex_t *)lock)->mutex_lockw; + mutex_t *mp = (mutex_t *)lock; + ulwp_t *self = curthread; + volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; + int count = 0; + + ASSERT(!self->ul_critical || self->ul_bindflags); + + DTRACE_PROBE1(plockstat, mutex__spin, mp); - ASSERT(!curthread->ul_critical || curthread->ul_bindflags); /* * We don't care whether the owner is running on a processor. * We just spin because that's what this interface requires. */ for (;;) { + if (count < INT_MAX) + count++; if (*lockp == 0) { /* lock byte appears to be clear */ - if (_pthread_spin_trylock(lock) == 0) - return (0); + no_preempt(self); + if (set_lock_byte(lockp) == 0) + break; + preempt(self); } SMT_PAUSE(); } + mp->mutex_owner = (uintptr_t)self; + if (mp->mutex_type == USYNC_PROCESS) + mp->mutex_ownerpid = self->ul_uberdata->pid; + preempt(self); + DTRACE_PROBE2(plockstat, mutex__spun, 1, count); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); + return (0); } #pragma weak pthread_spin_unlock = _pthread_spin_unlock @@ -2413,6 +2575,148 @@ _pthread_spin_unlock(pthread_spinlock_t *lock) return (0); } +#define INITIAL_LOCKS 8 /* initialial size of ul_heldlocks.array */ + +/* + * Find/allocate an entry for 'lock' in our array of held locks. 
+ */ +static mutex_t ** +find_lock_entry(mutex_t *lock) +{ + ulwp_t *self = curthread; + mutex_t **remembered = NULL; + mutex_t **lockptr; + uint_t nlocks; + + if ((nlocks = self->ul_heldlockcnt) != 0) + lockptr = self->ul_heldlocks.array; + else { + nlocks = 1; + lockptr = &self->ul_heldlocks.single; + } + + for (; nlocks; nlocks--, lockptr++) { + if (*lockptr == lock) + return (lockptr); + if (*lockptr == NULL && remembered == NULL) + remembered = lockptr; + } + if (remembered != NULL) { + *remembered = lock; + return (remembered); + } + + /* + * No entry available. Allocate more space, converting + * the single entry into an array of entries if necessary. + */ + if ((nlocks = self->ul_heldlockcnt) == 0) { + /* + * Initial allocation of the array. + * Convert the single entry into an array. + */ + self->ul_heldlockcnt = nlocks = INITIAL_LOCKS; + lockptr = lmalloc(nlocks * sizeof (mutex_t *)); + /* + * The single entry becomes the first entry in the array. + */ + *lockptr = self->ul_heldlocks.single; + self->ul_heldlocks.array = lockptr; + /* + * Return the next available entry in the array. + */ + *++lockptr = lock; + return (lockptr); + } + /* + * Reallocate the array, double the size each time. + */ + lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *)); + (void) _memcpy(lockptr, self->ul_heldlocks.array, + nlocks * sizeof (mutex_t *)); + lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); + self->ul_heldlocks.array = lockptr; + self->ul_heldlockcnt *= 2; + /* + * Return the next available entry in the newly allocated array. + */ + *(lockptr += nlocks) = lock; + return (lockptr); +} + +/* + * Insert 'lock' into our list of held locks. + * Currently only used for LOCK_ROBUST mutexes. + */ +void +remember_lock(mutex_t *lock) +{ + (void) find_lock_entry(lock); +} + +/* + * Remove 'lock' from our list of held locks. + * Currently only used for LOCK_ROBUST mutexes. + */ +void +forget_lock(mutex_t *lock) +{ + *find_lock_entry(lock) = NULL; +} + +/* + * Free the array of held locks. + */ +void +heldlock_free(ulwp_t *ulwp) +{ + uint_t nlocks; + + if ((nlocks = ulwp->ul_heldlockcnt) != 0) + lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); + ulwp->ul_heldlockcnt = 0; + ulwp->ul_heldlocks.array = NULL; +} + +/* + * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD. + * Called from _thrp_exit() to deal with abandoned locks. + */ +void +heldlock_exit(void) +{ + ulwp_t *self = curthread; + mutex_t **lockptr; + uint_t nlocks; + mutex_t *mp; + + if ((nlocks = self->ul_heldlockcnt) != 0) + lockptr = self->ul_heldlocks.array; + else { + nlocks = 1; + lockptr = &self->ul_heldlocks.single; + } + + for (; nlocks; nlocks--, lockptr++) { + /* + * The kernel takes care of transitioning held + * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. + * We avoid that case here. + */ + if ((mp = *lockptr) != NULL && + mutex_is_held(mp) && + (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) == + LOCK_ROBUST) { + mp->mutex_rcount = 0; + if (!(mp->mutex_flag & LOCK_UNMAPPED)) + mp->mutex_flag |= LOCK_OWNERDEAD; + (void) mutex_unlock_internal(mp, 1); + } + } + + heldlock_free(self); +} + #pragma weak cond_init = _cond_init /* ARGSUSED2 */ int @@ -2437,7 +2741,7 @@ _cond_init(cond_t *cvp, int type, void *arg) * The associated mutex is *not* reacquired before returning. * That must be done by the caller of cond_sleep_queue(). 
*/ -int +static int cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) { ulwp_t *self = curthread; @@ -2446,6 +2750,7 @@ cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) lwpid_t lwpid; int signalled; int error; + int release_all; /* * Put ourself on the CV sleep queue, unlock the mutex, then @@ -2460,7 +2765,12 @@ cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) self->ul_cvmutex = mp; self->ul_cv_wake = (tsp != NULL); self->ul_signalled = 0; - lwpid = mutex_unlock_queue(mp); + if (mp->mutex_flag & LOCK_OWNERDEAD) { + mp->mutex_flag &= ~LOCK_OWNERDEAD; + mp->mutex_flag |= LOCK_NOTRECOVERABLE; + } + release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); + lwpid = mutex_unlock_queue(mp, release_all); for (;;) { set_parking_flag(self, 1); queue_unlock(qp); @@ -2549,6 +2859,7 @@ cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp, { ulwp_t *self = curthread; int error; + int merror; /* * The old thread library was programmed to defer signals @@ -2572,14 +2883,11 @@ cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp, /* * Reacquire the mutex. */ - if (set_lock_byte(&mp->mutex_lockw) == 0) { - mp->mutex_owner = (uintptr_t)self; - DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); - } else if (mutex_trylock_adaptive(mp) != 0) { - (void) mutex_lock_queue(self, msp, mp, NULL); - } - - if (msp) + if ((merror = mutex_trylock_adaptive(mp)) == EBUSY) + merror = mutex_lock_queue(self, msp, mp, NULL); + if (merror) + error = merror; + if (msp && (merror == 0 || merror == EOWNERDEAD)) record_begin_hold(msp); /* @@ -2595,23 +2903,21 @@ cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp, * cond_sleep_kernel(): utility function for cond_wait_kernel(). * See the comment ahead of cond_sleep_queue(), above. */ -int +static int cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) { int mtype = mp->mutex_type; ulwp_t *self = curthread; int error; - if (mtype & PTHREAD_PRIO_PROTECT) { - if (_ceil_mylist_del(mp)) - _ceil_prio_waive(); - } + if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) + _ceil_prio_waive(); self->ul_sp = stkptr(); self->ul_wchan = cvp; mp->mutex_owner = 0; mp->mutex_ownerpid = 0; - if (mtype & PTHREAD_PRIO_INHERIT) + if (mtype & LOCK_PRIO_INHERIT) mp->mutex_lockw = LOCKCLEAR; /* * ___lwp_cond_wait() returns immediately with EINTR if @@ -2721,12 +3027,12 @@ cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) lock_error(mp, "recursive mutex in cond_wait", cvp, NULL); if (cvp->cond_type & USYNC_PROCESS) { - if (!(mtype & (USYNC_PROCESS | USYNC_PROCESS_ROBUST))) + if (!(mtype & USYNC_PROCESS)) lock_error(mp, "cond_wait", cvp, "condvar process-shared, " "mutex process-private"); } else { - if (mtype & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) + if (mtype & USYNC_PROCESS) lock_error(mp, "cond_wait", cvp, "condvar process-private, " "mutex process-shared"); @@ -2741,8 +3047,8 @@ cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) */ rcount = mp->mutex_rcount; mp->mutex_rcount = 0; - if ((mtype & (USYNC_PROCESS | USYNC_PROCESS_ROBUST | - PTHREAD_PRIO_INHERIT | PTHREAD_PRIO_PROTECT)) | + if ((mtype & + (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) | (cvp->cond_type & USYNC_PROCESS)) error = cond_wait_kernel(cvp, mp, tsp); else @@ -3005,10 +3311,10 @@ cond_signal_internal(cond_t *cvp) * deal properly with spurious wakeups. 
*/ *ulwpp = ulwp->ul_link; + ulwp->ul_link = NULL; if (qp->qh_tail == ulwp) qp->qh_tail = prev; qp->qh_qlen--; - ulwp->ul_link = NULL; mp = ulwp->ul_cvmutex; /* the mutex he will acquire */ ulwp->ul_cvmutex = NULL; @@ -3036,12 +3342,12 @@ cond_signal_internal(cond_t *cvp) } /* - * Utility function called from cond_broadcast() and rw_queue_release() - * to (re)allocate a big buffer to hold the lwpids of all the threads - * to be set running after they are removed from their sleep queues. - * Since we are holding a queue lock, we cannot call any function - * that might acquire a lock. mmap(), munmap() and lwp_unpark_all() - * are simple system calls and are safe in this regard. + * Utility function called by mutex_wakeup_all(), cond_broadcast(), + * and rw_queue_release() to (re)allocate a big buffer to hold the + * lwpids of all the threads to be set running after they are removed + * from their sleep queues. Since we are holding a queue lock, we + * cannot call any function that might acquire a lock. mmap(), munmap(), + * lwp_unpark_all() are simple system calls and are safe in this regard. */ lwpid_t * alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr) @@ -3144,10 +3450,10 @@ cond_broadcast_internal(cond_t *cvp) continue; } *ulwpp = ulwp->ul_link; + ulwp->ul_link = NULL; if (qp->qh_tail == ulwp) qp->qh_tail = prev; qp->qh_qlen--; - ulwp->ul_link = NULL; mp = ulwp->ul_cvmutex; /* his mutex */ ulwp->ul_cvmutex = NULL; ASSERT(mp != NULL); diff --git a/usr/src/lib/libc/port/threads/thr.c b/usr/src/lib/libc/port/threads/thr.c index affa5297eb..6df0608e1c 100644 --- a/usr/src/lib/libc/port/threads/thr.c +++ b/usr/src/lib/libc/port/threads/thr.c @@ -117,6 +117,7 @@ uberdata_t __uberdata = { NULL, /* ulwp_replace_free */ NULL, /* ulwp_replace_last */ NULL, /* atforklist */ + NULL, /* robustlocks */ NULL, /* __tdb_bootstrap */ { /* tdb */ NULL, /* tdb_sync_addr_hash */ @@ -798,8 +799,9 @@ _thrp_exit() } lmutex_unlock(&udp->link_lock); - tsd_exit(); /* deallocate thread-specific data */ - tls_exit(); /* deallocate thread-local storage */ + tsd_exit(); /* deallocate thread-specific data */ + tls_exit(); /* deallocate thread-local storage */ + heldlock_exit(); /* deal with left-over held locks */ /* block all signals to finish exiting */ block_all_signals(self); @@ -1564,6 +1566,7 @@ finish_init() udp->hash_mask = HASHTBLSZ - 1; for (i = 0; i < HASHTBLSZ; i++, htp++) { + htp->hash_lock.mutex_flag = LOCK_INITED; htp->hash_lock.mutex_magic = MUTEX_MAGIC; htp->hash_cond.cond_magic = COND_MAGIC; } @@ -1610,6 +1613,7 @@ postfork1_child() { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; + mutex_t *mp; ulwp_t *next; ulwp_t *ulwp; int i; @@ -1629,8 +1633,11 @@ postfork1_child() if (udp->queue_head) { (void) _private_memset(udp->queue_head, 0, 2 * QHASHSIZE * sizeof (queue_head_t)); - for (i = 0; i < 2 * QHASHSIZE; i++) - udp->queue_head[i].qh_lock.mutex_magic = MUTEX_MAGIC; + for (i = 0; i < 2 * QHASHSIZE; i++) { + mp = &udp->queue_head[i].qh_lock; + mp->mutex_flag = LOCK_INITED; + mp->mutex_magic = MUTEX_MAGIC; + } } /* @@ -1650,6 +1657,7 @@ postfork1_child() tsd_free(ulwp); tls_free(ulwp); rwl_free(ulwp); + heldlock_free(ulwp); ulwp_free(ulwp); } self->ul_forw = self->ul_back = udp->all_lwps = self; diff --git a/usr/src/lib/libc/sparc/sys/_lwp_mutex_unlock.s b/usr/src/lib/libc/sparc/sys/_lwp_mutex_unlock.s index 5cf848d7b7..fb2e0f2dce 100644 --- a/usr/src/lib/libc/sparc/sys/_lwp_mutex_unlock.s +++ b/usr/src/lib/libc/sparc/sys/_lwp_mutex_unlock.s @@ -2,9 +2,8 @@ * CDDL HEADER 
START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -53,7 +53,8 @@ clr %o0 ! return 0 ! else (note that %o0 is still ! &mutex) - SYSTRAP_RVAL1(lwp_mutex_wakeup) ! call kernel to wakeup waiter + clr %o1 ! call kernel to wakeup waiter: + SYSTRAP_RVAL1(lwp_mutex_wakeup) ! lwp_mutex_wakeup(mp, 0) SYSLWPERR 2: RET SET_SIZE(_lwp_mutex_unlock) diff --git a/usr/src/lib/libc_db/common/thread_db.c b/usr/src/lib/libc_db/common/thread_db.c index 9c10183772..28b0ff3bf9 100644 --- a/usr/src/lib/libc_db/common/thread_db.c +++ b/usr/src/lib/libc_db/common/thread_db.c @@ -2513,7 +2513,8 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p, &generic_so.lock, sizeof (generic_so.lock)) != PS_OK) return (TD_DBERR); si_p->si_type = TD_SYNC_MUTEX; - si_p->si_shared_type = generic_so.lock.mutex_type; + si_p->si_shared_type = + (generic_so.lock.mutex_type & USYNC_PROCESS); (void) memcpy(si_p->si_flags, &generic_so.lock.mutex_flag, sizeof (generic_so.lock.mutex_flag)); si_p->si_state.mutex_locked = @@ -2523,8 +2524,7 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p, si_p->si_rcount = generic_so.lock.mutex_rcount; si_p->si_prioceiling = generic_so.lock.mutex_ceiling; if (si_p->si_state.mutex_locked) { - if (si_p->si_shared_type & - (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) + if (si_p->si_shared_type & USYNC_PROCESS) si_p->si_ownerpid = generic_so.lock.mutex_ownerpid; si_p->si_owner.th_ta_p = sh_p->sh_ta_p; @@ -2533,7 +2533,8 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p, break; case COND_MAGIC: si_p->si_type = TD_SYNC_COND; - si_p->si_shared_type = generic_so.condition.cond_type; + si_p->si_shared_type = + (generic_so.condition.cond_type & USYNC_PROCESS); (void) memcpy(si_p->si_flags, generic_so.condition.flags.flag, sizeof (generic_so.condition.flags.flag)); si_p->si_size = sizeof (generic_so.condition); @@ -2547,7 +2548,8 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p, != PS_OK) return (TD_DBERR); si_p->si_type = TD_SYNC_SEMA; - si_p->si_shared_type = generic_so.semaphore.type; + si_p->si_shared_type = + (generic_so.semaphore.type & USYNC_PROCESS); si_p->si_state.sem_count = generic_so.semaphore.count; si_p->si_size = sizeof (generic_so.semaphore); si_p->si_has_waiters = @@ -2563,7 +2565,8 @@ sync_get_info_common(const td_synchandle_t *sh_p, struct ps_prochandle *ph_p, &generic_so.rwlock, sizeof (generic_so.rwlock)) != PS_OK) return (TD_DBERR); si_p->si_type = TD_SYNC_RWLOCK; - si_p->si_shared_type = generic_so.rwlock.rwlock_type; + si_p->si_shared_type = + (generic_so.rwlock.rwlock_type & USYNC_PROCESS); si_p->si_size = sizeof (generic_so.rwlock); rwstate = (uint32_t)generic_so.rwlock.rwlock_readers; diff --git a/usr/src/lib/libnisdb/yptol/lock_update.c b/usr/src/lib/libnisdb/yptol/lock_update.c index 4194fb2c68..bf34972a50 100644 --- 
a/usr/src/lib/libnisdb/yptol/lock_update.c +++ b/usr/src/lib/libnisdb/yptol/lock_update.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -77,7 +77,7 @@ init_update_locks_mem() */ for (iiter = 0; iiter < MAXHASH; iiter++) { if (rc = mutex_init(&(shmupdatearray->updatenode[iiter]), - USYNC_PROCESS_ROBUST, 0)) { + USYNC_PROCESS | LOCK_ROBUST, 0)) { if (rc == EBUSY) { ebusy_cnt++; } else { @@ -238,18 +238,18 @@ lock_map_update(map_ctrl *map) * Previous lock owner died, resetting lock * to recover from error. */ - rc = mutex_init(&(shmupdatearray->updatenode[hashval]), - USYNC_PROCESS_ROBUST, 0); + rc = mutex_consistent( + &(shmupdatearray->updatenode[hashval])); if (rc != 0) { logmsg(MSG_NOTIMECHECK, LOG_ERR, - "mutex_init(): error=%d", rc); + "mutex_consistent(): error=%d", rc); return (FAILURE); } rc = mutex_unlock( - &(shmupdatearray->updatenode[hashval])); + &(shmupdatearray->updatenode[hashval])); if (rc != 0) { logmsg(MSG_NOTIMECHECK, LOG_ERR, - "mutex_unlock(): error=%d", rc); + "mutex_unlock(): error=%d", rc); return (FAILURE); } break; @@ -366,18 +366,18 @@ try_lock_map_update(map_ctrl *map) * Previous lock owner died, resetting lock * to recover from error. 
*/ - rc = mutex_init(&(shmupdatearray->updatenode[hashval]), - USYNC_PROCESS_ROBUST, 0); + rc = mutex_consistent( + &(shmupdatearray->updatenode[hashval])); if (rc != 0) { logmsg(MSG_NOTIMECHECK, LOG_ERR, - "mutex_init(): error=%d", rc); + "mutex_consistent(): error=%d", rc); return (rc); } rc = mutex_unlock( - &(shmupdatearray->updatenode[hashval])); + &(shmupdatearray->updatenode[hashval])); if (rc != 0) { logmsg(MSG_NOTIMECHECK, LOG_ERR, - "mutex_unlock(): error=%d", rc); + "mutex_unlock(): error=%d", rc); return (rc); } break; diff --git a/usr/src/lib/libproc/common/proc_names.c b/usr/src/lib/libproc/common/proc_names.c index 22f7171ce3..637e5085fb 100644 --- a/usr/src/lib/libproc/common/proc_names.c +++ b/usr/src/lib/libproc/common/proc_names.c @@ -286,7 +286,7 @@ static const char *const systable[] = { "lwp_sigmask", /* 165 */ "lwp_private", /* 166 */ "lwp_wait", /* 167 */ - "lwp_mutex_unlock", /* 168 */ + "lwp_mutex_wakeup", /* 168 */ "lwp_mutex_lock", /* 169 */ "lwp_cond_wait", /* 170 */ "lwp_cond_signal", /* 171 */ @@ -370,7 +370,7 @@ static const char *const systable[] = { "ntp_adjtime", /* 249 */ "lwp_mutex_unlock", /* 250 */ "lwp_mutex_trylock", /* 251 */ - "lwp_mutex_init", /* 252 */ + "lwp_mutex_register", /* 252 */ "cladm", /* 253 */ "uucopy", /* 254 */ "umount2" /* 255 */ diff --git a/usr/src/uts/common/c2/audit_event.c b/usr/src/uts/common/c2/audit_event.c index ce4a318b91..5dfc75ec04 100644 --- a/usr/src/uts/common/c2/audit_event.c +++ b/usr/src/uts/common/c2/audit_event.c @@ -548,7 +548,7 @@ aui_null, AUE_NULL, aus_null, /* 166 (loadable) */ auf_null, 0, aui_null, AUE_NULL, aus_null, /* 167 lwp_wait */ auf_null, 0, -aui_null, AUE_NULL, aus_null, /* 168 lwp_mutex_unlock */ +aui_null, AUE_NULL, aus_null, /* 168 lwp_mutex_wakeup */ auf_null, 0, aui_null, AUE_NULL, aus_null, /* 169 lwp_mutex_lock */ auf_null, 0, @@ -717,7 +717,7 @@ aui_null, AUE_NULL, aus_null, /* 250 lwp_mutex_unlock */ auf_null, 0, aui_null, AUE_NULL, aus_null, /* 251 lwp_mutex_trylock */ auf_null, 0, -aui_null, AUE_NULL, aus_null, /* 252 lwp_mutex_init */ +aui_null, AUE_NULL, aus_null, /* 252 lwp_mutex_register */ auf_null, 0, aui_null, AUE_NULL, aus_null, /* 253 cladm */ auf_null, 0, diff --git a/usr/src/uts/common/os/sysent.c b/usr/src/uts/common/os/sysent.c index 7b767297f3..a878cee623 100644 --- a/usr/src/uts/common/os/sysent.c +++ b/usr/src/uts/common/os/sysent.c @@ -222,7 +222,7 @@ int lwp_mutex_timedlock(); int lwp_mutex_wakeup(); int lwp_mutex_unlock(); int lwp_mutex_trylock(); -int lwp_mutex_init(); +int lwp_mutex_register(); int lwp_rwlock_sys(); int lwp_sema_post(); int lwp_sema_wait(); @@ -647,7 +647,7 @@ struct sysent sysent[NSYSCALL] = SYSENT_CI("lwp_private", syslwp_private, 3), SYSENT_NOSYS()), /* 167 */ SYSENT_CI("lwp_wait", lwp_wait, 2), - /* 168 */ SYSENT_CI("lwp_mutex_wakeup", lwp_mutex_wakeup, 1), + /* 168 */ SYSENT_CI("lwp_mutex_wakeup", lwp_mutex_wakeup, 2), /* 169 */ SYSENT_CI("lwp_mutex_lock", lwp_mutex_lock, 1), /* 170 */ SYSENT_CI("lwp_cond_wait", lwp_cond_wait, 4), /* 171 */ SYSENT_CI("lwp_cond_signal", lwp_cond_signal, 1), @@ -772,7 +772,7 @@ struct sysent sysent[NSYSCALL] = /* 249 */ SYSENT_CI("ntp_adjtime", ntp_adjtime, 1), /* 250 */ SYSENT_CI("lwp_mutex_unlock", lwp_mutex_unlock, 1), /* 251 */ SYSENT_CI("lwp_mutex_trylock", lwp_mutex_trylock, 1), - /* 252 */ SYSENT_CI("lwp_mutex_init", lwp_mutex_init, 2), + /* 252 */ SYSENT_CI("lwp_mutex_register", lwp_mutex_register, 1), /* 253 */ SYSENT_CI("cladm", cladm, 3), /* 254 */ SYSENT_CI("uucopy", uucopy, 3), /* 255 */ 
SYSENT_CI("umount2", umount2, 2) @@ -1031,7 +1031,7 @@ struct sysent sysent32[NSYSCALL] = SYSENT_CI("lwp_private", syslwp_private, 3), SYSENT_NOSYS()), /* 167 */ SYSENT_CI("lwp_wait", lwp_wait, 2), - /* 168 */ SYSENT_CI("lwp_mutex_wakeup", lwp_mutex_wakeup, 1), + /* 168 */ SYSENT_CI("lwp_mutex_wakeup", lwp_mutex_wakeup, 2), /* 169 */ SYSENT_CI("lwp_mutex_lock", lwp_mutex_lock, 1), /* 170 */ SYSENT_CI("lwp_cond_wait", lwp_cond_wait, 4), /* 171 */ SYSENT_CI("lwp_cond_signal", lwp_cond_signal, 1), @@ -1118,7 +1118,7 @@ struct sysent sysent32[NSYSCALL] = /* 249 */ SYSENT_CI("ntp_adjtime", ntp_adjtime, 1), /* 250 */ SYSENT_CI("lwp_mutex_unlock", lwp_mutex_unlock, 1), /* 251 */ SYSENT_CI("lwp_mutex_trylock", lwp_mutex_trylock, 1), - /* 252 */ SYSENT_CI("lwp_mutex_init", lwp_mutex_init, 2), + /* 252 */ SYSENT_CI("lwp_mutex_register", lwp_mutex_register, 1), /* 253 */ SYSENT_CI("cladm", cladm, 3), /* 254 */ SYSENT_CI("uucopy", uucopy, 3), /* 255 */ SYSENT_CI("umount2", umount2, 2) diff --git a/usr/src/uts/common/sys/synch.h b/usr/src/uts/common/sys/synch.h index 9b428e6040..6431bf22bc 100644 --- a/usr/src/uts/common/sys/synch.h +++ b/usr/src/uts/common/sys/synch.h @@ -131,20 +131,21 @@ typedef struct _lwp_rwlock { #define USYNC_THREAD 0x00 /* private to a process */ #define USYNC_PROCESS 0x01 /* shared by processes */ -/* Keep the following 3 fields in sync with pthread.h */ -#define LOCK_NORMAL 0x00 /* same as USYNC_THREAD */ -#define LOCK_ERRORCHECK 0x02 /* error check lock */ -#define LOCK_RECURSIVE 0x04 /* recursive lock */ +/* Keep the following values in sync with pthread.h */ +#define LOCK_NORMAL 0x00 /* same as USYNC_THREAD */ +#define LOCK_SHARED 0x01 /* same as USYNC_PROCESS */ +#define LOCK_ERRORCHECK 0x02 /* error check lock */ +#define LOCK_RECURSIVE 0x04 /* recursive lock */ +#define LOCK_PRIO_INHERIT 0x10 /* priority inheritance lock */ +#define LOCK_PRIO_PROTECT 0x20 /* priority ceiling lock */ +#define LOCK_ROBUST 0x40 /* robust lock */ -#define USYNC_PROCESS_ROBUST 0x08 /* shared by processes robustly */ - -/* Keep the following 5 fields in sync with pthread.h */ - -#define LOCK_PRIO_NONE 0x00 -#define LOCK_PRIO_INHERIT 0x10 -#define LOCK_PRIO_PROTECT 0x20 -#define LOCK_STALL_NP 0x00 -#define LOCK_ROBUST_NP 0x40 +/* + * USYNC_PROCESS_ROBUST is a deprecated historical type. It is mapped + * into (USYNC_PROCESS | LOCK_ROBUST) by mutex_init(). Application code + * should be revised to use (USYNC_PROCESS | LOCK_ROBUST) rather than this. 
+ */ +#define USYNC_PROCESS_ROBUST 0x08 /* * lwp_mutex_t flags diff --git a/usr/src/uts/common/sys/syscall.h b/usr/src/uts/common/sys/syscall.h index f8ad2291a5..ae7e8224da 100644 --- a/usr/src/uts/common/sys/syscall.h +++ b/usr/src/uts/common/sys/syscall.h @@ -516,7 +516,7 @@ extern "C" { #define SYS_ntp_adjtime 249 #define SYS_lwp_mutex_unlock 250 #define SYS_lwp_mutex_trylock 251 -#define SYS_lwp_mutex_init 252 +#define SYS_lwp_mutex_register 252 #define SYS_cladm 253 #define SYS_uucopy 254 #define SYS_umount2 255 diff --git a/usr/src/uts/common/syscall/lwp_sobj.c b/usr/src/uts/common/syscall/lwp_sobj.c index 65d9236a32..2f3c8e1be7 100644 --- a/usr/src/uts/common/syscall/lwp_sobj.c +++ b/usr/src/uts/common/syscall/lwp_sobj.c @@ -179,7 +179,7 @@ lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end) if (start <= addr && addr < end) { *prev = ent->lwpchan_next; if (ent->lwpchan_pool == LWPCHAN_MPPOOL && - (ent->lwpchan_type & USYNC_PROCESS_ROBUST)) + (ent->lwpchan_type & LOCK_ROBUST)) lwp_mutex_cleanup(ent, LOCK_UNMAPPED); kmem_free(ent, sizeof (*ent)); atomic_add_32(&lcp->lwpchan_entries, -1); @@ -335,7 +335,7 @@ lwpchan_destroy_cache(int exec) while (ent != NULL) { next = ent->lwpchan_next; if (ent->lwpchan_pool == LWPCHAN_MPPOOL && - (ent->lwpchan_type & USYNC_PROCESS_ROBUST)) + (ent->lwpchan_type & LOCK_ROBUST)) lwp_mutex_cleanup(ent, lockflg); kmem_free(ent, sizeof (*ent)); ent = next; @@ -473,12 +473,12 @@ get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool) * (segvn_getmemid() does the same for MAP_PRIVATE mappings.) * The lwpchan cache is used only for process-shared objects. */ - if ((type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) == 0) { + if (!(type & USYNC_PROCESS)) { lwpchan->lc_wchan0 = (caddr_t)as; lwpchan->lc_wchan = addr; return (1); } - /* check the lwpchan cache for mapping */ + return (lwpchan_get_mapping(as, addr, type, lwpchan, pool)); } @@ -744,24 +744,25 @@ retry: error = ENOMEM; goto out; } - if (flag & LOCK_OWNERDEAD) { - /* - * Return with upimutex held. - */ - error = EOWNERDEAD; - } else if (flag & LOCK_NOTRECOVERABLE) { + if (flag & LOCK_NOTRECOVERABLE) { /* * Since the setting of LOCK_NOTRECOVERABLE * was done under the high-level upi mutex, * in lwp_upimutex_unlock(), this flag needs to * be checked while holding the upi mutex. - * If set, this thread should return without - * the lock held, and with the right error - * code. + * If set, this thread should return without + * the lock held, and with the right error code. */ upimutex_unlock((upimutex_t *)upimutex, flag); upilocked = 0; error = ENOTRECOVERABLE; + } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { + if (flag & LOCK_OWNERDEAD) + error = EOWNERDEAD; + else if (type & USYNC_PROCESS_ROBUST) + error = ELOCKUNMAPPED; + else + error = EOWNERDEAD; } goto out; } @@ -884,18 +885,21 @@ retry: /* * Now, need to read the user-level lp->mutex_flag to do the following: * - * - if lock is held, check if EOWNERDEAD should be returned - * - if lock isn't held, check if ENOTRECOVERABLE should be returned + * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED + * should be returned. + * - if lock isn't held, check if ENOTRECOVERABLE should + * be returned. * * Now, either lp->mutex_flag is readable or it's not. If not - * readable, the on_fault path will cause a return with EFAULT as - * it should. If it is readable, the state of the flag encodes the - * robustness state of the lock: + * readable, the on_fault path will cause a return with EFAULT + * as it should. 
If it is readable, the state of the flag + * encodes the robustness state of the lock: * - * If the upimutex is locked here, the flag's LOCK_OWNERDEAD setting - * will influence the return code appropriately. If the upimutex is - * not locked here, this could be due to a spurious wake-up or a - * NOTRECOVERABLE event. The flag's setting can be used to distinguish + * If the upimutex is locked here, the flag's LOCK_OWNERDEAD + * or LOCK_UNMAPPED setting will influence the return code + * appropriately. If the upimutex is not locked here, this + * could be due to a spurious wake-up or a NOTRECOVERABLE + * event. The flag's setting can be used to distinguish * between these two events. */ fuword16_noerr(&lp->mutex_flag, &flag); @@ -911,8 +915,13 @@ retry: upimutex_unlock((upimutex_t *)upimutex, flag); upilocked = 0; error = ENOMEM; - } else if (flag & LOCK_OWNERDEAD) { - error = EOWNERDEAD; + } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { + if (flag & LOCK_OWNERDEAD) + error = EOWNERDEAD; + else if (type & USYNC_PROCESS_ROBUST) + error = ELOCKUNMAPPED; + else + error = EOWNERDEAD; } } else { /* @@ -1001,14 +1010,16 @@ lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type) mutex_exit(&upibp->upib_lock); /* release for user memory access */ upilocked = 1; fuword16_noerr(&lp->mutex_flag, &flag); - if (flag & LOCK_OWNERDEAD) { + if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { /* * transition mutex to the LOCK_NOTRECOVERABLE state. */ - flag &= ~LOCK_OWNERDEAD; + flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); flag |= LOCK_NOTRECOVERABLE; suword16_noerr(&lp->mutex_flag, flag); } + if (type & USYNC_PROCESS) + suword32_noerr(&lp->mutex_ownerpid, 0); upimutex_unlock((upimutex_t *)upimutex, flag); upilocked = 0; out: @@ -1017,15 +1028,37 @@ out: } /* - * Mark user mutex state, corresponding to kernel upimutex, as LOCK_OWNERDEAD. + * Clear the contents of a user-level mutex; return the flags. + * Used only by upi_dead() and lwp_mutex_cleanup(), below. + */ +static uint16_t +lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg) +{ + uint16_t flag; + + fuword16_noerr(&lp->mutex_flag, &flag); + if ((flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) == 0) { + flag |= lockflg; + suword16_noerr(&lp->mutex_flag, flag); + } + suword32_noerr((uint32_t *)&lp->mutex_owner, 0); + suword32_noerr((uint32_t *)&lp->mutex_owner + 1, 0); + suword32_noerr(&lp->mutex_ownerpid, 0); + suword8_noerr(&lp->mutex_rcount, 0); + + return (flag); +} + +/* + * Mark user mutex state, corresponding to kernel upimutex, + * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate */ static int -upi_dead(upimutex_t *upip) +upi_dead(upimutex_t *upip, uint16_t lockflg) { label_t ljb; int error = 0; lwp_mutex_t *lp; - uint16_t flag; if (on_fault(&ljb)) { error = EFAULT; @@ -1033,9 +1066,8 @@ upi_dead(upimutex_t *upip) } lp = upip->upi_vaddr; - fuword16_noerr(&lp->mutex_flag, &flag); - flag |= LOCK_OWNERDEAD; - suword16_noerr(&lp->mutex_flag, flag); + (void) lwp_clear_mutex(lp, lockflg); + suword8_noerr(&lp->mutex_lockw, 0); out: no_fault(); return (error); @@ -1050,10 +1082,12 @@ void upimutex_cleanup() { kthread_t *t = curthread; + uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)? 
+ LOCK_UNMAPPED : LOCK_OWNERDEAD; struct upimutex *upip; while ((upip = t->t_upimutex) != NULL) { - if (upi_dead(upip) != 0) { + if (upi_dead(upip, lockflg) != 0) { /* * If the user object associated with this upimutex is * unmapped, unlock upimutex with the @@ -1138,8 +1172,9 @@ lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp) if (UPIMUTEX(type)) { no_fault(); error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt); - if ((error == 0 || error == EOWNERDEAD) && - (type & USYNC_PROCESS)) + if ((type & USYNC_PROCESS) && + (error == 0 || + error == EOWNERDEAD || error == ELOCKUNMAPPED)) (void) suword32(&lp->mutex_ownerpid, p->p_pid); if (tsp && !time_error) /* copyout the residual time left */ error = lwp_timer_copyout(&lwpt, error); @@ -1160,9 +1195,7 @@ lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp) } lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); locked = 1; - fuword8_noerr(&lp->mutex_waiters, &waiters); - suword8_noerr(&lp->mutex_waiters, 1); - if (type & USYNC_PROCESS_ROBUST) { + if (type & LOCK_ROBUST) { fuword16_noerr(&lp->mutex_flag, &flag); if (flag & LOCK_NOTRECOVERABLE) { lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); @@ -1170,6 +1203,8 @@ lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp) goto out; } } + fuword8_noerr(&lp->mutex_waiters, &waiters); + suword8_noerr(&lp->mutex_waiters, 1); /* * If watchpoints are set, they need to be restored, since @@ -1265,7 +1300,7 @@ lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp) locked = 1; fuword8_noerr(&lp->mutex_waiters, &waiters); suword8_noerr(&lp->mutex_waiters, 1); - if (type & USYNC_PROCESS_ROBUST) { + if (type & LOCK_ROBUST) { fuword16_noerr(&lp->mutex_flag, &flag); if (flag & LOCK_NOTRECOVERABLE) { error = ENOTRECOVERABLE; @@ -1277,14 +1312,19 @@ lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp) if (t->t_mstate == LMS_USER_LOCK) (void) new_mstate(t, LMS_SYSTEM); - if (!error && (type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST))) { - suword32_noerr(&lp->mutex_ownerpid, p->p_pid); - if (type & USYNC_PROCESS_ROBUST) { + if (error == 0) { + if (type & USYNC_PROCESS) + suword32_noerr(&lp->mutex_ownerpid, p->p_pid); + if (type & LOCK_ROBUST) { fuword16_noerr(&lp->mutex_flag, &flag); - if (flag & LOCK_OWNERDEAD) - error = EOWNERDEAD; - else if (flag & LOCK_UNMAPPED) - error = ELOCKUNMAPPED; + if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { + if (flag & LOCK_OWNERDEAD) + error = EOWNERDEAD; + else if (type & USYNC_PROCESS_ROBUST) + error = ELOCKUNMAPPED; + else + error = EOWNERDEAD; + } } } suword8_noerr(&lp->mutex_waiters, waiters); @@ -1435,7 +1475,7 @@ lwp_release_all(lwpchan_t *lwpchan) * lwp resumes and retries to acquire the lock. */ int -lwp_mutex_wakeup(lwp_mutex_t *lp) +lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all) { proc_t *p = ttoproc(curthread); lwpchan_t lwpchan; @@ -1489,9 +1529,10 @@ lwp_mutex_wakeup(lwp_mutex_t *lp) * In this case, writing into the waiter bit would cause data * corruption. 
*/ - if (lwp_release(&lwpchan, &waiters, 0) == 1) { + if (release_all) + lwp_release_all(&lwpchan); + else if (lwp_release(&lwpchan, &waiters, 0) == 1) suword8_noerr(&lp->mutex_waiters, waiters); - } lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); out: no_fault(); @@ -2804,7 +2845,7 @@ lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) } /* - * Clean up a locked a robust mutex + * Clean up a locked robust mutex */ static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) @@ -2816,33 +2857,50 @@ lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) lwp_mutex_t *lp; volatile int locked = 0; volatile int watched = 0; + volatile struct upimutex *upimutex = NULL; + volatile int upilocked = 0; - ASSERT(ent->lwpchan_type & USYNC_PROCESS_ROBUST); + ASSERT(ent->lwpchan_type & LOCK_ROBUST); lp = (lwp_mutex_t *)ent->lwpchan_addr; watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); if (on_fault(&ljb)) { if (locked) lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); + if (upilocked) + upimutex_unlock((upimutex_t *)upimutex, 0); goto out; } - fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); - if (owner_pid != curproc->p_pid) { - goto out; + if (ent->lwpchan_type & USYNC_PROCESS) { + fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); + if (owner_pid != curproc->p_pid) + goto out; } - lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); - locked = 1; - fuword16_noerr(&lp->mutex_flag, &flag); - if ((flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) == 0) { - flag |= lockflg; - suword16_noerr(&lp->mutex_flag, flag); + if (UPIMUTEX(ent->lwpchan_type)) { + lwpchan_t lwpchan = ent->lwpchan_lwpchan; + upib_t *upibp = &UPI_CHAIN(lwpchan); + + mutex_enter(&upibp->upib_lock); + upimutex = upi_get(upibp, &lwpchan); + if (upimutex == NULL || upimutex->upi_owner != curthread) { + mutex_exit(&upibp->upib_lock); + goto out; + } + mutex_exit(&upibp->upib_lock); + upilocked = 1; + flag = lwp_clear_mutex(lp, lockflg); + suword8_noerr(&lp->mutex_lockw, 0); + upimutex_unlock((upimutex_t *)upimutex, flag); + } else { + lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); + locked = 1; + (void) lwp_clear_mutex(lp, lockflg); + ulock_clear(&lp->mutex_lockw); + fuword8_noerr(&lp->mutex_waiters, &waiters); + if (waiters && lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) + suword8_noerr(&lp->mutex_waiters, waiters); + lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); } - suword32_noerr(&lp->mutex_ownerpid, 0); - ulock_clear(&lp->mutex_lockw); - fuword8_noerr(&lp->mutex_waiters, &waiters); - if (waiters && lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) - suword8_noerr(&lp->mutex_waiters, waiters); - lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); out: no_fault(); if (watched) @@ -2850,70 +2908,41 @@ out: } /* - * Register the mutex and initialize the mutex if it is not already + * Register a process-shared robust mutex in the lwpchan cache. 
*/ int -lwp_mutex_init(lwp_mutex_t *lp, int type) +lwp_mutex_register(lwp_mutex_t *lp) { - proc_t *p = curproc; int error = 0; - volatile int locked = 0; - volatile int watched = 0; + volatile int watched; label_t ljb; - uint16_t flag; + uint8_t type; lwpchan_t lwpchan; - pid_t owner_pid; if ((caddr_t)lp >= (caddr_t)USERLIMIT) return (set_errno(EFAULT)); - if (type != USYNC_PROCESS_ROBUST) - return (set_errno(EINVAL)); - watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); if (on_fault(&ljb)) { - if (locked) - lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); error = EFAULT; - goto out; - } - /* - * Force Copy-on-write fault if lwp_mutex_t object is - * defined to be MAP_PRIVATE and it was initialized to - * USYNC_PROCESS. - */ - suword8_noerr(&lp->mutex_type, type); - if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, - &lwpchan, LWPCHAN_MPPOOL)) { - error = EFAULT; - goto out; - } - lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); - locked = 1; - fuword16_noerr(&lp->mutex_flag, &flag); - if (flag & LOCK_INITED) { - if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { - fuword32_noerr(&lp->mutex_ownerpid, - (uint32_t *)&owner_pid); - if (owner_pid == p->p_pid) { - flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); - suword16_noerr(&lp->mutex_flag, flag); - locked = 0; - lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); - goto out; - } - } - error = EBUSY; } else { - suword8_noerr(&lp->mutex_waiters, 0); - suword8_noerr(&lp->mutex_lockw, 0); - suword16_noerr(&lp->mutex_flag, LOCK_INITED); - suword32_noerr(&lp->mutex_ownerpid, 0); + fuword8_noerr(&lp->mutex_type, &type); + if ((type & (USYNC_PROCESS|LOCK_ROBUST)) + != (USYNC_PROCESS|LOCK_ROBUST)) { + error = EINVAL; + } else { + /* + * Force Copy-on-write fault if lwp_mutex_t object is + * defined to be MAP_PRIVATE and it was initialized to + * USYNC_PROCESS. 
+ */ + suword8_noerr(&lp->mutex_type, type); + if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, + &lwpchan, LWPCHAN_MPPOOL)) + error = EFAULT; + } } - locked = 0; - lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); -out: no_fault(); if (watched) watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); @@ -2950,8 +2979,9 @@ lwp_mutex_trylock(lwp_mutex_t *lp) if (UPIMUTEX(type)) { no_fault(); error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL); - if ((error == 0 || error == EOWNERDEAD) && - (type & USYNC_PROCESS)) + if ((type & USYNC_PROCESS) && + (error == 0 || + error == EOWNERDEAD || error == ELOCKUNMAPPED)) (void) suword32(&lp->mutex_ownerpid, p->p_pid); if (error) return (set_errno(error)); @@ -2970,8 +3000,8 @@ lwp_mutex_trylock(lwp_mutex_t *lp) } lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); locked = 1; - if (type & USYNC_PROCESS_ROBUST) { - fuword16_noerr((uint16_t *)(&lp->mutex_flag), &flag); + if (type & LOCK_ROBUST) { + fuword16_noerr(&lp->mutex_flag, &flag); if (flag & LOCK_NOTRECOVERABLE) { lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); error = ENOTRECOVERABLE; @@ -2983,13 +3013,19 @@ lwp_mutex_trylock(lwp_mutex_t *lp) if (!ulock_try(&lp->mutex_lockw)) error = EBUSY; - else if (type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) { - suword32_noerr(&lp->mutex_ownerpid, p->p_pid); - if (type & USYNC_PROCESS_ROBUST) { - if (flag & LOCK_OWNERDEAD) - error = EOWNERDEAD; - else if (flag & LOCK_UNMAPPED) - error = ELOCKUNMAPPED; + else { + if (type & USYNC_PROCESS) + suword32_noerr(&lp->mutex_ownerpid, p->p_pid); + if (type & LOCK_ROBUST) { + fuword16_noerr(&lp->mutex_flag, &flag); + if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { + if (flag & LOCK_OWNERDEAD) + error = EOWNERDEAD; + else if (type & USYNC_PROCESS_ROBUST) + error = ELOCKUNMAPPED; + else + error = EOWNERDEAD; + } } } locked = 0; @@ -3056,17 +3092,16 @@ lwp_mutex_unlock(lwp_mutex_t *lp) } lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); locked = 1; - if (type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) { - if (type & USYNC_PROCESS_ROBUST) { - fuword16_noerr(&lp->mutex_flag, &flag); - if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { - flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); - flag |= LOCK_NOTRECOVERABLE; - suword16_noerr(&lp->mutex_flag, flag); - } + if (type & LOCK_ROBUST) { + fuword16_noerr(&lp->mutex_flag, &flag); + if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { + flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); + flag |= LOCK_NOTRECOVERABLE; + suword16_noerr(&lp->mutex_flag, flag); } - suword32_noerr(&lp->mutex_ownerpid, 0); } + if (type & USYNC_PROCESS) + suword32_noerr(&lp->mutex_ownerpid, 0); ulock_clear(&lp->mutex_lockw); /* * Always wake up an lwp (if any) waiting on lwpchan. 
The woken lwp will @@ -3089,7 +3124,7 @@ lwp_mutex_unlock(lwp_mutex_t *lp) */ fuword8_noerr(&lp->mutex_waiters, &waiters); if (waiters) { - if ((type & USYNC_PROCESS_ROBUST) && + if ((type & LOCK_ROBUST) && (flag & LOCK_NOTRECOVERABLE)) { lwp_release_all(&lwpchan); suword8_noerr(&lp->mutex_waiters, 0); diff --git a/usr/src/uts/intel/os/name_to_sysnum b/usr/src/uts/intel/os/name_to_sysnum index 3c26bfc9b9..3dec822452 100644 --- a/usr/src/uts/intel/os/name_to_sysnum +++ b/usr/src/uts/intel/os/name_to_sysnum @@ -234,7 +234,7 @@ ntp_gettime 248 ntp_adjtime 249 lwp_mutex_unlock 250 lwp_mutex_trylock 251 -lwp_mutex_init 252 +lwp_mutex_register 252 cladm 253 uucopy 254 umount2 255 diff --git a/usr/src/uts/sparc/os/name_to_sysnum b/usr/src/uts/sparc/os/name_to_sysnum index cf32acea5d..e78b26e72f 100644 --- a/usr/src/uts/sparc/os/name_to_sysnum +++ b/usr/src/uts/sparc/os/name_to_sysnum @@ -234,7 +234,7 @@ ntp_gettime 248 ntp_adjtime 249 lwp_mutex_unlock 250 lwp_mutex_trylock 251 -lwp_mutex_init 252 +lwp_mutex_register 252 cladm 253 uucopy 254 umount2 255 |