Diffstat (limited to 'usr/src/lib/libc/port/threads/synch.c')
-rw-r--r--   usr/src/lib/libc/port/threads/synch.c   3194
1 file changed, 3194 insertions, 0 deletions
diff --git a/usr/src/lib/libc/port/threads/synch.c b/usr/src/lib/libc/port/threads/synch.c new file mode 100644 index 0000000000..21ecb0a2b7 --- /dev/null +++ b/usr/src/lib/libc/port/threads/synch.c @@ -0,0 +1,3194 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/sdt.h> + +#include "lint.h" +#include "thr_uberdata.h" + +/* + * This mutex is initialized to be held by lwp#1. + * It is used to block a thread that has returned from a mutex_lock() + * of a PTHREAD_PRIO_INHERIT mutex with an unrecoverable error. + */ +mutex_t stall_mutex = DEFAULTMUTEX; + +static int shared_mutex_held(mutex_t *); + +/* + * Lock statistics support functions. + */ +void +record_begin_hold(tdb_mutex_stats_t *msp) +{ + tdb_incr(msp->mutex_lock); + msp->mutex_begin_hold = gethrtime(); +} + +hrtime_t +record_hold_time(tdb_mutex_stats_t *msp) +{ + hrtime_t now = gethrtime(); + + if (msp->mutex_begin_hold) + msp->mutex_hold_time += now - msp->mutex_begin_hold; + msp->mutex_begin_hold = 0; + return (now); +} + +/* + * Called once at library initialization. + */ +void +mutex_setup(void) +{ + if (set_lock_byte(&stall_mutex.mutex_lockw)) + thr_panic("mutex_setup() cannot acquire stall_mutex"); + stall_mutex.mutex_owner = (uintptr_t)curthread; +} + +/* + * The default spin counts of 1000 and 500 are experimentally determined. + * On sun4u machines with any number of processors they could be raised + * to 10,000 but that (experimentally) makes almost no difference. + * The environment variables: + * _THREAD_ADAPTIVE_SPIN=count + * _THREAD_RELEASE_SPIN=count + * can be used to override and set the counts in the range [0 .. 1,000,000]. + */ +int thread_adaptive_spin = 1000; +uint_t thread_max_spinners = 100; +int thread_release_spin = 500; +int thread_queue_verify = 0; +static int ncpus; + +/* + * Distinguish spinning for queue locks from spinning for regular locks. + * The environment variable: + * _THREAD_QUEUE_SPIN=count + * can be used to override and set the count in the range [0 .. 1,000,000]. + * There is no release spin concept for queue locks. + */ +int thread_queue_spin = 1000; + +/* + * Use the otherwise-unused 'mutex_ownerpid' field of a USYNC_THREAD + * mutex to be a count of adaptive spins in progress. + */ +#define mutex_spinners mutex_ownerpid + +void +_mutex_set_typeattr(mutex_t *mp, int attr) +{ + mp->mutex_type |= (uint8_t)attr; +} + +/* + * 'type' can be one of USYNC_THREAD or USYNC_PROCESS, possibly + * augmented by the flags LOCK_RECURSIVE and/or LOCK_ERRORCHECK, + * or it can be USYNC_PROCESS_ROBUST with no extra flags. 
+ */ +#pragma weak _private_mutex_init = __mutex_init +#pragma weak mutex_init = __mutex_init +#pragma weak _mutex_init = __mutex_init +/* ARGSUSED2 */ +int +__mutex_init(mutex_t *mp, int type, void *arg) +{ + int error; + + switch (type & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) { + case USYNC_THREAD: + case USYNC_PROCESS: + (void) _memset(mp, 0, sizeof (*mp)); + mp->mutex_type = (uint8_t)type; + mp->mutex_flag = LOCK_INITED; + error = 0; + break; + case USYNC_PROCESS_ROBUST: + if (type & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) + error = EINVAL; + else + error = ___lwp_mutex_init(mp, type); + break; + default: + error = EINVAL; + break; + } + if (error == 0) + mp->mutex_magic = MUTEX_MAGIC; + return (error); +} + +/* + * Delete mp from list of ceil mutexes owned by curthread. + * Return 1 if the head of the chain was updated. + */ +int +_ceil_mylist_del(mutex_t *mp) +{ + ulwp_t *self = curthread; + mxchain_t **mcpp; + mxchain_t *mcp; + + mcpp = &self->ul_mxchain; + while ((*mcpp)->mxchain_mx != mp) + mcpp = &(*mcpp)->mxchain_next; + mcp = *mcpp; + *mcpp = mcp->mxchain_next; + lfree(mcp, sizeof (*mcp)); + return (mcpp == &self->ul_mxchain); +} + +/* + * Add mp to head of list of ceil mutexes owned by curthread. + * Return ENOMEM if no memory could be allocated. + */ +int +_ceil_mylist_add(mutex_t *mp) +{ + ulwp_t *self = curthread; + mxchain_t *mcp; + + if ((mcp = lmalloc(sizeof (*mcp))) == NULL) + return (ENOMEM); + mcp->mxchain_mx = mp; + mcp->mxchain_next = self->ul_mxchain; + self->ul_mxchain = mcp; + return (0); +} + +/* + * Inherit priority from ceiling. The inheritance impacts the effective + * priority, not the assigned priority. See _thread_setschedparam_main(). + */ +void +_ceil_prio_inherit(int ceil) +{ + ulwp_t *self = curthread; + struct sched_param param; + + (void) _memset(¶m, 0, sizeof (param)); + param.sched_priority = ceil; + if (_thread_setschedparam_main(self->ul_lwpid, + self->ul_policy, ¶m, PRIO_INHERIT)) { + /* + * Panic since unclear what error code to return. + * If we do return the error codes returned by above + * called routine, update the man page... + */ + thr_panic("_thread_setschedparam_main() fails"); + } +} + +/* + * Waive inherited ceiling priority. Inherit from head of owned ceiling locks + * if holding at least one ceiling lock. If no ceiling locks are held at this + * point, disinherit completely, reverting back to assigned priority. + */ +void +_ceil_prio_waive(void) +{ + ulwp_t *self = curthread; + struct sched_param param; + + (void) _memset(¶m, 0, sizeof (param)); + if (self->ul_mxchain == NULL) { + /* + * No ceil locks held. Zero the epri, revert back to ul_pri. + * Since thread's hash lock is not held, one cannot just + * read ul_pri here...do it in the called routine... + */ + param.sched_priority = self->ul_pri; /* ignored */ + if (_thread_setschedparam_main(self->ul_lwpid, + self->ul_policy, ¶m, PRIO_DISINHERIT)) + thr_panic("_thread_setschedparam_main() fails"); + } else { + /* + * Set priority to that of the mutex at the head + * of the ceilmutex chain. + */ + param.sched_priority = + self->ul_mxchain->mxchain_mx->mutex_ceiling; + if (_thread_setschedparam_main(self->ul_lwpid, + self->ul_policy, ¶m, PRIO_INHERIT)) + thr_panic("_thread_setschedparam_main() fails"); + } +} + +/* + * Non-preemptive spin locks. Used by queue_lock(). + * No lock statistics are gathered for these locks. 
+ */ +void +spin_lock_set(mutex_t *mp) +{ + ulwp_t *self = curthread; + + no_preempt(self); + if (set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + return; + } + /* + * Spin for a while, attempting to acquire the lock. + */ + if (self->ul_spin_lock_spin != UINT_MAX) + self->ul_spin_lock_spin++; + if (mutex_queuelock_adaptive(mp) == 0 || + set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + return; + } + /* + * Try harder if we were previously at a no premption level. + */ + if (self->ul_preempt > 1) { + if (self->ul_spin_lock_spin2 != UINT_MAX) + self->ul_spin_lock_spin2++; + if (mutex_queuelock_adaptive(mp) == 0 || + set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + return; + } + } + /* + * Give up and block in the kernel for the mutex. + */ + if (self->ul_spin_lock_sleep != UINT_MAX) + self->ul_spin_lock_sleep++; + (void) ___lwp_mutex_timedlock(mp, NULL); + mp->mutex_owner = (uintptr_t)self; +} + +void +spin_lock_clear(mutex_t *mp) +{ + ulwp_t *self = curthread; + + mp->mutex_owner = 0; + if (swap32(&mp->mutex_lockword, 0) & WAITERMASK) { + (void) ___lwp_mutex_wakeup(mp); + if (self->ul_spin_lock_wakeup != UINT_MAX) + self->ul_spin_lock_wakeup++; + } + preempt(self); +} + +/* + * Allocate the sleep queue hash table. + */ +void +queue_alloc(void) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + void *data; + int i; + + /* + * No locks are needed; we call here only when single-threaded. + */ + ASSERT(self == udp->ulwp_one); + ASSERT(!udp->uberflags.uf_mt); + if ((data = _private_mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t), + PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0)) + == MAP_FAILED) + thr_panic("cannot allocate thread queue_head table"); + udp->queue_head = (queue_head_t *)data; + for (i = 0; i < 2 * QHASHSIZE; i++) + udp->queue_head[i].qh_lock.mutex_magic = MUTEX_MAGIC; +} + +#if defined(THREAD_DEBUG) + +/* + * Debugging: verify correctness of a sleep queue. + */ +void +QVERIFY(queue_head_t *qp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + ulwp_t *ulwp; + ulwp_t *prev; + uint_t index; + uint32_t cnt = 0; + char qtype; + void *wchan; + + ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE); + ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); + ASSERT((qp->qh_head != NULL && qp->qh_tail != NULL) || + (qp->qh_head == NULL && qp->qh_tail == NULL)); + if (!thread_queue_verify) + return; + /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ + qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV; + for (prev = NULL, ulwp = qp->qh_head; ulwp != NULL; + prev = ulwp, ulwp = ulwp->ul_link, cnt++) { + ASSERT(ulwp->ul_qtype == qtype); + ASSERT(ulwp->ul_wchan != NULL); + ASSERT(ulwp->ul_sleepq == qp); + wchan = ulwp->ul_wchan; + index = QUEUE_HASH(wchan, qtype); + ASSERT(&udp->queue_head[index] == qp); + } + ASSERT(qp->qh_tail == prev); + ASSERT(qp->qh_qlen == cnt); +} + +#else /* THREAD_DEBUG */ + +#define QVERIFY(qp) + +#endif /* THREAD_DEBUG */ + +/* + * Acquire a queue head. + */ +queue_head_t * +queue_lock(void *wchan, int qtype) +{ + uberdata_t *udp = curthread->ul_uberdata; + queue_head_t *qp; + + ASSERT(qtype == MX || qtype == CV); + + /* + * It is possible that we could be called while still single-threaded. + * If so, we call queue_alloc() to allocate the queue_head[] array. 
+ */ + if ((qp = udp->queue_head) == NULL) { + queue_alloc(); + qp = udp->queue_head; + } + qp += QUEUE_HASH(wchan, qtype); + spin_lock_set(&qp->qh_lock); + /* + * At once per nanosecond, qh_lockcount will wrap after 512 years. + * Were we to care about this, we could peg the value at UINT64_MAX. + */ + qp->qh_lockcount++; + QVERIFY(qp); + return (qp); +} + +/* + * Release a queue head. + */ +void +queue_unlock(queue_head_t *qp) +{ + QVERIFY(qp); + spin_lock_clear(&qp->qh_lock); +} + +/* + * For rwlock queueing, we must queue writers ahead of readers of the + * same priority. We do this by making writers appear to have a half + * point higher priority for purposes of priority comparisons below. + */ +#define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) + +void +enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype) +{ + ulwp_t **ulwpp; + ulwp_t *next; + int pri = CMP_PRIO(ulwp); + int force_fifo = (qtype & FIFOQ); + int do_fifo; + + qtype &= ~FIFOQ; + ASSERT(qtype == MX || qtype == CV); + ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); + ASSERT(ulwp->ul_sleepq != qp); + + /* + * LIFO queue ordering is unfair and can lead to starvation, + * but it gives better performance for heavily contended locks. + * We use thread_queue_fifo (range is 0..8) to determine + * the frequency of FIFO vs LIFO queuing: + * 0 : every 256th time (almost always LIFO) + * 1 : every 128th time + * 2 : every 64th time + * 3 : every 32nd time + * 4 : every 16th time (the default value, mostly LIFO) + * 5 : every 8th time + * 6 : every 4th time + * 7 : every 2nd time + * 8 : every time (never LIFO, always FIFO) + * Note that there is always some degree of FIFO ordering. + * This breaks live lock conditions that occur in applications + * that are written assuming (incorrectly) that threads acquire + * locks fairly, that is, in roughly round-robin order. + * In any event, the queue is maintained in priority order. + * + * If we are given the FIFOQ flag in qtype, fifo queueing is forced. + * SUSV3 requires this for semaphores. + */ + do_fifo = (force_fifo || + ((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0); + + if (qp->qh_head == NULL) { + /* + * The queue is empty. LIFO/FIFO doesn't matter. + */ + ASSERT(qp->qh_tail == NULL); + ulwpp = &qp->qh_head; + } else if (do_fifo) { + /* + * Enqueue after the last thread whose priority is greater + * than or equal to the priority of the thread being queued. + * Attempt first to go directly onto the tail of the queue. + */ + if (pri <= CMP_PRIO(qp->qh_tail)) + ulwpp = &qp->qh_tail->ul_link; + else { + for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; + ulwpp = &next->ul_link) + if (pri > CMP_PRIO(next)) + break; + } + } else { + /* + * Enqueue before the first thread whose priority is less + * than or equal to the priority of the thread being queued. + * Hopefully we can go directly onto the head of the queue. + */ + for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; + ulwpp = &next->ul_link) + if (pri >= CMP_PRIO(next)) + break; + } + if ((ulwp->ul_link = *ulwpp) == NULL) + qp->qh_tail = ulwp; + *ulwpp = ulwp; + + ulwp->ul_sleepq = qp; + ulwp->ul_wchan = wchan; + ulwp->ul_qtype = qtype; + if (qp->qh_qmax < ++qp->qh_qlen) + qp->qh_qmax = qp->qh_qlen; +} + +/* + * Return a pointer to the queue slot of the + * highest priority thread on the queue. 
+ * On return, prevp, if not NULL, will contain a pointer + * to the thread's predecessor on the queue + */ +static ulwp_t ** +queue_slot(queue_head_t *qp, void *wchan, int *more, ulwp_t **prevp) +{ + ulwp_t **ulwpp; + ulwp_t *ulwp; + ulwp_t *prev = NULL; + ulwp_t **suspp = NULL; + ulwp_t *susprev; + + ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); + + /* + * Find a waiter on the sleep queue. + */ + for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; + prev = ulwp, ulwpp = &ulwp->ul_link) { + if (ulwp->ul_wchan == wchan) { + if (!ulwp->ul_stop) + break; + /* + * Try not to return a suspended thread. + * This mimics the old libthread's behavior. + */ + if (suspp == NULL) { + suspp = ulwpp; + susprev = prev; + } + } + } + + if (ulwp == NULL && suspp != NULL) { + ulwp = *(ulwpp = suspp); + prev = susprev; + suspp = NULL; + } + if (ulwp == NULL) { + if (more != NULL) + *more = 0; + return (NULL); + } + + if (prevp != NULL) + *prevp = prev; + if (more == NULL) + return (ulwpp); + + /* + * Scan the remainder of the queue for another waiter. + */ + if (suspp != NULL) { + *more = 1; + return (ulwpp); + } + for (ulwp = ulwp->ul_link; ulwp != NULL; ulwp = ulwp->ul_link) { + if (ulwp->ul_wchan == wchan) { + *more = 1; + return (ulwpp); + } + } + + *more = 0; + return (ulwpp); +} + +ulwp_t * +dequeue(queue_head_t *qp, void *wchan, int *more) +{ + ulwp_t **ulwpp; + ulwp_t *ulwp; + ulwp_t *prev; + + if ((ulwpp = queue_slot(qp, wchan, more, &prev)) == NULL) + return (NULL); + + /* + * Dequeue the waiter. + */ + ulwp = *ulwpp; + *ulwpp = ulwp->ul_link; + ulwp->ul_link = NULL; + if (qp->qh_tail == ulwp) + qp->qh_tail = prev; + qp->qh_qlen--; + ulwp->ul_sleepq = NULL; + ulwp->ul_wchan = NULL; + + return (ulwp); +} + +/* + * Return a pointer to the highest priority thread sleeping on wchan. + */ +ulwp_t * +queue_waiter(queue_head_t *qp, void *wchan) +{ + ulwp_t **ulwpp; + + if ((ulwpp = queue_slot(qp, wchan, NULL, NULL)) == NULL) + return (NULL); + return (*ulwpp); +} + +uint8_t +dequeue_self(queue_head_t *qp, void *wchan) +{ + ulwp_t *self = curthread; + ulwp_t **ulwpp; + ulwp_t *ulwp; + ulwp_t *prev = NULL; + int found = 0; + int more = 0; + + ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); + + /* find self on the sleep queue */ + for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; + prev = ulwp, ulwpp = &ulwp->ul_link) { + if (ulwp == self) { + /* dequeue ourself */ + *ulwpp = self->ul_link; + if (qp->qh_tail == self) + qp->qh_tail = prev; + qp->qh_qlen--; + ASSERT(self->ul_wchan == wchan); + self->ul_cvmutex = NULL; + self->ul_sleepq = NULL; + self->ul_wchan = NULL; + self->ul_cv_wake = 0; + self->ul_link = NULL; + found = 1; + break; + } + if (ulwp->ul_wchan == wchan) + more = 1; + } + + if (!found) + thr_panic("dequeue_self(): curthread not found on queue"); + + if (more) + return (1); + + /* scan the remainder of the queue for another waiter */ + for (ulwp = *ulwpp; ulwp != NULL; ulwp = ulwp->ul_link) { + if (ulwp->ul_wchan == wchan) + return (1); + } + + return (0); +} + +/* + * Called from call_user_handler() and _thrp_suspend() to take + * ourself off of our sleep queue so we can grab locks. + */ +void +unsleep_self(void) +{ + ulwp_t *self = curthread; + queue_head_t *qp; + + /* + * Calling enter_critical()/exit_critical() here would lead + * to recursion. Just manipulate self->ul_critical directly. 
+ */ + self->ul_critical++; + self->ul_writer = 0; + while (self->ul_sleepq != NULL) { + qp = queue_lock(self->ul_wchan, self->ul_qtype); + /* + * We may have been moved from a CV queue to a + * mutex queue while we were attempting queue_lock(). + * If so, just loop around and try again. + * dequeue_self() clears self->ul_sleepq. + */ + if (qp == self->ul_sleepq) + (void) dequeue_self(qp, self->ul_wchan); + queue_unlock(qp); + } + self->ul_critical--; +} + +/* + * Common code for calling the the ___lwp_mutex_timedlock() system call. + * Returns with mutex_owner and mutex_ownerpid set correctly. + */ +int +mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + hrtime_t begin_sleep; + int error; + + self->ul_sp = stkptr(); + self->ul_wchan = mp; + if (__td_event_report(self, TD_SLEEP, udp)) { + self->ul_td_evbuf.eventnum = TD_SLEEP; + self->ul_td_evbuf.eventdata = mp; + tdb_event(TD_SLEEP, udp); + } + if (msp) { + tdb_incr(msp->mutex_sleep); + begin_sleep = gethrtime(); + } + + DTRACE_PROBE1(plockstat, mutex__block, mp); + + for (;;) { + if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0) { + DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); + DTRACE_PROBE2(plockstat, mutex__error, mp, error); + break; + } + + if (mp->mutex_type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) { + /* + * Defend against forkall(). We may be the child, + * in which case we don't actually own the mutex. + */ + enter_critical(self); + if (mp->mutex_ownerpid == udp->pid) { + mp->mutex_owner = (uintptr_t)self; + exit_critical(self); + DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, + 0, 0); + break; + } + exit_critical(self); + } else { + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + break; + } + } + if (msp) + msp->mutex_sleep_time += gethrtime() - begin_sleep; + self->ul_wchan = NULL; + self->ul_sp = 0; + + return (error); +} + +/* + * Common code for calling the ___lwp_mutex_trylock() system call. + * Returns with mutex_owner and mutex_ownerpid set correctly. + */ +int +mutex_trylock_kernel(mutex_t *mp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + int error; + + for (;;) { + if ((error = ___lwp_mutex_trylock(mp)) != 0) { + if (error != EBUSY) { + DTRACE_PROBE2(plockstat, mutex__error, mp, + error); + } + break; + } + + if (mp->mutex_type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) { + /* + * Defend against forkall(). We may be the child, + * in which case we don't actually own the mutex. 
+ */ + enter_critical(self); + if (mp->mutex_ownerpid == udp->pid) { + mp->mutex_owner = (uintptr_t)self; + exit_critical(self); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, + 0, 0); + break; + } + exit_critical(self); + } else { + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + break; + } + } + + return (error); +} + +volatile sc_shared_t * +setup_schedctl(void) +{ + ulwp_t *self = curthread; + volatile sc_shared_t *scp; + sc_shared_t *tmp; + + if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */ + !self->ul_vfork && /* not a child of vfork() */ + !self->ul_schedctl_called) { /* haven't been called before */ + enter_critical(self); + self->ul_schedctl_called = &self->ul_uberdata->uberflags; + if ((tmp = __schedctl()) != (sc_shared_t *)(-1)) + self->ul_schedctl = scp = tmp; + exit_critical(self); + } + /* + * Unless the call to setup_schedctl() is surrounded + * by enter_critical()/exit_critical(), the address + * we are returning could be invalid due to a forkall() + * having occurred in another thread. + */ + return (scp); +} + +/* + * Interfaces from libsched, incorporated into libc. + * libsched.so.1 is now a filter library onto libc. + */ +#pragma weak schedctl_lookup = _schedctl_init +#pragma weak _schedctl_lookup = _schedctl_init +#pragma weak schedctl_init = _schedctl_init +schedctl_t * +_schedctl_init(void) +{ + volatile sc_shared_t *scp = setup_schedctl(); + return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl); +} + +#pragma weak schedctl_exit = _schedctl_exit +void +_schedctl_exit(void) +{ +} + +/* + * Contract private interface for java. + * Set up the schedctl data if it doesn't exist yet. + * Return a pointer to the pointer to the schedctl data. + */ +volatile sc_shared_t *volatile * +_thr_schedctl(void) +{ + ulwp_t *self = curthread; + volatile sc_shared_t *volatile *ptr; + + if (self->ul_vfork) + return (NULL); + if (*(ptr = &self->ul_schedctl) == NULL) + (void) setup_schedctl(); + return (ptr); +} + +/* + * Block signals and attempt to block preemption. + * no_preempt()/preempt() must be used in pairs but can be nested. + */ +void +no_preempt(ulwp_t *self) +{ + volatile sc_shared_t *scp; + + if (self->ul_preempt++ == 0) { + enter_critical(self); + if ((scp = self->ul_schedctl) != NULL || + (scp = setup_schedctl()) != NULL) { + /* + * Save the pre-existing preempt value. + */ + self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt; + scp->sc_preemptctl.sc_nopreempt = 1; + } + } +} + +/* + * Undo the effects of no_preempt(). + */ +void +preempt(ulwp_t *self) +{ + volatile sc_shared_t *scp; + + ASSERT(self->ul_preempt > 0); + if (--self->ul_preempt == 0) { + if ((scp = self->ul_schedctl) != NULL) { + /* + * Restore the pre-existing preempt value. + */ + scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt; + if (scp->sc_preemptctl.sc_yield && + scp->sc_preemptctl.sc_nopreempt == 0) { + lwp_yield(); + if (scp->sc_preemptctl.sc_yield) { + /* + * Shouldn't happen. This is either + * a race condition or the thread + * just entered the real-time class. + */ + lwp_yield(); + scp->sc_preemptctl.sc_yield = 0; + } + } + } + exit_critical(self); + } +} + +/* + * If a call to preempt() would cause the current thread to yield or to + * take deferred actions in exit_critical(), then unpark the specified + * lwp so it can run while we delay. Return the original lwpid if the + * unpark was not performed, else return zero. The tests are a repeat + * of some of the tests in preempt(), above. 
This is a statistical + * optimization solely for cond_sleep_queue(), below. + */ +static lwpid_t +preempt_unpark(ulwp_t *self, lwpid_t lwpid) +{ + volatile sc_shared_t *scp = self->ul_schedctl; + + ASSERT(self->ul_preempt == 1 && self->ul_critical > 0); + if ((scp != NULL && scp->sc_preemptctl.sc_yield) || + (self->ul_curplease && self->ul_critical == 1)) { + (void) __lwp_unpark(lwpid); + lwpid = 0; + } + return (lwpid); +} + +/* + * Spin for a while, trying to grab the lock. We know that we + * failed set_lock_byte(&mp->mutex_lockw) once before coming here. + * If this fails, return EBUSY and let the caller deal with it. + * If this succeeds, return 0 with mutex_owner set to curthread. + */ +int +mutex_trylock_adaptive(mutex_t *mp) +{ + ulwp_t *self = curthread; + ulwp_t *ulwp; + volatile sc_shared_t *scp; + volatile uint8_t *lockp; + volatile uint64_t *ownerp; + int count, max = self->ul_adaptive_spin; + + ASSERT(!(mp->mutex_type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST))); + + if (max == 0 || (mp->mutex_spinners >= self->ul_max_spinners)) + return (EBUSY); + + lockp = (volatile uint8_t *)&mp->mutex_lockw; + ownerp = (volatile uint64_t *)&mp->mutex_owner; + + DTRACE_PROBE1(plockstat, mutex__spin, mp); + + /* + * This spin loop is unfair to lwps that have already dropped into + * the kernel to sleep. They will starve on a highly-contended mutex. + * This is just too bad. The adaptive spin algorithm is intended + * to allow programs with highly-contended locks (that is, broken + * programs) to execute with reasonable speed despite their contention. + * Being fair would reduce the speed of such programs and well-written + * programs will not suffer in any case. + */ + enter_critical(self); /* protects ul_schedctl */ + incr32(&mp->mutex_spinners); + for (count = 0; count < max; count++) { + if (*lockp == 0 && set_lock_byte(lockp) == 0) { + *ownerp = (uintptr_t)self; + decr32(&mp->mutex_spinners); + exit_critical(self); + DTRACE_PROBE2(plockstat, mutex__spun, 1, count); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); + return (0); + } + SMT_PAUSE(); + /* + * Stop spinning if the mutex owner is not running on + * a processor; it will not drop the lock any time soon + * and we would just be wasting time to keep spinning. + * + * Note that we are looking at another thread (ulwp_t) + * without ensuring that the other thread does not exit. + * The scheme relies on ulwp_t structures never being + * deallocated by the library (the library employs a free + * list of ulwp_t structs that are reused when new threads + * are created) and on schedctl shared memory never being + * deallocated once created via __schedctl(). + * + * Thus, the worst that can happen when the spinning thread + * looks at the owner's schedctl data is that it is looking + * at some other thread's schedctl data. This almost never + * happens and is benign when it does. + */ + if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && + ((scp = ulwp->ul_schedctl) == NULL || + scp->sc_state != SC_ONPROC)) + break; + } + decr32(&mp->mutex_spinners); + exit_critical(self); + + DTRACE_PROBE2(plockstat, mutex__spun, 0, count); + + return (EBUSY); +} + +/* + * Same as mutex_trylock_adaptive(), except specifically for queue locks. + * The owner field is not set here; the caller (spin_lock_set()) sets it. 
+ */ +int +mutex_queuelock_adaptive(mutex_t *mp) +{ + ulwp_t *ulwp; + volatile sc_shared_t *scp; + volatile uint8_t *lockp; + volatile uint64_t *ownerp; + int count = curthread->ul_queue_spin; + + ASSERT(mp->mutex_type == USYNC_THREAD); + + if (count == 0) + return (EBUSY); + + lockp = (volatile uint8_t *)&mp->mutex_lockw; + ownerp = (volatile uint64_t *)&mp->mutex_owner; + while (--count >= 0) { + if (*lockp == 0 && set_lock_byte(lockp) == 0) + return (0); + SMT_PAUSE(); + if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && + ((scp = ulwp->ul_schedctl) == NULL || + scp->sc_state != SC_ONPROC)) + break; + } + + return (EBUSY); +} + +/* + * Like mutex_trylock_adaptive(), but for process-shared mutexes. + * Spin for a while, trying to grab the lock. We know that we + * failed set_lock_byte(&mp->mutex_lockw) once before coming here. + * If this fails, return EBUSY and let the caller deal with it. + * If this succeeds, return 0 with mutex_owner set to curthread + * and mutex_ownerpid set to the current pid. + */ +int +mutex_trylock_process(mutex_t *mp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + int count; + volatile uint8_t *lockp; + volatile uint64_t *ownerp; + volatile int32_t *pidp; + pid_t pid, newpid; + uint64_t owner, newowner; + + if ((count = ncpus) == 0) + count = ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); + count = (count > 1)? self->ul_adaptive_spin : 0; + + ASSERT((mp->mutex_type & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == + USYNC_PROCESS); + + if (count == 0) + return (EBUSY); + + lockp = (volatile uint8_t *)&mp->mutex_lockw; + ownerp = (volatile uint64_t *)&mp->mutex_owner; + pidp = (volatile int32_t *)&mp->mutex_ownerpid; + owner = *ownerp; + pid = *pidp; + /* + * This is a process-shared mutex. + * We cannot know if the owner is running on a processor. + * We just spin and hope that it is on a processor. + */ + while (--count >= 0) { + if (*lockp == 0) { + enter_critical(self); + if (set_lock_byte(lockp) == 0) { + *ownerp = (uintptr_t)self; + *pidp = udp->pid; + exit_critical(self); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, + 0, 0); + return (0); + } + exit_critical(self); + } else if ((newowner = *ownerp) == owner && + (newpid = *pidp) == pid) { + SMT_PAUSE(); + continue; + } + /* + * The owner of the lock changed; start the count over again. + * This may be too aggressive; it needs testing. + */ + owner = newowner; + pid = newpid; + count = self->ul_adaptive_spin; + } + + return (EBUSY); +} + +/* + * Mutex wakeup code for releasing a USYNC_THREAD mutex. + * Returns the lwpid of the thread that was dequeued, if any. + * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) + * to wake up the specified lwp. + */ +lwpid_t +mutex_wakeup(mutex_t *mp) +{ + lwpid_t lwpid = 0; + queue_head_t *qp; + ulwp_t *ulwp; + int more; + + /* + * Dequeue a waiter from the sleep queue. Don't touch the mutex + * waiters bit if no one was found on the queue because the mutex + * might have been deallocated or reallocated for another purpose. + */ + qp = queue_lock(mp, MX); + if ((ulwp = dequeue(qp, mp, &more)) != NULL) { + lwpid = ulwp->ul_lwpid; + mp->mutex_waiters = (more? 1 : 0); + } + queue_unlock(qp); + return (lwpid); +} + +/* + * Spin for a while, testing to see if the lock has been grabbed. + * If this fails, call mutex_wakeup() to release a waiter. 
+ */ +lwpid_t +mutex_unlock_queue(mutex_t *mp) +{ + ulwp_t *self = curthread; + uint32_t *lockw = &mp->mutex_lockword; + lwpid_t lwpid; + volatile uint8_t *lockp; + volatile uint32_t *spinp; + int count; + + /* + * We use the swap primitive to clear the lock, but we must + * atomically retain the waiters bit for the remainder of this + * code to work. We first check to see if the waiters bit is + * set and if so clear the lock by swapping in a word containing + * only the waiters bit. This could produce a false positive test + * for whether there are waiters that need to be waked up, but + * this just causes an extra call to mutex_wakeup() to do nothing. + * The opposite case is more delicate: If there are no waiters, + * we swap in a zero lock byte and a zero waiters bit. The result + * of the swap could indicate that there really was a waiter so in + * this case we go directly to mutex_wakeup() without performing + * any of the adaptive code because the waiter bit has been cleared + * and the adaptive code is unreliable in this case. + */ + if (!(*lockw & WAITERMASK)) { /* no waiter exists right now */ + mp->mutex_owner = 0; + DTRACE_PROBE2(plockstat, mutex__release, mp, 0); + if (!(swap32(lockw, 0) & WAITERMASK)) /* still no waiters */ + return (0); + no_preempt(self); /* ensure a prompt wakeup */ + lwpid = mutex_wakeup(mp); + } else { + no_preempt(self); /* ensure a prompt wakeup */ + lockp = (volatile uint8_t *)&mp->mutex_lockw; + spinp = (volatile uint32_t *)&mp->mutex_spinners; + mp->mutex_owner = 0; + DTRACE_PROBE2(plockstat, mutex__release, mp, 0); + (void) swap32(lockw, WAITER); /* clear lock, retain waiter */ + + /* + * We spin here fewer times than mutex_trylock_adaptive(). + * We are trying to balance two conflicting goals: + * 1. Avoid waking up anyone if a spinning thread + * grabs the lock. + * 2. Wake up a sleeping thread promptly to get on + * with useful work. + * We don't spin at all if there is no acquiring spinner; + * (mp->mutex_spinners is non-zero if there are spinners). + */ + for (count = self->ul_release_spin; + *spinp && count > 0; count--) { + /* + * There is a waiter that we will have to wake + * up unless someone else grabs the lock while + * we are busy spinning. Like the spin loop in + * mutex_trylock_adaptive(), this spin loop is + * unfair to lwps that have already dropped into + * the kernel to sleep. They will starve on a + * highly-contended mutex. Too bad. + */ + if (*lockp != 0) { /* somebody grabbed the lock */ + preempt(self); + return (0); + } + SMT_PAUSE(); + } + + /* + * No one grabbed the lock. + * Wake up some lwp that is waiting for it. + */ + mp->mutex_waiters = 0; + lwpid = mutex_wakeup(mp); + } + + if (lwpid == 0) + preempt(self); + return (lwpid); +} + +/* + * Like mutex_unlock_queue(), but for process-shared mutexes. + * We tested the waiters field before calling here and it was non-zero. + */ +void +mutex_unlock_process(mutex_t *mp) +{ + ulwp_t *self = curthread; + int count; + volatile uint8_t *lockp; + + /* + * See the comments in mutex_unlock_queue(), above. + */ + if ((count = ncpus) == 0) + count = ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); + count = (count > 1)? 
self->ul_release_spin : 0; + no_preempt(self); + mp->mutex_owner = 0; + mp->mutex_ownerpid = 0; + DTRACE_PROBE2(plockstat, mutex__release, mp, 0); + if (count == 0) { + /* clear lock, test waiter */ + if (!(swap32(&mp->mutex_lockword, 0) & WAITERMASK)) { + /* no waiters now */ + preempt(self); + return; + } + } else { + /* clear lock, retain waiter */ + (void) swap32(&mp->mutex_lockword, WAITER); + lockp = (volatile uint8_t *)&mp->mutex_lockw; + while (--count >= 0) { + if (*lockp != 0) { + /* somebody grabbed the lock */ + preempt(self); + return; + } + SMT_PAUSE(); + } + /* + * We must clear the waiters field before going + * to the kernel, else it could remain set forever. + */ + mp->mutex_waiters = 0; + } + (void) ___lwp_mutex_wakeup(mp); + preempt(self); +} + +/* + * Return the real priority of a thread. + */ +int +real_priority(ulwp_t *ulwp) +{ + if (ulwp->ul_epri == 0) + return (ulwp->ul_mappedpri? ulwp->ul_mappedpri : ulwp->ul_pri); + return (ulwp->ul_emappedpri? ulwp->ul_emappedpri : ulwp->ul_epri); +} + +void +stall(void) +{ + for (;;) + (void) mutex_lock_kernel(&stall_mutex, NULL, NULL); +} + +/* + * Acquire a USYNC_THREAD mutex via user-level sleep queues. + * We failed set_lock_byte(&mp->mutex_lockw) before coming here. + * Returns with mutex_owner set correctly. + */ +int +mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, + timespec_t *tsp) +{ + uberdata_t *udp = curthread->ul_uberdata; + queue_head_t *qp; + hrtime_t begin_sleep; + int error = 0; + + self->ul_sp = stkptr(); + if (__td_event_report(self, TD_SLEEP, udp)) { + self->ul_wchan = mp; + self->ul_td_evbuf.eventnum = TD_SLEEP; + self->ul_td_evbuf.eventdata = mp; + tdb_event(TD_SLEEP, udp); + } + if (msp) { + tdb_incr(msp->mutex_sleep); + begin_sleep = gethrtime(); + } + + DTRACE_PROBE1(plockstat, mutex__block, mp); + + /* + * Put ourself on the sleep queue, and while we are + * unable to grab the lock, go park in the kernel. + * Take ourself off the sleep queue after we acquire the lock. + * The waiter bit can be set/cleared only while holding the queue lock. + */ + qp = queue_lock(mp, MX); + enqueue(qp, self, mp, MX); + mp->mutex_waiters = 1; + for (;;) { + if (set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + mp->mutex_waiters = dequeue_self(qp, mp); + break; + } + set_parking_flag(self, 1); + queue_unlock(qp); + /* + * __lwp_park() will return the residual time in tsp + * if we are unparked before the timeout expires. + */ + if ((error = __lwp_park(tsp, 0)) == EINTR) + error = 0; + set_parking_flag(self, 0); + /* + * We could have taken a signal or suspended ourself. + * If we did, then we removed ourself from the queue. + * Someone else may have removed us from the queue + * as a consequence of mutex_unlock(). We may have + * gotten a timeout from __lwp_park(). Or we may still + * be on the queue and this is just a spurious wakeup. 
+ */ + qp = queue_lock(mp, MX); + if (self->ul_sleepq == NULL) { + if (error) { + DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); + DTRACE_PROBE2(plockstat, mutex__error, mp, + error); + break; + } + if (set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, + 0, 0); + break; + } + enqueue(qp, self, mp, MX); + mp->mutex_waiters = 1; + } + ASSERT(self->ul_sleepq == qp && + self->ul_qtype == MX && + self->ul_wchan == mp); + if (error) { + mp->mutex_waiters = dequeue_self(qp, mp); + DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); + DTRACE_PROBE2(plockstat, mutex__error, mp, error); + break; + } + } + + ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && + self->ul_wchan == NULL); + self->ul_sp = 0; + + queue_unlock(qp); + if (msp) + msp->mutex_sleep_time += gethrtime() - begin_sleep; + + ASSERT(error == 0 || error == EINVAL || error == ETIME); + return (error); +} + +/* + * Returns with mutex_owner set correctly. + */ +int +mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + int mtype = mp->mutex_type; + tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); + int error = 0; + + ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); + + if (!self->ul_schedctl_called) + (void) setup_schedctl(); + + if (msp && try == MUTEX_TRY) + tdb_incr(msp->mutex_try); + + if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp)) { + if (mtype & LOCK_RECURSIVE) { + if (mp->mutex_rcount == RECURSION_MAX) { + error = EAGAIN; + } else { + mp->mutex_rcount++; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, + 1, 0); + return (0); + } + } else if (try == MUTEX_TRY) { + return (EBUSY); + } else { + DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); + return (EDEADLK); + } + } + + if (self->ul_error_detection && try == MUTEX_LOCK && + tsp == NULL && mutex_is_held(mp)) + lock_error(mp, "mutex_lock", NULL, NULL); + + if (mtype & + (USYNC_PROCESS_ROBUST|PTHREAD_PRIO_INHERIT|PTHREAD_PRIO_PROTECT)) { + uint8_t ceil; + int myprio; + + if (mtype & PTHREAD_PRIO_PROTECT) { + ceil = mp->mutex_ceiling; + ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0); + myprio = real_priority(self); + if (myprio > ceil) { + DTRACE_PROBE2(plockstat, mutex__error, mp, + EINVAL); + return (EINVAL); + } + if ((error = _ceil_mylist_add(mp)) != 0) { + DTRACE_PROBE2(plockstat, mutex__error, mp, + error); + return (error); + } + if (myprio < ceil) + _ceil_prio_inherit(ceil); + } + + if (mtype & PTHREAD_PRIO_INHERIT) { + /* go straight to the kernel */ + if (try == MUTEX_TRY) + error = mutex_trylock_kernel(mp); + else /* MUTEX_LOCK */ + error = mutex_lock_kernel(mp, tsp, msp); + /* + * The kernel never sets or clears the lock byte + * for PTHREAD_PRIO_INHERIT mutexes. + * Set it here for debugging consistency. + */ + switch (error) { + case 0: + case EOWNERDEAD: + mp->mutex_lockw = LOCKSET; + break; + } + } else if (mtype & USYNC_PROCESS_ROBUST) { + /* go straight to the kernel */ + if (try == MUTEX_TRY) + error = mutex_trylock_kernel(mp); + else /* MUTEX_LOCK */ + error = mutex_lock_kernel(mp, tsp, msp); + } else { /* PTHREAD_PRIO_PROTECT */ + /* + * Try once at user level before going to the kernel. + * If this is a process shared mutex then protect + * against forkall() while setting mp->mutex_ownerpid. 
+ */ + if (mtype & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) { + enter_critical(self); + if (set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + mp->mutex_ownerpid = udp->pid; + exit_critical(self); + DTRACE_PROBE3(plockstat, + mutex__acquire, mp, 0, 0); + } else { + exit_critical(self); + error = EBUSY; + } + } else { + if (set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE3(plockstat, + mutex__acquire, mp, 0, 0); + } else { + error = EBUSY; + } + } + if (error && try == MUTEX_LOCK) + error = mutex_lock_kernel(mp, tsp, msp); + } + + if (error) { + if (mtype & PTHREAD_PRIO_INHERIT) { + switch (error) { + case EOWNERDEAD: + case ENOTRECOVERABLE: + if (mtype & PTHREAD_MUTEX_ROBUST_NP) + break; + if (error == EOWNERDEAD) { + /* + * We own the mutex; unlock it. + * It becomes ENOTRECOVERABLE. + * All waiters are waked up. + */ + mp->mutex_owner = 0; + mp->mutex_ownerpid = 0; + DTRACE_PROBE2(plockstat, + mutex__release, mp, 0); + mp->mutex_lockw = LOCKCLEAR; + (void) ___lwp_mutex_unlock(mp); + } + /* FALLTHROUGH */ + case EDEADLK: + if (try == MUTEX_LOCK) + stall(); + error = EBUSY; + break; + } + } + if ((mtype & PTHREAD_PRIO_PROTECT) && + error != EOWNERDEAD) { + (void) _ceil_mylist_del(mp); + if (myprio < ceil) + _ceil_prio_waive(); + } + } + } else if (mtype & USYNC_PROCESS) { + /* + * This is a process shared mutex. Protect against + * forkall() while setting mp->mutex_ownerpid. + */ + enter_critical(self); + if (set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + mp->mutex_ownerpid = udp->pid; + exit_critical(self); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + } else { + /* try a little harder */ + exit_critical(self); + error = mutex_trylock_process(mp); + } + if (error && try == MUTEX_LOCK) + error = mutex_lock_kernel(mp, tsp, msp); + } else { /* USYNC_THREAD */ + /* try once */ + if (set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + } else { + /* try a little harder if we don't own the mutex */ + error = EBUSY; + if (MUTEX_OWNER(mp) != self) + error = mutex_trylock_adaptive(mp); + if (error && try == MUTEX_LOCK) /* go park */ + error = mutex_lock_queue(self, msp, mp, tsp); + } + } + + switch (error) { + case EOWNERDEAD: + case ELOCKUNMAPPED: + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + /* FALLTHROUGH */ + case 0: + if (msp) + record_begin_hold(msp); + break; + default: + if (try == MUTEX_TRY) { + if (msp) + tdb_incr(msp->mutex_try_fail); + if (__td_event_report(self, TD_LOCK_TRY, udp)) { + self->ul_td_evbuf.eventnum = TD_LOCK_TRY; + tdb_event(TD_LOCK_TRY, udp); + } + } + break; + } + + return (error); +} + +int +fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + + /* + * We know that USYNC_PROCESS is set in mtype and that + * zero, one, or both of the flags LOCK_RECURSIVE and + * LOCK_ERRORCHECK are set, and that no other flags are set. 
+ */ + enter_critical(self); + if (set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + mp->mutex_ownerpid = udp->pid; + exit_critical(self); + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + return (0); + } + exit_critical(self); + + if ((mtype & ~USYNC_PROCESS) && shared_mutex_held(mp)) { + if (mtype & LOCK_RECURSIVE) { + if (mp->mutex_rcount == RECURSION_MAX) + return (EAGAIN); + mp->mutex_rcount++; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); + return (0); + } + if (try == MUTEX_LOCK) { + DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); + return (EDEADLK); + } + return (EBUSY); + } + + /* try a little harder if we don't own the mutex */ + if (!shared_mutex_held(mp) && mutex_trylock_process(mp) == 0) + return (0); + + if (try == MUTEX_LOCK) + return (mutex_lock_kernel(mp, tsp, NULL)); + + if (__td_event_report(self, TD_LOCK_TRY, udp)) { + self->ul_td_evbuf.eventnum = TD_LOCK_TRY; + tdb_event(TD_LOCK_TRY, udp); + } + return (EBUSY); +} + +static int +slow_lock(ulwp_t *self, mutex_t *mp, timespec_t *tsp) +{ + int error = 0; + + if (MUTEX_OWNER(mp) == self || mutex_trylock_adaptive(mp) != 0) + error = mutex_lock_queue(self, NULL, mp, tsp); + return (error); +} + +int +mutex_lock_impl(mutex_t *mp, timespec_t *tsp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + uberflags_t *gflags; + int mtype; + + /* + * Optimize the case of USYNC_THREAD, including + * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, + * no error detection, no lock statistics, + * and the process has only a single thread. + * (Most likely a traditional single-threaded application.) + */ + if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | + udp->uberflags.uf_all) == 0) { + /* + * Only one thread exists so we don't need an atomic operation. + */ + if (mp->mutex_lockw == 0) { + mp->mutex_lockw = LOCKSET; + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + return (0); + } + if (mtype && MUTEX_OWNER(mp) == self) { + /* + * LOCK_RECURSIVE, LOCK_ERRORCHECK, or both. + */ + if (mtype & LOCK_RECURSIVE) { + if (mp->mutex_rcount == RECURSION_MAX) + return (EAGAIN); + mp->mutex_rcount++; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, + 1, 0); + return (0); + } + DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); + return (EDEADLK); /* LOCK_ERRORCHECK */ + } + /* + * We have reached a deadlock, probably because the + * process is executing non-async-signal-safe code in + * a signal handler and is attempting to acquire a lock + * that it already owns. This is not surprising, given + * bad programming practices over the years that has + * resulted in applications calling printf() and such + * in their signal handlers. Unless the user has told + * us that the signal handlers are safe by setting: + * export _THREAD_ASYNC_SAFE=1 + * we return EDEADLK rather than actually deadlocking. + */ + if (tsp == NULL && + MUTEX_OWNER(mp) == self && !self->ul_async_safe) { + DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); + return (EDEADLK); + } + } + + /* + * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, + * no error detection, and no lock statistics. + * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 
+ */ + if ((gflags = self->ul_schedctl_called) != NULL && + (gflags->uf_trs_ted | + (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { + + if (mtype & USYNC_PROCESS) + return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK)); + + if (set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + return (0); + } + + if (mtype && MUTEX_OWNER(mp) == self) { + if (mtype & LOCK_RECURSIVE) { + if (mp->mutex_rcount == RECURSION_MAX) + return (EAGAIN); + mp->mutex_rcount++; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, + 1, 0); + return (0); + } + DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); + return (EDEADLK); /* LOCK_ERRORCHECK */ + } + + return (slow_lock(self, mp, tsp)); + } + + /* else do it the long way */ + return (mutex_lock_internal(mp, tsp, MUTEX_LOCK)); +} + +#pragma weak _private_mutex_lock = __mutex_lock +#pragma weak mutex_lock = __mutex_lock +#pragma weak _mutex_lock = __mutex_lock +#pragma weak pthread_mutex_lock = __mutex_lock +#pragma weak _pthread_mutex_lock = __mutex_lock +int +__mutex_lock(mutex_t *mp) +{ + ASSERT(!curthread->ul_critical || curthread->ul_bindflags); + return (mutex_lock_impl(mp, NULL)); +} + +#pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock +int +_pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime) +{ + timespec_t tslocal; + int error; + + ASSERT(!curthread->ul_critical || curthread->ul_bindflags); + abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); + error = mutex_lock_impl(mp, &tslocal); + if (error == ETIME) + error = ETIMEDOUT; + return (error); +} + +#pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np +int +_pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime) +{ + timespec_t tslocal; + int error; + + ASSERT(!curthread->ul_critical || curthread->ul_bindflags); + tslocal = *reltime; + error = mutex_lock_impl(mp, &tslocal); + if (error == ETIME) + error = ETIMEDOUT; + return (error); +} + +static int +slow_trylock(mutex_t *mp, ulwp_t *self) +{ + if (MUTEX_OWNER(mp) == self || + mutex_trylock_adaptive(mp) != 0) { + uberdata_t *udp = self->ul_uberdata; + + if (__td_event_report(self, TD_LOCK_TRY, udp)) { + self->ul_td_evbuf.eventnum = TD_LOCK_TRY; + tdb_event(TD_LOCK_TRY, udp); + } + return (EBUSY); + } + return (0); +} + +#pragma weak _private_mutex_trylock = __mutex_trylock +#pragma weak mutex_trylock = __mutex_trylock +#pragma weak _mutex_trylock = __mutex_trylock +#pragma weak pthread_mutex_trylock = __mutex_trylock +#pragma weak _pthread_mutex_trylock = __mutex_trylock +int +__mutex_trylock(mutex_t *mp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + uberflags_t *gflags; + int mtype; + + ASSERT(!curthread->ul_critical || curthread->ul_bindflags); + /* + * Optimize the case of USYNC_THREAD, including + * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, + * no error detection, no lock statistics, + * and the process has only a single thread. + * (Most likely a traditional single-threaded application.) + */ + if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | + udp->uberflags.uf_all) == 0) { + /* + * Only one thread exists so we don't need an atomic operation. 
+ */ + if (mp->mutex_lockw == 0) { + mp->mutex_lockw = LOCKSET; + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + return (0); + } + if (mtype && MUTEX_OWNER(mp) == self) { + if (mtype & LOCK_RECURSIVE) { + if (mp->mutex_rcount == RECURSION_MAX) + return (EAGAIN); + mp->mutex_rcount++; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, + 1, 0); + return (0); + } + return (EDEADLK); /* LOCK_ERRORCHECK */ + } + return (EBUSY); + } + + /* + * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, + * no error detection, and no lock statistics. + * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. + */ + if ((gflags = self->ul_schedctl_called) != NULL && + (gflags->uf_trs_ted | + (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { + + if (mtype & USYNC_PROCESS) + return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); + + if (set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + return (0); + } + + if (mtype && MUTEX_OWNER(mp) == self) { + if (mtype & LOCK_RECURSIVE) { + if (mp->mutex_rcount == RECURSION_MAX) + return (EAGAIN); + mp->mutex_rcount++; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, + 1, 0); + return (0); + } + return (EBUSY); /* LOCK_ERRORCHECK */ + } + + return (slow_trylock(mp, self)); + } + + /* else do it the long way */ + return (mutex_lock_internal(mp, NULL, MUTEX_TRY)); +} + +int +mutex_unlock_internal(mutex_t *mp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + int mtype = mp->mutex_type; + tdb_mutex_stats_t *msp; + int error; + lwpid_t lwpid; + + if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp)) + return (EPERM); + + if (self->ul_error_detection && !mutex_is_held(mp)) + lock_error(mp, "mutex_unlock", NULL, NULL); + + if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { + mp->mutex_rcount--; + DTRACE_PROBE2(plockstat, mutex__release, mp, 1); + return (0); + } + + if ((msp = MUTEX_STATS(mp, udp)) != NULL) + (void) record_hold_time(msp); + + if (mtype & + (USYNC_PROCESS_ROBUST|PTHREAD_PRIO_INHERIT|PTHREAD_PRIO_PROTECT)) { + no_preempt(self); + mp->mutex_owner = 0; + mp->mutex_ownerpid = 0; + DTRACE_PROBE2(plockstat, mutex__release, mp, 0); + if (mtype & PTHREAD_PRIO_INHERIT) { + mp->mutex_lockw = LOCKCLEAR; + error = ___lwp_mutex_unlock(mp); + } else if (mtype & USYNC_PROCESS_ROBUST) { + error = ___lwp_mutex_unlock(mp); + } else { + if (swap32(&mp->mutex_lockword, 0) & WAITERMASK) + (void) ___lwp_mutex_wakeup(mp); + error = 0; + } + if (mtype & PTHREAD_PRIO_PROTECT) { + if (_ceil_mylist_del(mp)) + _ceil_prio_waive(); + } + preempt(self); + } else if (mtype & USYNC_PROCESS) { + if (mp->mutex_lockword & WAITERMASK) + mutex_unlock_process(mp); + else { + mp->mutex_owner = 0; + mp->mutex_ownerpid = 0; + DTRACE_PROBE2(plockstat, mutex__release, mp, 0); + if (swap32(&mp->mutex_lockword, 0) & WAITERMASK) { + no_preempt(self); + (void) ___lwp_mutex_wakeup(mp); + preempt(self); + } + } + error = 0; + } else { /* USYNC_THREAD */ + if ((lwpid = mutex_unlock_queue(mp)) != 0) { + (void) __lwp_unpark(lwpid); + preempt(self); + } + error = 0; + } + + return (error); +} + +#pragma weak _private_mutex_unlock = __mutex_unlock +#pragma weak mutex_unlock = __mutex_unlock +#pragma weak _mutex_unlock = __mutex_unlock +#pragma weak pthread_mutex_unlock = __mutex_unlock +#pragma weak _pthread_mutex_unlock = __mutex_unlock +int +__mutex_unlock(mutex_t *mp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + 
uberflags_t *gflags; + lwpid_t lwpid; + int mtype; + short el; + + /* + * Optimize the case of USYNC_THREAD, including + * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, + * no error detection, no lock statistics, + * and the process has only a single thread. + * (Most likely a traditional single-threaded application.) + */ + if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | + udp->uberflags.uf_all) == 0) { + if (mtype) { + /* + * At this point we know that one or both of the + * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. + */ + if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) + return (EPERM); + if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { + mp->mutex_rcount--; + DTRACE_PROBE2(plockstat, mutex__release, mp, 1); + return (0); + } + } + /* + * Only one thread exists so we don't need an atomic operation. + * Also, there can be no waiters. + */ + mp->mutex_owner = 0; + mp->mutex_lockword = 0; + DTRACE_PROBE2(plockstat, mutex__release, mp, 0); + return (0); + } + + /* + * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, + * no error detection, and no lock statistics. + * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. + */ + if ((gflags = self->ul_schedctl_called) != NULL) { + if (((el = gflags->uf_trs_ted) | mtype) == 0) { +fast_unlock: + if (!(mp->mutex_lockword & WAITERMASK)) { + /* no waiter exists right now */ + mp->mutex_owner = 0; + DTRACE_PROBE2(plockstat, mutex__release, mp, 0); + if (swap32(&mp->mutex_lockword, 0) & + WAITERMASK) { + /* a waiter suddenly appeared */ + no_preempt(self); + if ((lwpid = mutex_wakeup(mp)) != 0) + (void) __lwp_unpark(lwpid); + preempt(self); + } + } else if ((lwpid = mutex_unlock_queue(mp)) != 0) { + (void) __lwp_unpark(lwpid); + preempt(self); + } + return (0); + } + if (el) /* error detection or lock statistics */ + goto slow_unlock; + if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { + /* + * At this point we know that one or both of the + * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. + */ + if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) + return (EPERM); + if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { + mp->mutex_rcount--; + DTRACE_PROBE2(plockstat, mutex__release, mp, 1); + return (0); + } + goto fast_unlock; + } + if ((mtype & + ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { + /* + * At this point we know that zero, one, or both of the + * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and + * that the USYNC_PROCESS flag is set. + */ + if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp)) + return (EPERM); + if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { + mp->mutex_rcount--; + DTRACE_PROBE2(plockstat, mutex__release, mp, 1); + return (0); + } + if (mp->mutex_lockword & WAITERMASK) + mutex_unlock_process(mp); + else { + mp->mutex_owner = 0; + mp->mutex_ownerpid = 0; + DTRACE_PROBE2(plockstat, mutex__release, mp, 0); + if (swap32(&mp->mutex_lockword, 0) & + WAITERMASK) { + no_preempt(self); + (void) ___lwp_mutex_wakeup(mp); + preempt(self); + } + } + return (0); + } + } + + /* else do it the long way */ +slow_unlock: + return (mutex_unlock_internal(mp)); +} + +/* + * Internally to the library, almost all mutex lock/unlock actions + * go through these lmutex_ functions, to protect critical regions. + * We replicate a bit of code from __mutex_lock() and __mutex_unlock() + * to make these functions faster since we know that the mutex type + * of all internal locks is USYNC_THREAD. 
We also know that internal + * locking can never fail, so we panic if it does. + */ +void +lmutex_lock(mutex_t *mp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + + ASSERT(mp->mutex_type == USYNC_THREAD); + + enter_critical(self); + /* + * Optimize the case of no lock statistics and only a single thread. + * (Most likely a traditional single-threaded application.) + */ + if (udp->uberflags.uf_all == 0) { + /* + * Only one thread exists; the mutex must be free. + */ + ASSERT(mp->mutex_lockw == 0); + mp->mutex_lockw = LOCKSET; + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + } else { + tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); + + if (!self->ul_schedctl_called) + (void) setup_schedctl(); + + if (set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + } else if (mutex_trylock_adaptive(mp) != 0) { + (void) mutex_lock_queue(self, msp, mp, NULL); + } + + if (msp) + record_begin_hold(msp); + } +} + +void +lmutex_unlock(mutex_t *mp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + + ASSERT(mp->mutex_type == USYNC_THREAD); + + /* + * Optimize the case of no lock statistics and only a single thread. + * (Most likely a traditional single-threaded application.) + */ + if (udp->uberflags.uf_all == 0) { + /* + * Only one thread exists so there can be no waiters. + */ + mp->mutex_owner = 0; + mp->mutex_lockword = 0; + DTRACE_PROBE2(plockstat, mutex__release, mp, 0); + } else { + tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); + lwpid_t lwpid; + + if (msp) + (void) record_hold_time(msp); + if ((lwpid = mutex_unlock_queue(mp)) != 0) { + (void) __lwp_unpark(lwpid); + preempt(self); + } + } + exit_critical(self); +} + +static int +shared_mutex_held(mutex_t *mparg) +{ + /* + * There is an inherent data race in the current ownership design. + * The mutex_owner and mutex_ownerpid fields cannot be set or tested + * atomically as a pair. The original implementation tested each + * field just once. This was exposed to trivial false positives in + * the case of multiple multithreaded processes with thread addresses + * in common. To close the window to an acceptable level we now use a + * sequence of five tests: pid-thr-pid-thr-pid. This ensures that any + * single interruption will still leave one uninterrupted sequence of + * pid-thr-pid tests intact. + * + * It is assumed that all updates are always ordered thr-pid and that + * we have TSO hardware. + */ + volatile mutex_t *mp = (volatile mutex_t *)mparg; + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + + if (mp->mutex_ownerpid != udp->pid) + return (0); + + if (!MUTEX_OWNED(mp, self)) + return (0); + + if (mp->mutex_ownerpid != udp->pid) + return (0); + + if (!MUTEX_OWNED(mp, self)) + return (0); + + if (mp->mutex_ownerpid != udp->pid) + return (0); + + return (1); +} + +/* + * Some crufty old programs define their own version of _mutex_held() + * to be simply return(1). This breaks internal libc logic, so we + * define a private version for exclusive use by libc, mutex_is_held(), + * and also a new public function, __mutex_held(), to be used in new + * code to circumvent these crufty old programs. 
+ */ +#pragma weak mutex_held = mutex_is_held +#pragma weak _mutex_held = mutex_is_held +#pragma weak __mutex_held = mutex_is_held +int +mutex_is_held(mutex_t *mp) +{ + if (mp->mutex_type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) + return (shared_mutex_held(mp)); + return (MUTEX_OWNED(mp, curthread)); +} + +#pragma weak _private_mutex_destroy = __mutex_destroy +#pragma weak mutex_destroy = __mutex_destroy +#pragma weak _mutex_destroy = __mutex_destroy +#pragma weak pthread_mutex_destroy = __mutex_destroy +#pragma weak _pthread_mutex_destroy = __mutex_destroy +int +__mutex_destroy(mutex_t *mp) +{ + mp->mutex_magic = 0; + mp->mutex_flag &= ~LOCK_INITED; + tdb_sync_obj_deregister(mp); + return (0); +} + +/* + * Spin locks are separate from ordinary mutexes, + * but we use the same data structure for them. + */ + +#pragma weak pthread_spin_init = _pthread_spin_init +int +_pthread_spin_init(pthread_spinlock_t *lock, int pshared) +{ + mutex_t *mp = (mutex_t *)lock; + + (void) _memset(mp, 0, sizeof (*mp)); + if (pshared == PTHREAD_PROCESS_SHARED) + mp->mutex_type = USYNC_PROCESS; + else + mp->mutex_type = USYNC_THREAD; + mp->mutex_flag = LOCK_INITED; + mp->mutex_magic = MUTEX_MAGIC; + return (0); +} + +#pragma weak pthread_spin_destroy = _pthread_spin_destroy +int +_pthread_spin_destroy(pthread_spinlock_t *lock) +{ + (void) _memset(lock, 0, sizeof (*lock)); + return (0); +} + +#pragma weak pthread_spin_trylock = _pthread_spin_trylock +int +_pthread_spin_trylock(pthread_spinlock_t *lock) +{ + mutex_t *mp = (mutex_t *)lock; + ulwp_t *self = curthread; + int error = 0; + + no_preempt(self); + if (set_lock_byte(&mp->mutex_lockw) != 0) + error = EBUSY; + else { + mp->mutex_owner = (uintptr_t)self; + if (mp->mutex_type == USYNC_PROCESS) + mp->mutex_ownerpid = self->ul_uberdata->pid; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + } + preempt(self); + return (error); +} + +#pragma weak pthread_spin_lock = _pthread_spin_lock +int +_pthread_spin_lock(pthread_spinlock_t *lock) +{ + volatile uint8_t *lockp = + (volatile uint8_t *)&((mutex_t *)lock)->mutex_lockw; + + ASSERT(!curthread->ul_critical || curthread->ul_bindflags); + /* + * We don't care whether the owner is running on a processor. + * We just spin because that's what this interface requires. + */ + for (;;) { + if (*lockp == 0) { /* lock byte appears to be clear */ + if (_pthread_spin_trylock(lock) == 0) + return (0); + } + SMT_PAUSE(); + } +} + +#pragma weak pthread_spin_unlock = _pthread_spin_unlock +int +_pthread_spin_unlock(pthread_spinlock_t *lock) +{ + mutex_t *mp = (mutex_t *)lock; + ulwp_t *self = curthread; + + no_preempt(self); + mp->mutex_owner = 0; + mp->mutex_ownerpid = 0; + DTRACE_PROBE2(plockstat, mutex__release, mp, 0); + (void) swap32(&mp->mutex_lockword, 0); + preempt(self); + return (0); +} + +#pragma weak cond_init = _cond_init +/* ARGSUSED2 */ +int +_cond_init(cond_t *cvp, int type, void *arg) +{ + if (type != USYNC_THREAD && type != USYNC_PROCESS) + return (EINVAL); + (void) _memset(cvp, 0, sizeof (*cvp)); + cvp->cond_type = (uint16_t)type; + cvp->cond_magic = COND_MAGIC; + return (0); +} + +/* + * cond_sleep_queue(): utility function for cond_wait_queue(). + * + * Go to sleep on a condvar sleep queue, expect to be waked up + * by someone calling cond_signal() or cond_broadcast() or due + * to receiving a UNIX signal or being cancelled, or just simply + * due to a spurious wakeup (like someome calling forkall()). + * + * The associated mutex is *not* reacquired before returning. 
+ * That must be done by the caller of cond_sleep_queue(). + */ +int +cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) +{ + ulwp_t *self = curthread; + queue_head_t *qp; + queue_head_t *mqp; + lwpid_t lwpid; + int signalled; + int error; + + /* + * Put ourself on the CV sleep queue, unlock the mutex, then + * park ourself and unpark a candidate lwp to grab the mutex. + * We must go onto the CV sleep queue before dropping the + * mutex in order to guarantee atomicity of the operation. + */ + self->ul_sp = stkptr(); + qp = queue_lock(cvp, CV); + enqueue(qp, self, cvp, CV); + cvp->cond_waiters_user = 1; + self->ul_cvmutex = mp; + self->ul_cv_wake = (tsp != NULL); + self->ul_signalled = 0; + lwpid = mutex_unlock_queue(mp); + for (;;) { + set_parking_flag(self, 1); + queue_unlock(qp); + if (lwpid != 0) { + lwpid = preempt_unpark(self, lwpid); + preempt(self); + } + /* + * We may have a deferred signal present, + * in which case we should return EINTR. + * Also, we may have received a SIGCANCEL; if so + * and we are cancelable we should return EINTR. + * We force an immediate EINTR return from + * __lwp_park() by turning our parking flag off. + */ + if (self->ul_cursig != 0 || + (self->ul_cancelable && self->ul_cancel_pending)) + set_parking_flag(self, 0); + /* + * __lwp_park() will return the residual time in tsp + * if we are unparked before the timeout expires. + */ + error = __lwp_park(tsp, lwpid); + set_parking_flag(self, 0); + lwpid = 0; /* unpark the other lwp only once */ + /* + * We were waked up by cond_signal(), cond_broadcast(), + * by an interrupt or timeout (EINTR or ETIME), + * or we may just have gotten a spurious wakeup. + */ + qp = queue_lock(cvp, CV); + mqp = queue_lock(mp, MX); + if (self->ul_sleepq == NULL) + break; + /* + * We are on either the condvar sleep queue or the + * mutex sleep queue. If we are on the mutex sleep + * queue, continue sleeping. If we are on the condvar + * sleep queue, break out of the sleep if we were + * interrupted or we timed out (EINTR or ETIME). + * Else this is a spurious wakeup; continue the loop. + */ + if (self->ul_sleepq == mqp) /* mutex queue */ + tsp = NULL; + else if (self->ul_sleepq == qp) { /* condvar queue */ + if (error) { + cvp->cond_waiters_user = dequeue_self(qp, cvp); + break; + } + /* + * Else a spurious wakeup on the condvar queue. + * __lwp_park() has already adjusted the timeout. + */ + } else { + thr_panic("cond_sleep_queue(): thread not on queue"); + } + queue_unlock(mqp); + } + + self->ul_sp = 0; + ASSERT(self->ul_cvmutex == NULL && self->ul_cv_wake == 0); + ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && + self->ul_wchan == NULL); + + signalled = self->ul_signalled; + self->ul_signalled = 0; + queue_unlock(qp); + queue_unlock(mqp); + + /* + * If we were concurrently cond_signal()d and any of: + * received a UNIX signal, were cancelled, or got a timeout, + * then perform another cond_signal() to avoid consuming it. + */ + if (error && signalled) + (void) cond_signal_internal(cvp); + + return (error); +} + +int +cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp, + tdb_mutex_stats_t *msp) +{ + ulwp_t *self = curthread; + int error; + + /* + * The old thread library was programmed to defer signals + * while in cond_wait() so that the associated mutex would + * be guaranteed to be held when the application signal + * handler was invoked. + * + * We do not behave this way by default; the state of the + * associated mutex in the signal handler is undefined. 
+ * + * To accommodate applications that depend on the old + * behavior, the _THREAD_COND_WAIT_DEFER environment + * variable can be set to 1 and we will behave in the + * old way with respect to cond_wait(). + */ + if (self->ul_cond_wait_defer) + sigoff(self); + + error = cond_sleep_queue(cvp, mp, tsp); + + /* + * Reacquire the mutex. + */ + if (set_lock_byte(&mp->mutex_lockw) == 0) { + mp->mutex_owner = (uintptr_t)self; + DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); + } else if (mutex_trylock_adaptive(mp) != 0) { + (void) mutex_lock_queue(self, msp, mp, NULL); + } + + if (msp) + record_begin_hold(msp); + + /* + * Take any deferred signal now, after we have reacquired the mutex. + */ + if (self->ul_cond_wait_defer) + sigon(self); + + return (error); +} + +/* + * cond_sleep_kernel(): utility function for cond_wait_kernel(). + * See the comment ahead of cond_sleep_queue(), above. + */ +int +cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) +{ + int mtype = mp->mutex_type; + ulwp_t *self = curthread; + int error; + + if (mtype & PTHREAD_PRIO_PROTECT) { + if (_ceil_mylist_del(mp)) + _ceil_prio_waive(); + } + + self->ul_sp = stkptr(); + self->ul_wchan = cvp; + mp->mutex_owner = 0; + mp->mutex_ownerpid = 0; + if (mtype & PTHREAD_PRIO_INHERIT) + mp->mutex_lockw = LOCKCLEAR; + /* + * ___lwp_cond_wait() returns immediately with EINTR if + * set_parking_flag(self,0) is called on this lwp before it + * goes to sleep in the kernel. sigacthandler() calls this + * when a deferred signal is noted. This assures that we don't + * get stuck in ___lwp_cond_wait() with all signals blocked + * due to taking a deferred signal before going to sleep. + */ + set_parking_flag(self, 1); + if (self->ul_cursig != 0 || + (self->ul_cancelable && self->ul_cancel_pending)) + set_parking_flag(self, 0); + error = ___lwp_cond_wait(cvp, mp, tsp, 1); + set_parking_flag(self, 0); + self->ul_sp = 0; + self->ul_wchan = NULL; + return (error); +} + +int +cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) +{ + ulwp_t *self = curthread; + int error; + int merror; + + /* + * See the large comment in cond_wait_queue(), above. + */ + if (self->ul_cond_wait_defer) + sigoff(self); + + error = cond_sleep_kernel(cvp, mp, tsp); + + /* + * Override the return code from ___lwp_cond_wait() + * with any non-zero return code from mutex_lock(). + * This addresses robust lock failures in particular; + * the caller must see the EOWNERDEAD or ENOTRECOVERABLE + * errors in order to take corrective action. + */ + if ((merror = _private_mutex_lock(mp)) != 0) + error = merror; + + /* + * Take any deferred signal now, after we have reacquired the mutex. + */ + if (self->ul_cond_wait_defer) + sigon(self); + + return (error); +} + +/* + * Common code for _cond_wait() and _cond_timedwait() + */ +int +cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) +{ + int mtype = mp->mutex_type; + hrtime_t begin_sleep = 0; + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + tdb_cond_stats_t *csp = COND_STATS(cvp, udp); + tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); + uint8_t rcount; + int error = 0; + + /* + * The SUSV3 Posix spec for pthread_cond_timedwait() states: + * Except in the case of [ETIMEDOUT], all these error checks + * shall act as if they were performed immediately at the + * beginning of processing for the function and shall cause + * an error return, in effect, prior to modifying the state + * of the mutex specified by mutex or the condition variable + * specified by cond. 
+ * Therefore, we must return EINVAL now if the timout is invalid. + */ + if (tsp != NULL && + (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC)) + return (EINVAL); + + if (__td_event_report(self, TD_SLEEP, udp)) { + self->ul_sp = stkptr(); + self->ul_wchan = cvp; + self->ul_td_evbuf.eventnum = TD_SLEEP; + self->ul_td_evbuf.eventdata = cvp; + tdb_event(TD_SLEEP, udp); + self->ul_sp = 0; + } + if (csp) { + if (tsp) + tdb_incr(csp->cond_timedwait); + else + tdb_incr(csp->cond_wait); + } + if (msp) + begin_sleep = record_hold_time(msp); + else if (csp) + begin_sleep = gethrtime(); + + if (self->ul_error_detection) { + if (!mutex_is_held(mp)) + lock_error(mp, "cond_wait", cvp, NULL); + if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) + lock_error(mp, "recursive mutex in cond_wait", + cvp, NULL); + if (cvp->cond_type & USYNC_PROCESS) { + if (!(mtype & (USYNC_PROCESS | USYNC_PROCESS_ROBUST))) + lock_error(mp, "cond_wait", cvp, + "condvar process-shared, " + "mutex process-private"); + } else { + if (mtype & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) + lock_error(mp, "cond_wait", cvp, + "condvar process-private, " + "mutex process-shared"); + } + } + + /* + * We deal with recursive mutexes by completely + * dropping the lock and restoring the recursion + * count after waking up. This is arguably wrong, + * but it obeys the principle of least astonishment. + */ + rcount = mp->mutex_rcount; + mp->mutex_rcount = 0; + if ((mtype & (USYNC_PROCESS | USYNC_PROCESS_ROBUST | + PTHREAD_PRIO_INHERIT | PTHREAD_PRIO_PROTECT)) | + (cvp->cond_type & USYNC_PROCESS)) + error = cond_wait_kernel(cvp, mp, tsp); + else + error = cond_wait_queue(cvp, mp, tsp, msp); + mp->mutex_rcount = rcount; + + if (csp) { + hrtime_t lapse = gethrtime() - begin_sleep; + if (tsp == NULL) + csp->cond_wait_sleep_time += lapse; + else { + csp->cond_timedwait_sleep_time += lapse; + if (error == ETIME) + tdb_incr(csp->cond_timedwait_timeout); + } + } + return (error); +} + +/* + * cond_wait() is a cancellation point but _cond_wait() is not. + * System libraries call the non-cancellation version. + * It is expected that only applications call the cancellation version. + */ +int +_cond_wait(cond_t *cvp, mutex_t *mp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + uberflags_t *gflags; + + /* + * Optimize the common case of USYNC_THREAD plus + * no error detection, no lock statistics, and no event tracing. + */ + if ((gflags = self->ul_schedctl_called) != NULL && + (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted | + self->ul_td_events_enable | + udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0) + return (cond_wait_queue(cvp, mp, NULL, NULL)); + + /* + * Else do it the long way. + */ + return (cond_wait_common(cvp, mp, NULL)); +} + +int +cond_wait(cond_t *cvp, mutex_t *mp) +{ + int error; + + _cancelon(); + error = _cond_wait(cvp, mp); + if (error == EINTR) + _canceloff(); + else + _canceloff_nocancel(); + return (error); +} + +#pragma weak pthread_cond_wait = _pthread_cond_wait +int +_pthread_cond_wait(cond_t *cvp, mutex_t *mp) +{ + int error; + + error = cond_wait(cvp, mp); + return ((error == EINTR)? 0 : error); +} + +/* + * cond_timedwait() is a cancellation point but _cond_timedwait() is not. + * System libraries call the non-cancellation version. + * It is expected that only applications call the cancellation version. 
+ */ +int +_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) +{ + clockid_t clock_id = cvp->cond_clockid; + timespec_t reltime; + int error; + + if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES) + clock_id = CLOCK_REALTIME; + abstime_to_reltime(clock_id, abstime, &reltime); + error = cond_wait_common(cvp, mp, &reltime); + if (error == ETIME && clock_id == CLOCK_HIGHRES) { + /* + * Don't return ETIME if we didn't really get a timeout. + * This can happen if we return because someone resets + * the system clock. Just return zero in this case, + * giving a spurious wakeup but not a timeout. + */ + if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC + + abstime->tv_nsec > gethrtime()) + error = 0; + } + return (error); +} + +int +cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) +{ + int error; + + _cancelon(); + error = _cond_timedwait(cvp, mp, abstime); + if (error == EINTR) + _canceloff(); + else + _canceloff_nocancel(); + return (error); +} + +#pragma weak pthread_cond_timedwait = _pthread_cond_timedwait +int +_pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) +{ + int error; + + error = cond_timedwait(cvp, mp, abstime); + if (error == ETIME) + error = ETIMEDOUT; + else if (error == EINTR) + error = 0; + return (error); +} + +/* + * cond_reltimedwait() is a cancellation point but _cond_reltimedwait() + * is not. System libraries call the non-cancellation version. + * It is expected that only applications call the cancellation version. + */ +int +_cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) +{ + timespec_t tslocal = *reltime; + + return (cond_wait_common(cvp, mp, &tslocal)); +} + +#pragma weak cond_reltimedwait = _cond_reltimedwait_cancel +int +_cond_reltimedwait_cancel(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) +{ + int error; + + _cancelon(); + error = _cond_reltimedwait(cvp, mp, reltime); + if (error == EINTR) + _canceloff(); + else + _canceloff_nocancel(); + return (error); +} + +#pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np +int +_pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp, + const timespec_t *reltime) +{ + int error; + + error = _cond_reltimedwait_cancel(cvp, mp, reltime); + if (error == ETIME) + error = ETIMEDOUT; + else if (error == EINTR) + error = 0; + return (error); +} + +#pragma weak pthread_cond_signal = cond_signal_internal +#pragma weak _pthread_cond_signal = cond_signal_internal +#pragma weak cond_signal = cond_signal_internal +#pragma weak _cond_signal = cond_signal_internal +int +cond_signal_internal(cond_t *cvp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + tdb_cond_stats_t *csp = COND_STATS(cvp, udp); + int error = 0; + queue_head_t *qp; + mutex_t *mp; + queue_head_t *mqp; + ulwp_t **ulwpp; + ulwp_t *ulwp; + ulwp_t *prev = NULL; + ulwp_t *next; + ulwp_t **suspp = NULL; + ulwp_t *susprev; + + if (csp) + tdb_incr(csp->cond_signal); + + if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ + error = __lwp_cond_signal(cvp); + + if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ + return (error); + + /* + * Move someone from the condvar sleep queue to the mutex sleep + * queue for the mutex that he will acquire on being waked up. + * We can do this only if we own the mutex he will acquire. + * If we do not own the mutex, or if his ul_cv_wake flag + * is set, just dequeue and unpark him. 
+ */ + qp = queue_lock(cvp, CV); + for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; + prev = ulwp, ulwpp = &ulwp->ul_link) { + if (ulwp->ul_wchan == cvp) { + if (!ulwp->ul_stop) + break; + /* + * Try not to dequeue a suspended thread. + * This mimics the old libthread's behavior. + */ + if (suspp == NULL) { + suspp = ulwpp; + susprev = prev; + } + } + } + if (ulwp == NULL && suspp != NULL) { + ulwp = *(ulwpp = suspp); + prev = susprev; + suspp = NULL; + } + if (ulwp == NULL) { /* no one on the sleep queue */ + cvp->cond_waiters_user = 0; + queue_unlock(qp); + return (error); + } + /* + * Scan the remainder of the CV queue for another waiter. + */ + if (suspp != NULL) { + next = *suspp; + } else { + for (next = ulwp->ul_link; next != NULL; next = next->ul_link) + if (next->ul_wchan == cvp) + break; + } + if (next == NULL) + cvp->cond_waiters_user = 0; + + /* + * Inform the thread that he was the recipient of a cond_signal(). + * This lets him deal with cond_signal() and, concurrently, + * one or more of a cancellation, a UNIX signal, or a timeout. + * These latter conditions must not consume a cond_signal(). + */ + ulwp->ul_signalled = 1; + + /* + * Dequeue the waiter but leave his ul_sleepq non-NULL + * while we move him to the mutex queue so that he can + * deal properly with spurious wakeups. + */ + *ulwpp = ulwp->ul_link; + if (qp->qh_tail == ulwp) + qp->qh_tail = prev; + qp->qh_qlen--; + ulwp->ul_link = NULL; + + mp = ulwp->ul_cvmutex; /* the mutex he will acquire */ + ulwp->ul_cvmutex = NULL; + ASSERT(mp != NULL); + + if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { + lwpid_t lwpid = ulwp->ul_lwpid; + + no_preempt(self); + ulwp->ul_sleepq = NULL; + ulwp->ul_wchan = NULL; + ulwp->ul_cv_wake = 0; + queue_unlock(qp); + (void) __lwp_unpark(lwpid); + preempt(self); + } else { + mqp = queue_lock(mp, MX); + enqueue(mqp, ulwp, mp, MX); + mp->mutex_waiters = 1; + queue_unlock(mqp); + queue_unlock(qp); + } + + return (error); +} + +#define MAXLWPS 128 /* max remembered lwpids before overflow */ +#define NEWLWPS 2048 /* max remembered lwpids at first overflow */ + +#pragma weak pthread_cond_broadcast = cond_broadcast_internal +#pragma weak _pthread_cond_broadcast = cond_broadcast_internal +#pragma weak cond_broadcast = cond_broadcast_internal +#pragma weak _cond_broadcast = cond_broadcast_internal +int +cond_broadcast_internal(cond_t *cvp) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + tdb_cond_stats_t *csp = COND_STATS(cvp, udp); + int error = 0; + queue_head_t *qp; + mutex_t *mp; + queue_head_t *mqp; + mutex_t *mp_cache = NULL; + queue_head_t *mqp_cache = NULL; + ulwp_t **ulwpp; + ulwp_t *ulwp; + ulwp_t *prev = NULL; + lwpid_t buffer[MAXLWPS]; + lwpid_t *lwpid = buffer; + int nlwpid = 0; + int maxlwps = MAXLWPS; + + if (csp) + tdb_incr(csp->cond_broadcast); + + if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ + error = __lwp_cond_broadcast(cvp); + + if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ + return (error); + + /* + * Move everyone from the condvar sleep queue to the mutex sleep + * queue for the mutex that they will acquire on being waked up. + * We can do this only if we own the mutex they will acquire. + * If we do not own the mutex, or if their ul_cv_wake flag + * is set, just dequeue and unpark them. + * + * We keep track of lwpids that are to be unparked in lwpid[]. 
+ * __lwp_unpark_all() is called to unpark all of them after + * they have been removed from the sleep queue and the sleep + * queue lock has been dropped. If we run out of space in our + * on-stack buffer, we need to allocate more but we can't call + * lmalloc() because we are holding a queue lock when the overflow + * occurs and lmalloc() acquires a lock. We can't use alloca() + * either because the application may have allocated a small stack + * and we don't want to overrun the stack. So we use the mmap() + * system call directly since that path acquires no locks. + */ + qp = queue_lock(cvp, CV); + cvp->cond_waiters_user = 0; + ulwpp = &qp->qh_head; + while ((ulwp = *ulwpp) != NULL) { + + if (ulwp->ul_wchan != cvp) { + prev = ulwp; + ulwpp = &ulwp->ul_link; + continue; + } + + *ulwpp = ulwp->ul_link; + if (qp->qh_tail == ulwp) + qp->qh_tail = prev; + qp->qh_qlen--; + ulwp->ul_link = NULL; + + mp = ulwp->ul_cvmutex; /* his mutex */ + ulwp->ul_cvmutex = NULL; + ASSERT(mp != NULL); + + if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { + ulwp->ul_sleepq = NULL; + ulwp->ul_wchan = NULL; + ulwp->ul_cv_wake = 0; + if (nlwpid == maxlwps) { + /* + * Allocate NEWLWPS ids on the first overflow. + * Double the allocation each time after that. + */ + int newlwps = (lwpid == buffer)? NEWLWPS : + 2 * maxlwps; + void *vaddr = _private_mmap(NULL, + newlwps * sizeof (lwpid_t), + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, (off_t)0); + if (vaddr == MAP_FAILED) { + /* + * Let's hope this never happens. + * If it does, then we have a terrible + * thundering herd on our hands. + */ + (void) __lwp_unpark_all(lwpid, nlwpid); + nlwpid = 0; + } else { + (void) _memcpy(vaddr, lwpid, + maxlwps * sizeof (lwpid_t)); + if (lwpid != buffer) + (void) _private_munmap(lwpid, + maxlwps * sizeof (lwpid_t)); + lwpid = vaddr; + maxlwps = newlwps; + } + } + lwpid[nlwpid++] = ulwp->ul_lwpid; + } else { + if (mp != mp_cache) { + if (mqp_cache != NULL) + queue_unlock(mqp_cache); + mqp_cache = queue_lock(mp, MX); + mp_cache = mp; + } + mqp = mqp_cache; + enqueue(mqp, ulwp, mp, MX); + mp->mutex_waiters = 1; + } + } + if (mqp_cache != NULL) + queue_unlock(mqp_cache); + queue_unlock(qp); + if (nlwpid) { + if (nlwpid == 1) + (void) __lwp_unpark(lwpid[0]); + else + (void) __lwp_unpark_all(lwpid, nlwpid); + } + if (lwpid != buffer) + (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); + + return (error); +} + +#pragma weak pthread_cond_destroy = _cond_destroy +#pragma weak _pthread_cond_destroy = _cond_destroy +#pragma weak cond_destroy = _cond_destroy +int +_cond_destroy(cond_t *cvp) +{ + cvp->cond_magic = 0; + tdb_sync_obj_deregister(cvp); + return (0); +} + +#if defined(THREAD_DEBUG) +void +assert_no_libc_locks_held(void) +{ + ASSERT(!curthread->ul_critical || curthread->ul_bindflags); +} +#endif + +/* protected by link_lock */ +uint64_t spin_lock_spin; +uint64_t spin_lock_spin2; +uint64_t spin_lock_sleep; +uint64_t spin_lock_wakeup; + +/* + * Record spin lock statistics. + * Called by a thread exiting itself in thrp_exit(). + * Also called via atexit() from the thread calling + * exit() to do all the other threads as well. 
+ */ +void +record_spin_locks(ulwp_t *ulwp) +{ + spin_lock_spin += ulwp->ul_spin_lock_spin; + spin_lock_spin2 += ulwp->ul_spin_lock_spin2; + spin_lock_sleep += ulwp->ul_spin_lock_sleep; + spin_lock_wakeup += ulwp->ul_spin_lock_wakeup; + ulwp->ul_spin_lock_spin = 0; + ulwp->ul_spin_lock_spin2 = 0; + ulwp->ul_spin_lock_sleep = 0; + ulwp->ul_spin_lock_wakeup = 0; +} + +/* + * atexit function: dump the queue statistics to stderr. + */ +#include <stdio.h> +void +dump_queue_statistics(void) +{ + uberdata_t *udp = curthread->ul_uberdata; + queue_head_t *qp; + int qn; + uint64_t spin_lock_total = 0; + + if (udp->queue_head == NULL || thread_queue_dump == 0) + return; + + if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 || + fprintf(stderr, "queue# lockcount max qlen\n") < 0) + return; + for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) { + if (qp->qh_lockcount == 0) + continue; + spin_lock_total += qp->qh_lockcount; + if (fprintf(stderr, "%5d %12llu%12u\n", qn, + (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) + return; + } + + if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 || + fprintf(stderr, "queue# lockcount max qlen\n") < 0) + return; + for (qn = 0; qn < QHASHSIZE; qn++, qp++) { + if (qp->qh_lockcount == 0) + continue; + spin_lock_total += qp->qh_lockcount; + if (fprintf(stderr, "%5d %12llu%12u\n", qn, + (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) + return; + } + + (void) fprintf(stderr, "\n spin_lock_total = %10llu\n", + (u_longlong_t)spin_lock_total); + (void) fprintf(stderr, " spin_lock_spin = %10llu\n", + (u_longlong_t)spin_lock_spin); + (void) fprintf(stderr, " spin_lock_spin2 = %10llu\n", + (u_longlong_t)spin_lock_spin2); + (void) fprintf(stderr, " spin_lock_sleep = %10llu\n", + (u_longlong_t)spin_lock_sleep); + (void) fprintf(stderr, " spin_lock_wakeup = %10llu\n", + (u_longlong_t)spin_lock_wakeup); +} |
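
A minimal sketch of the unlock ordering used by the fast_unlock path above: clear the whole lock word atomically and inspect the value that was swapped out, so a waiter that queued itself after the initial peek is still noticed and woken. This is expressed with C11 atomics; fake_lock_t, WAITERMASK and wake_one_waiter() are illustrative stand-ins, not the libc internals (the real code also clears mutex_owner first and brackets the wakeup with no_preempt()/preempt()).

#include <stdatomic.h>
#include <stdint.h>

#define	WAITERMASK	0x0000ff00u	/* stand-in for the waiter bits */

typedef struct {
	_Atomic uint32_t lockword;	/* low byte = lock, WAITERMASK = waiters */
} fake_lock_t;

/* Hypothetical wakeup; the code above uses mutex_wakeup()/__lwp_unpark(). */
static void
wake_one_waiter(fake_lock_t *lp)
{
	(void) lp;
}

static void
fake_unlock(fake_lock_t *lp)
{
	if ((atomic_load(&lp->lockword) & WAITERMASK) == 0) {
		/*
		 * No waiter visible: swap the word to zero, then check
		 * the old value in case a waiter appeared after the peek.
		 */
		if (atomic_exchange(&lp->lockword, 0) & WAITERMASK)
			wake_one_waiter(lp);
	} else {
		/* Waiters known to exist: clear the word and hand off. */
		(void) atomic_exchange(&lp->lockword, 0);
		wake_one_waiter(lp);
	}
}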
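
_pthread_spin_lock() above spins on a plain read of the lock byte and only attempts the atomic grab once the byte looks clear, so spinning CPUs mostly read a shared cache line instead of hammering it with atomic operations. A sketch of the same test-and-test-and-set idea, assuming C11 atomics; spin_t and cpu_relax() are illustrative, with cpu_relax() playing the role of SMT_PAUSE().

#include <stdatomic.h>
#include <stdint.h>

typedef struct {
	_Atomic uint8_t lockbyte;	/* 0 = free, 1 = held */
} spin_t;

/* Stand-in for SMT_PAUSE(); a real build would emit a pause/yield hint. */
static inline void
cpu_relax(void)
{
}

static void
spin_lock(spin_t *sp)
{
	for (;;) {
		/* Peek with a plain load first; grab only if it looks free. */
		if (atomic_load_explicit(&sp->lockbyte,
		    memory_order_relaxed) == 0 &&
		    atomic_exchange_explicit(&sp->lockbyte, 1,
		    memory_order_acquire) == 0)
			return;
		cpu_relax();
	}
}

static void
spin_unlock(spin_t *sp)
{
	atomic_store_explicit(&sp->lockbyte, 0, memory_order_release);
}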
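
cond_sleep_queue() above explicitly tolerates spurious wakeups (e.g. someone calling forkall()), and _pthread_cond_wait() maps EINTR to 0, so a caller only learns that it woke up, not why. The usual caller-side discipline is to re-test the predicate in a loop; a small example against the standard pthread API (ready and make_ready() are illustrative, not part of this file):

#include <pthread.h>
#include <errno.h>
#include <time.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int ready;			/* the predicate being waited for */

int
wait_until_ready(const struct timespec *abstime)
{
	int err = 0;

	(void) pthread_mutex_lock(&m);
	while (!ready && err != ETIMEDOUT) {
		/* May return early or spuriously; hence the predicate loop. */
		err = (abstime != NULL) ?
		    pthread_cond_timedwait(&cv, &m, abstime) :
		    pthread_cond_wait(&cv, &m);
	}
	(void) pthread_mutex_unlock(&m);
	return (ready ? 0 : err);
}

void
make_ready(void)
{
	(void) pthread_mutex_lock(&m);
	ready = 1;
	(void) pthread_cond_signal(&cv);
	(void) pthread_mutex_unlock(&m);
}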
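
_cond_timedwait() above hands abstime_to_reltime() the caller's absolute timeout and then re-checks against gethrtime() so that a clock reset is not misreported as ETIME. A sketch of what such an absolute-to-relative conversion does, assuming the POSIX clock_gettime() interface; this is illustrative only, since the real abstime_to_reltime() lives elsewhere in libc and its exact clamping behavior is not shown here.

#include <time.h>

#define	NSEC_PER_SEC	1000000000L

static void
abs_to_rel(clockid_t clock_id, const struct timespec *abstime,
    struct timespec *reltime)
{
	struct timespec now;

	(void) clock_gettime(clock_id, &now);
	reltime->tv_sec = abstime->tv_sec - now.tv_sec;
	reltime->tv_nsec = abstime->tv_nsec - now.tv_nsec;
	if (reltime->tv_nsec < 0) {		/* borrow from seconds */
		reltime->tv_sec--;
		reltime->tv_nsec += NSEC_PER_SEC;
	}
	if (reltime->tv_sec < 0) {		/* already expired: clamp */
		reltime->tv_sec = 0;
		reltime->tv_nsec = 0;
	}
}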
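
cond_broadcast_internal() above cannot call lmalloc() while holding a queue lock and cannot risk alloca() on an application-sized stack, so when the on-stack lwpid buffer overflows it switches to a raw mmap() allocation and doubles it on each later overflow. A reduced sketch of that growth step (uint32_t stands in for lwpid_t; on failure this version just keeps the old buffer, whereas the real code unparks everyone collected so far and starts over):

#include <sys/types.h>
#include <sys/mman.h>
#include <stdint.h>
#include <string.h>

#define	MAXLWPS	128	/* ids held on the stack before overflowing */
#define	NEWLWPS	2048	/* ids allocated at the first overflow */

static uint32_t *
grow_ids(uint32_t *cur, uint32_t *stackbuf, int *maxp)
{
	int newmax = (cur == stackbuf) ? NEWLWPS : 2 * *maxp;
	void *vaddr = mmap(NULL, newmax * sizeof (uint32_t),
	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0);

	if (vaddr == MAP_FAILED)
		return (cur);		/* caller must cope with the overflow */
	(void) memcpy(vaddr, cur, *maxp * sizeof (uint32_t));
	if (cur != stackbuf)
		(void) munmap(cur, *maxp * sizeof (uint32_t));
	*maxp = newmax;
	return (vaddr);
}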
