Diffstat (limited to 'usr/src/uts/common/syscall/poll.c')
-rw-r--r-- | usr/src/uts/common/syscall/poll.c | 298
1 file changed, 268 insertions, 30 deletions
diff --git a/usr/src/uts/common/syscall/poll.c b/usr/src/uts/common/syscall/poll.c
index c33156a4fc..cc125f127a 100644
--- a/usr/src/uts/common/syscall/poll.c
+++ b/usr/src/uts/common/syscall/poll.c
@@ -29,7 +29,7 @@
 
 /*
  * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
  */
 
 /*
@@ -77,11 +77,13 @@ static struct {
 	kstat_named_t	pollcachehit;	/* list matched 100% w/ cached one */
 	kstat_named_t	pollcachephit;	/* list matched < 100% w/ cached one */
 	kstat_named_t	pollcachemiss;	/* every list entry is dif from cache */
+	kstat_named_t	pollunlockfail;	/* failed to perform pollunlock */
 } pollstats = {
 	{ "polllistmiss",	KSTAT_DATA_UINT64 },
 	{ "pollcachehit",	KSTAT_DATA_UINT64 },
 	{ "pollcachephit",	KSTAT_DATA_UINT64 },
-	{ "pollcachemiss",	KSTAT_DATA_UINT64 }
+	{ "pollcachemiss",	KSTAT_DATA_UINT64 },
+	{ "pollunlockfail",	KSTAT_DATA_UINT64 }
 };
 
 kstat_named_t *pollstats_ptr = (kstat_named_t *)&pollstats;
@@ -96,6 +98,10 @@ struct pplock {
 
 static struct pplock plocks[NPHLOCKS];	/* Hash array of pollhead locks */
 
+/* Contention lock & list for preventing deadlocks in recursive /dev/poll. */
+static kmutex_t pollstate_contenders_lock;
+static pollstate_t *pollstate_contenders = NULL;
+
 #ifdef DEBUG
 static int pollchecksanity(pollstate_t *, nfds_t);
 static int pollcheckxref(pollstate_t *, int);
@@ -223,19 +229,35 @@ static int plist_chkdupfd(file_t *, polldat_t *, pollstate_t *, pollfd_t *, int,
  * (which hold poll locks on entry to xx_poll(), then acquire foo)
  * and pollwakeup() threads (which hold foo, then acquire poll locks).
  *
- * pollunlock(void) releases whatever poll locks the current thread holds,
- *	returning a cookie for use by pollrelock();
+ * pollunlock(*cookie) releases whatever poll locks the current thread holds,
+ *	setting a cookie for use by pollrelock();
  *
  * pollrelock(cookie) reacquires previously dropped poll locks;
  *
  * polllock(php, mutex) does the common case: pollunlock(),
  *	acquire the problematic mutex, pollrelock().
+ *
+ * If polllock() or pollunlock() return non-zero, it indicates that a recursive
+ * /dev/poll is in progress and pollcache locks cannot be dropped.  Callers
+ * must handle this by indicating a POLLNVAL in the revents of the VOP_POLL.
  */
 int
-pollunlock(void)
+pollunlock(int *lockstate)
 {
+	pollstate_t *ps = curthread->t_pollstate;
 	pollcache_t *pcp;
-	int lockstate = 0;
+
+	ASSERT(lockstate != NULL);
+
+	/*
+	 * There is no way to safely perform a pollunlock() while in the depths
+	 * of a recursive /dev/poll operation.
+	 */
+	if (ps != NULL && ps->ps_depth > 1) {
+		ps->ps_flags |= POLLSTATE_ULFAIL;
+		pollstats.pollunlockfail.value.ui64++;
+		return (-1);
+	}
 
 	/*
 	 * t_pollcache is set by /dev/poll and event ports (port_fd.c).
@@ -243,45 +265,56 @@ pollunlock(void)
 	 * the t_pollcache should be NULL.
 	 */
 	if (curthread->t_pollcache == NULL)
-		pcp = curthread->t_pollstate->ps_pcache;
+		pcp = ps->ps_pcache;
 	else
 		pcp = curthread->t_pollcache;
 
-	if (mutex_owned(&pcp->pc_lock)) {
-		lockstate = 1;
+	if (!mutex_owned(&pcp->pc_lock)) {
+		*lockstate = 0;
+	} else {
+		*lockstate = 1;
 		mutex_exit(&pcp->pc_lock);
 	}
-	return (lockstate);
+	return (0);
 }
 
 void
 pollrelock(int lockstate)
 {
+	pollstate_t *ps = curthread->t_pollstate;
 	pollcache_t *pcp;
 
+	/* Skip this whole ordeal if the pollcache was not locked to begin */
+	if (lockstate == 0)
+		return;
+
 	/*
 	 * t_pollcache is set by /dev/poll and event ports (port_fd.c).
 	 * If the pollrelock/pollunlock is called as a result of poll(2),
 	 * the t_pollcache should be NULL.
	 */
 	if (curthread->t_pollcache == NULL)
-		pcp = curthread->t_pollstate->ps_pcache;
+		pcp = ps->ps_pcache;
 	else
 		pcp = curthread->t_pollcache;
 
-	if (lockstate > 0)
-		mutex_enter(&pcp->pc_lock);
+	mutex_enter(&pcp->pc_lock);
 }
 
 /* ARGSUSED */
-void
+int
 polllock(pollhead_t *php, kmutex_t *lp)
 {
-	if (!mutex_tryenter(lp)) {
-		int lockstate = pollunlock();
+	if (mutex_tryenter(lp) == 0) {
+		int state;
+
+		if (pollunlock(&state) != 0) {
+			return (-1);
+		}
 		mutex_enter(lp);
-		pollrelock(lockstate);
+		pollrelock(state);
 	}
+	return (0);
 }
 
 static int
@@ -370,10 +403,7 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 	 * Need to allocate memory for pollstate before anything because
 	 * the mutex and cv are created in this space
 	 */
-	if ((ps = t->t_pollstate) == NULL) {
-		t->t_pollstate = pollstate_create();
-		ps = t->t_pollstate;
-	}
+	ps = pollstate_create();
 	if (ps->ps_pcache == NULL)
 		ps->ps_pcache = pcache_alloc();
 
@@ -899,6 +929,7 @@ pollnotify(pollcache_t *pcp, int fd)
 	BT_SET(pcp->pc_bitmap, fd);
 	pcp->pc_flag |= PC_POLLWAKE;
 	cv_broadcast(&pcp->pc_cv);
+	pcache_wake_parents(pcp);
 }
 
 /*
@@ -2221,20 +2252,47 @@ pcache_clean_entry(pollstate_t *ps, int fd)
 	}
 }
 
+void
+pcache_wake_parents(pollcache_t *pcp)
+{
+	pcachelink_t *pl, *pln;
+
+	ASSERT(MUTEX_HELD(&pcp->pc_lock));
+
+	for (pl = pcp->pc_parents; pl != NULL; pl = pln) {
+		mutex_enter(&pl->pcl_lock);
+		if (pl->pcl_state == PCL_VALID) {
+			ASSERT(pl->pcl_parent_pc != NULL);
+			cv_broadcast(&pl->pcl_parent_pc->pc_cv);
+		}
+		pln = pl->pcl_parent_next;
+		mutex_exit(&pl->pcl_lock);
+	}
+}
+
 /*
- * This is the first time this thread has ever polled,
- * so we have to create its pollstate structure.
- * This will persist for the life of the thread,
- * until it calls pollcleanup().
+ * Initialize thread pollstate structure.
+ * It will persist for the life of the thread, until it calls pollcleanup().
  */
 pollstate_t *
-pollstate_create(void)
+pollstate_create()
 {
-	pollstate_t *ps;
+	pollstate_t *ps = curthread->t_pollstate;
 
-	ps = kmem_zalloc(sizeof (pollstate_t), KM_SLEEP);
-	ps->ps_nsets = POLLFDSETS;
-	ps->ps_pcacheset = pcacheset_create(ps->ps_nsets);
+	if (ps == NULL) {
+		/*
+		 * This is the first time this thread has ever polled, so we
+		 * have to create its pollstate structure.
+		 */
+		ps = kmem_zalloc(sizeof (pollstate_t), KM_SLEEP);
+		ps->ps_nsets = POLLFDSETS;
+		ps->ps_pcacheset = pcacheset_create(ps->ps_nsets);
+		curthread->t_pollstate = ps;
+	} else {
+		ASSERT(ps->ps_depth == 0);
+		ASSERT(ps->ps_flags == 0);
+		ASSERT(ps->ps_pc_stack[0] == 0);
+	}
 	return (ps);
 }
 
@@ -2259,6 +2317,186 @@ pollstate_destroy(pollstate_t *ps)
 	kmem_free(ps, sizeof (pollstate_t));
 }
 
+static int
+pollstate_contend(pollstate_t *ps, pollcache_t *pcp)
+{
+	pollstate_t *rem, *next;
+	pollcache_t *desired_pc;
+	int result = 0, depth_total;
+
+	mutex_enter(&pollstate_contenders_lock);
+	/*
+	 * There is a small chance that the pollcache of interest became
+	 * available while we were waiting on the contenders lock.
+	 */
+	if (mutex_tryenter(&pcp->pc_lock) != 0) {
+		goto out;
+	}
+
+	/*
+	 * Walk the list of contended pollstates, searching for evidence of a
+	 * deadlock condition.
+	 */
+	depth_total = ps->ps_depth;
+	desired_pc = pcp;
+	for (rem = pollstate_contenders; rem != NULL; rem = next) {
+		int i, j;
+		next = rem->ps_contend_nextp;
+
+		/* Is this pollstate holding the pollcache of interest? */
+		for (i = 0; i < rem->ps_depth; i++) {
+			if (rem->ps_pc_stack[i] != desired_pc) {
+				continue;
+			}
+
+			/*
+			 * The remote pollstate holds the pollcache lock we
+			 * desire.  If it is waiting on a pollcache we hold,
+			 * then we can report the obvious deadlock.
+			 */
+			ASSERT(rem->ps_contend_pc != NULL);
+			for (j = 0; j < ps->ps_depth; j++) {
+				if (rem->ps_contend_pc == ps->ps_pc_stack[j]) {
+					rem->ps_flags |= POLLSTATE_STALEMATE;
+					result = -1;
+					goto out;
+				}
+			}
+
+			/*
+			 * The remote pollstate is not blocking on a pollcache
+			 * which would deadlock against us.  That pollcache
+			 * may, however, be held by a pollstate which would
+			 * result in a deadlock.
+			 *
+			 * To detect such a condition, we continue walking
+			 * through the list using the pollcache blocking the
+			 * remote thread as our new search target.
+			 *
+			 * Return to the front of pollstate_contenders since it
+			 * is not ordered to guarantee complete dependency
+			 * traversal.  The below depth tracking places an upper
+			 * bound on iterations.
+			 */
+			desired_pc = rem->ps_contend_pc;
+			next = pollstate_contenders;
+
+			/*
+			 * The recursion depth of the remote pollstate is used
+			 * to calculate a final depth for the local /dev/poll
+			 * recursion, since those locks will be acquired
+			 * eventually.  If that value exceeds the defined
+			 * limit, we can report the failure now instead of
+			 * recursing to that failure depth.
+			 */
+			depth_total += (rem->ps_depth - i);
+			if (depth_total >= POLLMAXDEPTH) {
+				result = -1;
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * No deadlock partner was found.  The only course of action is to
+	 * record ourself as a contended pollstate and wait for the pollcache
+	 * mutex to become available.
+	 */
+	ps->ps_contend_pc = pcp;
+	ps->ps_contend_nextp = pollstate_contenders;
+	ps->ps_contend_pnextp = &pollstate_contenders;
+	if (pollstate_contenders != NULL) {
+		pollstate_contenders->ps_contend_pnextp =
+		    &ps->ps_contend_nextp;
+	}
+	pollstate_contenders = ps;
+
+	mutex_exit(&pollstate_contenders_lock);
+	mutex_enter(&pcp->pc_lock);
+	mutex_enter(&pollstate_contenders_lock);
+
+	/*
+	 * Our acquisition of the pollcache mutex may be due to another thread
+	 * giving up in the face of deadlock with us.  If that is the case,
+	 * we too should report the failure.
+	 */
+	if ((ps->ps_flags & POLLSTATE_STALEMATE) != 0) {
+		result = -1;
+		ps->ps_flags &= ~POLLSTATE_STALEMATE;
+		mutex_exit(&pcp->pc_lock);
+	}
+
+	/* Remove ourself from the contenders list. */
+	if (ps->ps_contend_nextp != NULL) {
+		ps->ps_contend_nextp->ps_contend_pnextp =
+		    ps->ps_contend_pnextp;
+	}
+	*ps->ps_contend_pnextp = ps->ps_contend_nextp;
+	ps->ps_contend_pc = NULL;
+	ps->ps_contend_nextp = NULL;
+	ps->ps_contend_pnextp = NULL;
+
+out:
+	mutex_exit(&pollstate_contenders_lock);
+	return (result);
+}
+
+int
+pollstate_enter(pollcache_t *pcp)
+{
+	pollstate_t *ps = curthread->t_pollstate;
+	int i;
+
+	if (ps == NULL) {
+		/*
+		 * The thread pollstate may not be initialized if VOP_POLL is
+		 * called on a recursion-enabled /dev/poll handle from outside
+		 * the poll() or /dev/poll codepaths.
+		 */
+		return (PSE_FAIL_POLLSTATE);
+	}
+	if (ps->ps_depth >= POLLMAXDEPTH) {
+		return (PSE_FAIL_DEPTH);
+	}
+	/*
+	 * Check the desired pollcache against pollcaches we already have
+	 * locked.  Such a loop is the most simple deadlock scenario.
+	 */
+	for (i = 0; i < ps->ps_depth; i++) {
+		if (ps->ps_pc_stack[i] == pcp) {
+			return (PSE_FAIL_LOOP);
+		}
+	}
+	ASSERT(ps->ps_pc_stack[i] == NULL);
+
+	if (ps->ps_depth == 0) {
+		/* Locking initial the pollcache requires no caution */
+		mutex_enter(&pcp->pc_lock);
+	} else if (mutex_tryenter(&pcp->pc_lock) == 0) {
+		if (pollstate_contend(ps, pcp) != 0) {
+			/* This pollcache cannot safely be locked. */
+			return (PSE_FAIL_DEADLOCK);
+		}
+	}
+
+	ps->ps_pc_stack[ps->ps_depth++] = pcp;
+	return (PSE_SUCCESS);
+}
+
+void
+pollstate_exit(pollcache_t *pcp)
+{
+	pollstate_t *ps = curthread->t_pollstate;
+
+	VERIFY(ps != NULL);
+	VERIFY(ps->ps_pc_stack[ps->ps_depth - 1] == pcp);
+
+	mutex_exit(&pcp->pc_lock);
+	ps->ps_pc_stack[--ps->ps_depth] = NULL;
+	VERIFY(ps->ps_depth >= 0);
+}
+
+
 /*
  * We are holding the appropriate uf_lock entering this routine.
  * Bump up the ps_busy count to prevent the thread from exiting.
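
Note (illustrative, not part of the diff above): with the new contract, polllock() and pollunlock() can fail when a recursive /dev/poll operation prevents the pollcache locks from being dropped, and the block comment in the change directs callers to report POLLNVAL in that case. The following is a minimal sketch of how a driver poll entry point might honor that contract; mydev_chpoll(), mydev_lookup(), mydev_state_t, and its md_lock/md_pollhead/md_readable members are hypothetical names used only for this example, and the usual kernel poll and synchronization headers are assumed to be included.

    /* Hypothetical per-instance state for the example device. */
    typedef struct mydev_state {
            kmutex_t        md_lock;        /* protects md_readable */
            pollhead_t      md_pollhead;    /* pollhead used by pollwakeup() */
            boolean_t       md_readable;    /* data available to read */
    } mydev_state_t;

    static mydev_state_t *mydev_lookup(dev_t);  /* hypothetical lookup */

    static int
    mydev_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
        struct pollhead **phpp)
    {
            mydev_state_t *sp = mydev_lookup(dev);
            short revents = 0;

            /*
             * polllock() drops any pollcache locks held by this thread
             * before taking the driver mutex.  A non-zero return means a
             * recursive /dev/poll operation is in progress and those locks
             * could not be dropped, so report POLLNVAL instead of blocking.
             */
            if (polllock(&sp->md_pollhead, &sp->md_lock) != 0) {
                    *reventsp = POLLNVAL;
                    return (0);
            }

            if ((events & (POLLIN | POLLRDNORM)) != 0 && sp->md_readable)
                    revents |= (events & (POLLIN | POLLRDNORM));

            if (revents == 0 && !anyyet)
                    *phpp = &sp->md_pollhead;

            *reventsp = revents;
            mutex_exit(&sp->md_lock);
            return (0);
    }

On failure the routine reports POLLNVAL and still returns success, matching the guidance in the updated block comment, so the recursive poll surfaces an invalid-event indication rather than risking a lock-order deadlock.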
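A second illustrative sketch (also not part of the diff): pollstate_enter() and pollstate_exit() bracket any recursive pollcache locking, with the PSE_* codes telling the caller whether pc_lock was taken or why it was refused. The scan_pollcache() wrapper and its errno mapping below are hypothetical; the real consumers of this interface are the /dev/poll driver paths.

    static int
    scan_pollcache(pollcache_t *pcp)
    {
            int error = 0;

            switch (pollstate_enter(pcp)) {
            case PSE_SUCCESS:
                    /* pc_lock is held and recorded on ps_pc_stack. */
                    break;
            case PSE_FAIL_DEPTH:
            case PSE_FAIL_LOOP:
            case PSE_FAIL_DEADLOCK:
                    /* Recursion too deep, a lock loop, or a deadlock. */
                    return (ELOOP);
            case PSE_FAIL_POLLSTATE:
            default:
                    /* Calling thread has no pollstate; cannot lock safely. */
                    return (EINVAL);
            }

            /* ... examine or update the pollcache while pc_lock is held ... */

            pollstate_exit(pcp);    /* drops pc_lock, pops the stack entry */
            return (error);
    }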