diff options
author | Patrick Mooney <pmooney@pfmooney.com> | 2016-04-28 17:29:34 +0000 |
---|---|---|
committer | Patrick Mooney <pmooney@pfmooney.com> | 2016-05-16 20:12:50 +0000 |
commit | a257e301376666442c2b655cf573c9d3e34b1ed5 (patch) | |
tree | aa6e8bd4f5ee60edcb41cc85028b452ebe7d0031 | |
parent | 853d741a1b53497dd22f8c9cc2d126ba76142275 (diff) | |
download | illumos-joyent-a257e301376666442c2b655cf573c9d3e34b1ed5.tar.gz |
OS-5370 panic in signalfd
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
-rw-r--r-- | usr/src/man/man3c/signalfd.3c | 22 | ||||
-rw-r--r-- | usr/src/uts/common/io/signalfd.c | 484 | ||||
-rw-r--r-- | usr/src/uts/common/sys/signalfd.h | 8 |
3 files changed, 286 insertions, 228 deletions
diff --git a/usr/src/man/man3c/signalfd.3c b/usr/src/man/man3c/signalfd.3c index 43699a50a5..eda57204b7 100644 --- a/usr/src/man/man3c/signalfd.3c +++ b/usr/src/man/man3c/signalfd.3c @@ -8,9 +8,9 @@ .\" source. A copy of the CDDL is also available via the Internet at .\" http://www.illumos.org/license/CDDL. .\" -.\" Copyright 2015, Joyent, Inc. +.\" Copyright 2016, Joyent, Inc. .\" -.Dd "Jun 15, 2015" +.Dd "May 05, 2016" .Dt SIGNALFD 3C .Os .Sh NAME @@ -150,6 +150,20 @@ typedef struct signalfd_siginfo { uint8_t ssi_pad[48]; /* pad size to 128 bytes */ } signalfd_siginfo_t; .Ed +.Sh NOTES +File descriptor duplication during fork presents a challenge to the +.Sy signalfd +facility since signal data and events are dependent on the process from which +they are queried. Its use with caching event systems such as +.Xr epoll 5 , +.Sy /dev/poll , +or +.Xr port_create 3C +can result in undefined behavior if signalfd and polling descriptors are used +together after being shared across a fork. Such restrictions do not apply if +the child only calls +.Xr close 2 +on the descriptors. .Sh RETURN VALUES Upon succesful completion, a file descriptor associated with the instance is returned. Otherwise, -1 is returned and errno is set to indicate the error. @@ -189,4 +203,6 @@ Unable to allocate state for the file descriptor. .Xr sigwait 2 , .Xr sigsetops 3C , .Xr sigwaitinfo 3C , -.Xr signal.h 3HEAD +.Xr signal.h 3HEAD , +.Xr epoll 5 , +.Xr port_create 3C diff --git a/usr/src/uts/common/io/signalfd.c b/usr/src/uts/common/io/signalfd.c index 850f321125..4ab4f36d4e 100644 --- a/usr/src/uts/common/io/signalfd.c +++ b/usr/src/uts/common/io/signalfd.c @@ -10,7 +10,7 @@ */ /* - * Copyright 2015 Joyent, Inc. + * Copyright 2016 Joyent, Inc. */ /* @@ -19,97 +19,73 @@ * * As described on the signalfd(3C) man page, the general idea behind these * file descriptors is that they can be used to synchronously consume signals - * via the read(2) syscall. 
That capability already exists with the - * sigwaitinfo(3C) function but the key advantage of signalfd is that, because - * it is file descriptor based, poll(2) can be used to determine when signals - * are available to be consumed. + * via the read(2) syscall. While that capability already exists with the + * sigwaitinfo(3C) function, signalfd holds an advantage since it is file + * descriptor based: It is able to use the event facilities (poll(2), /dev/poll, + * event ports) to notify interested parties when consumable signals arrive. + - * The general implementation uses signalfd_state to hold both the signal set - * and poll head for an open file descriptor. Because a process can be using - * different sigfds with different signal sets, each signalfd_state poll head - * can be thought of as an independent signal stream and the thread(s) waiting - * on that stream will get poll notification when any signal in the - * corresponding set is received. + * The signalfd lifecycle begins when a process opens /dev/signalfd. A minor + * will be allocated for them along with an associated signalfd_state_t struct. + * It is there where the mask of desired signals resides. + - * The sigfd_proc_state_t struct lives on the proc_t and maintains per-proc - * state for function callbacks and data when the proc needs to do work during - * signal delivery for pollwakeup. + * Reading from the signalfd is straightforward and mimics the kernel behavior + * for sigtimedwait(). Signals continue to live on either the proc's p_sig, or + * thread's t_sig, member. During a read operation, those which match the mask + * are consumed so they are no longer pending. + - * The read side of the implementation is straightforward and mimics the - * kernel behavior for sigtimedwait(). Signals continue to live on either - * the proc's p_sig, or thread's t_sig, member. Read consumes the signal so - * that it is no longer pending. + * The poll side is more complex. 
Every time a signal is delivered, all of the + * signalfds on the process need to be examined in order to pollwake threads + * waiting for signal arrival. * - * The poll side is more complex since all of the sigfds on the process need - * to be examined every time a signal is delivered to the process in order to - * pollwake any thread waiting in poll for that signal. + * When a thread polling on a signalfd requires a pollhead, several steps must + * be taken to safely ensure the proper result. A sigfd_proc_state_t is + * created for the calling process if it does not yet exist. It is there where + * a list of sigfd_poll_waiter_t structures reside which associate pollheads to + * signalfd_state_t entries. The sigfd_proc_state_t list is walked to find a + * sigfd_poll_waiter_t matching the signalfd_state_t which corresponds to the + * polled resource. If one is found, it is reused. Otherwise a new one is + * created, incrementing the refcount on the signalfd_state_t, and it is added + * to the sigfd_poll_waiter_t list. * - * Because it is likely that a process will only be using one, or a few, sigfds, - * but many total file descriptors, we maintain a list of sigfds which need - * pollwakeup. The list lives on the proc's p_sigfd struct. In this way only - * zero, or a few, of the state structs will need to be examined every time a - * signal is delivered to the process, instead of having to examine all of the - * file descriptors to find the state structs. When a state struct with a - * matching signal set is found then pollwakeup is called. + * The complications imposed by fork(2) are why the pollhead is stored in the + * associated sigfd_poll_waiter_t instead of directly in the signalfd_state_t. + * More than one process can hold a reference to the signalfd at a time but + * arriving signals should wake only process-local pollers. 
Additionally, + * signalfd_close is called only when the last referencing fd is closed, hiding + * occurrences of preceeding threads which released their references. This + * necessitates reference counting on the signalfd_state_t so it is able to + * persist after close until all poll references have been cleansed. Doing so + * ensures that blocked pollers which hold references to the signalfd_state_t + * will be able to do clean-up after the descriptor itself has been closed. * - * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list - * will clear out on its own. There is an exit helper (signalfd_exit_helper) - * which cleans up any remaining per-proc state when the process exits. + * When a signal arrives in a process polling on signalfd, signalfd_pollwake_cb + * is called via the pointer in sigfd_proc_state_t. It will walk over the + * sigfd_poll_waiter_t entries present in the list, searching for any + * associated with a signalfd_state_t with a matching signal mask. The + * approach of keeping the poller list in p_sigfd was chosen because a process + * is likely to use few signalfds relative to its total file descriptors. It + * reduces the work required for each received signal. * - * The main complexity with signalfd is the interaction of forking and polling. - * This interaction is complex because now two processes have a fd that - * references the same dev_t (and its associated signalfd_state), but signals - * go to only one of those processes. Also, we don't know when one of the - * processes closes its fd because our 'close' entry point is only called when - * the last fd is closed (which could be by either process). + * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list + * will clear out on its own. Any remaining per-process state which remains + * will be cleaned up by the exit helper (signalfd_exit_helper). 
* - * Because the state struct is referenced by both file descriptors, and the - * state struct represents a signal stream needing a pollwakeup, if both - * processes were polling then both processes would get a pollwakeup when a - * signal arrives for either process (that is, the pollhead is associated with - * our dev_t so when a signal arrives the pollwakeup wakes up all waiters). + * The structures associated with signalfd state are designed to operate + * correctly across fork, but there is one caveat that applies. Using + * fork-shared signalfd descriptors in conjunction with fork-shared caching poll + * descriptors (such as /dev/poll or event ports) will result in missed poll + * wake-ups. This is caused by the pollhead identity of signalfd descriptors + * being dependent on the process they are polled from. Because it has a + * thread-local cache, poll(2) is unaffected by this limitation. * - * Fortunately this is not a common problem in practice, but the implementation - * attempts to mitigate unexpected behavior. The typical behavior is that the - * parent has been polling the signalfd (which is why it was open in the first - * place) and the parent might have a pending signalfd_state (with the - * pollhead) on its per-process sigfd_list. After the fork the child will - * simply close that fd (among others) as part of the typical fork/close/exec - * pattern. Because the child will never poll that fd, it will never get any - * state onto its own sigfd_list (the child starts with a null list). The - * intention is that the child sees no pollwakeup activity for signals unless - * it explicitly reinvokes poll on the sigfd. + * Lock ordering: * - * As background, there are two primary polling cases to consider when the - * parent process forks: - * 1) If any thread is blocked in poll(2) then both the parent and child will - * return from the poll syscall with EINTR. 
This means that if either - * process wants to re-poll on a sigfd then it needs to re-run poll and - * would come back in to the signalfd_poll entry point. The parent would - * already have the dev_t's state on its sigfd_list and the child would not - * have anything there unless it called poll again on its fd. - * 2) If the process is using /dev/poll(7D) then the polling info is being - * cached by the poll device and the process might not currently be blocked - * on anything polling related. A subsequent DP_POLL ioctl will not invoke - * our signalfd_poll entry point again. Because the parent still has its - * sigfd_list setup, an incoming signal will hit our signalfd_pollwake_cb - * entry point, which in turn calls pollwake, and /dev/poll will do the - * right thing on DP_POLL. The child will not have a sigfd_list yet so the - * signal will not cause a pollwakeup. The dp code does its own handling for - * cleaning up its cache. + * 1. signalfd_lock + * 2. signalfd_state_t`sfd_lock * - * This leaves only one odd corner case. If the parent and child both use - * the dup-ed sigfd to poll then when a signal is delivered to either process - * there is no way to determine which one should get the pollwakeup (since - * both processes will be queued on the same signal stream poll head). What - * happens in this case is that both processes will return from poll, but only - * one of them will actually have a signal to read. The other will return - * from read with EAGAIN, or block. This case is actually similar to the - * situation within a single process which got two different sigfd's with the - * same mask (or poll on two fd's that are dup-ed). Both would return from poll - * when a signal arrives but only one read would consume the signal and the - * other read would fail or block. Applications which poll on shared fd's - * cannot assume that a subsequent read will actually obtain data. + * 1. proc_t`p_lock (to walk p_sigfd) + * 2. signalfd_state_t`sfd_lock + * 2a. 
signalfd_lock (after sfd_lock is dropped, when sfd_count falls to 0) */ #include <sys/ddi.h> @@ -128,114 +104,145 @@ typedef struct signalfd_state signalfd_state_t; struct signalfd_state { - kmutex_t sfd_lock; /* lock protecting state */ - pollhead_t sfd_pollhd; /* poll head */ - k_sigset_t sfd_set; /* signals for this fd */ - signalfd_state_t *sfd_next; /* next state on global list */ + list_node_t sfd_list; /* node in global list */ + kmutex_t sfd_lock; /* protects fields below */ + uint_t sfd_count; /* ref count */ + boolean_t sfd_valid; /* valid while open */ + k_sigset_t sfd_set; /* signals for this fd */ }; +typedef struct sigfd_poll_waiter { + list_node_t spw_list; + signalfd_state_t *spw_state; + pollhead_t spw_pollhd; +} sigfd_poll_waiter_t; + /* - * Internal global variables. + * Protects global state in signalfd_devi, signalfd_minor, signalfd_softstate, + * and signalfd_state (including sfd_list field of members) */ -static kmutex_t signalfd_lock; /* lock protecting state */ +static kmutex_t signalfd_lock; static dev_info_t *signalfd_devi; /* device info */ static id_space_t *signalfd_minor; /* minor number arena */ static void *signalfd_softstate; /* softstate pointer */ -static signalfd_state_t *signalfd_state; /* global list of state */ +static list_t signalfd_state; /* global list of state */ + -/* - * If we don't already have an entry in the proc's list for this state, add one. 
- */ static void -signalfd_wake_list_add(signalfd_state_t *state) +signalfd_state_enter(signalfd_state_t *state) { - proc_t *p = curproc; - list_t *lst; - sigfd_wake_list_t *wlp; - - ASSERT(MUTEX_HELD(&p->p_lock)); - ASSERT(p->p_sigfd != NULL); + ASSERT(MUTEX_HELD(&state->sfd_lock)); + ASSERT(state->sfd_count > 0); + VERIFY(state->sfd_valid == B_TRUE); - lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list; - for (wlp = list_head(lst); wlp != NULL; wlp = list_next(lst, wlp)) { - if (wlp->sigfd_wl_state == state) - break; - } - - if (wlp == NULL) { - wlp = kmem_zalloc(sizeof (sigfd_wake_list_t), KM_SLEEP); - wlp->sigfd_wl_state = state; - list_insert_head(lst, wlp); - } + state->sfd_count++; } static void -signalfd_wake_rm(list_t *lst, sigfd_wake_list_t *wlp) +signalfd_state_release(signalfd_state_t *state, boolean_t locked) { - list_remove(lst, wlp); - kmem_free(wlp, sizeof (sigfd_wake_list_t)); + ASSERT(MUTEX_HELD(&state->sfd_lock)); + ASSERT(state->sfd_count > 0); + + if (state->sfd_count == 1) { + VERIFY(state->sfd_valid == B_FALSE); + mutex_exit(&state->sfd_lock); + if (locked) { + ASSERT(MUTEX_HELD(&signalfd_lock)); + list_remove(&signalfd_state, state); + } else { + ASSERT(MUTEX_NOT_HELD(&signalfd_lock)); + mutex_enter(&signalfd_lock); + list_remove(&signalfd_state, state); + mutex_exit(&signalfd_lock); + } + kmem_free(state, sizeof (*state)); + return; + } + state->sfd_count--; + mutex_exit(&state->sfd_lock); } -static void -signalfd_wake_list_rm(proc_t *p, signalfd_state_t *state) +static sigfd_poll_waiter_t * +signalfd_wake_list_add(sigfd_proc_state_t *pstate, signalfd_state_t *state) { - sigfd_wake_list_t *wlp; - list_t *lst; + list_t *lst = &pstate->sigfd_list; + sigfd_poll_waiter_t *pw; - ASSERT(MUTEX_HELD(&p->p_lock)); + for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) { + if (pw->spw_state == state) + break; + } - if (p->p_sigfd == NULL) - return; + if (pw == NULL) { + pw = kmem_zalloc(sizeof (*pw), KM_SLEEP); + + 
mutex_enter(&state->sfd_lock); + signalfd_state_enter(state); + pw->spw_state = state; + mutex_exit(&state->sfd_lock); + list_insert_head(lst, pw); + } + return (pw); +} + +static sigfd_poll_waiter_t * +signalfd_wake_list_rm(sigfd_proc_state_t *pstate, signalfd_state_t *state) +{ + list_t *lst = &pstate->sigfd_list; + sigfd_poll_waiter_t *pw; - lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list; - for (wlp = list_head(lst); wlp != NULL; wlp = list_next(lst, wlp)) { - if (wlp->sigfd_wl_state == state) { - signalfd_wake_rm(lst, wlp); + for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) { + if (pw->spw_state == state) { break; } } - if (list_is_empty(lst)) { - ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb = NULL; - list_destroy(lst); - kmem_free(p->p_sigfd, sizeof (sigfd_proc_state_t)); - p->p_sigfd = NULL; + if (pw != NULL) { + list_remove(lst, pw); + mutex_enter(&state->sfd_lock); + signalfd_state_release(state, B_FALSE); + pw->spw_state = NULL; } + + return (pw); } static void signalfd_wake_list_cleanup(proc_t *p) { - sigfd_wake_list_t *wlp; + sigfd_proc_state_t *pstate = p->p_sigfd; + sigfd_poll_waiter_t *pw; list_t *lst; ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(pstate != NULL); + + lst = &pstate->sigfd_list; + while ((pw = list_remove_head(lst)) != NULL) { + signalfd_state_t *state = pw->spw_state; - ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb = NULL; + pw->spw_state = NULL; + mutex_enter(&state->sfd_lock); + signalfd_state_release(state, B_FALSE); - lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list; - while (!list_is_empty(lst)) { - wlp = (sigfd_wake_list_t *)list_remove_head(lst); - kmem_free(wlp, sizeof (sigfd_wake_list_t)); + pollwakeup(&pw->spw_pollhd, POLLERR); + pollhead_clean(&pw->spw_pollhd); + kmem_free(pw, sizeof (*pw)); } + list_destroy(lst); + + p->p_sigfd = NULL; + kmem_free(pstate, sizeof (*pstate)); } static void signalfd_exit_helper(void) { proc_t *p = curproc; - list_t *lst; - - /* This being non-null is the 
only way we can get here */ - ASSERT(p->p_sigfd != NULL); mutex_enter(&p->p_lock); - lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list; - signalfd_wake_list_cleanup(p); - list_destroy(lst); - kmem_free(p->p_sigfd, sizeof (sigfd_proc_state_t)); - p->p_sigfd = NULL; mutex_exit(&p->p_lock); } @@ -255,35 +262,40 @@ static void signalfd_pollwake_cb(void *arg0, int sig) { proc_t *p = (proc_t *)arg0; + sigfd_proc_state_t *pstate = (sigfd_proc_state_t *)p->p_sigfd; list_t *lst; - sigfd_wake_list_t *wlp; + sigfd_poll_waiter_t *pw; ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(pstate != NULL); - if (p->p_sigfd == NULL) - return; - - lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list; - wlp = list_head(lst); - while (wlp != NULL) { - signalfd_state_t *state = wlp->sigfd_wl_state; + lst = &pstate->sigfd_list; + pw = list_head(lst); + while (pw != NULL) { + signalfd_state_t *state = pw->spw_state; + sigfd_poll_waiter_t *next; + short pollev; mutex_enter(&state->sfd_lock); - - if (sigismember(&state->sfd_set, sig) && - state->sfd_pollhd.ph_list != NULL) { - sigfd_wake_list_t *tmp = wlp; - - /* remove it from the list */ - wlp = list_next(lst, wlp); - signalfd_wake_rm(lst, tmp); - - mutex_exit(&state->sfd_lock); - pollwakeup(&state->sfd_pollhd, POLLRDNORM | POLLIN); + if (!state->sfd_valid) { + pollev = POLLERR; + } else if (sigismember(&state->sfd_set, sig)) { + pollev = POLLRDNORM | POLLIN; } else { mutex_exit(&state->sfd_lock); - wlp = list_next(lst, wlp); + pw = list_next(lst, pw); + continue; } + + signalfd_state_release(state, B_FALSE); + pw->spw_state = NULL; + pollwakeup(&pw->spw_pollhd, pollev); + pollhead_clean(&pw->spw_pollhd); + + next = list_next(lst, pw); + list_remove(lst, pw); + kmem_free(pw, sizeof (*pw)); + pw = next; } } @@ -291,7 +303,7 @@ _NOTE(ARGSUSED(1)) static int signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) { - signalfd_state_t *state; + signalfd_state_t *state, **sstate; major_t major = getemajor(*devp); minor_t minor = 
getminor(*devp); @@ -301,18 +313,20 @@ signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) mutex_enter(&signalfd_lock); minor = (minor_t)id_allocff(signalfd_minor); - if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) { id_free(signalfd_minor, minor); mutex_exit(&signalfd_lock); return (ENODEV); } - state = ddi_get_soft_state(signalfd_softstate, minor); - *devp = makedevice(major, minor); + state = kmem_zalloc(sizeof (*state), KM_SLEEP); + state->sfd_valid = B_TRUE; + state->sfd_count = 1; + list_insert_head(&signalfd_state, (void *)state); - state->sfd_next = signalfd_state; - signalfd_state = state; + sstate = ddi_get_soft_state(signalfd_softstate, minor); + *sstate = state; + *devp = makedevice(major, minor); mutex_exit(&signalfd_lock); @@ -443,7 +457,7 @@ _NOTE(ARGSUSED(2)) static int signalfd_read(dev_t dev, uio_t *uio, cred_t *cr) { - signalfd_state_t *state; + signalfd_state_t *state, **sstate; minor_t minor = getminor(dev); boolean_t block = B_TRUE; k_sigset_t set; @@ -453,7 +467,8 @@ signalfd_read(dev_t dev, uio_t *uio, cred_t *cr) if (uio->uio_resid < sizeof (signalfd_siginfo_t)) return (EINVAL); - state = ddi_get_soft_state(signalfd_softstate, minor); + sstate = ddi_get_soft_state(signalfd_softstate, minor); + state = *sstate; if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) block = B_FALSE; @@ -466,15 +481,26 @@ signalfd_read(dev_t dev, uio_t *uio, cred_t *cr) return (set_errno(EINVAL)); do { - res = consume_signal(state->sfd_set, uio, block); - if (res == 0) - got_one = B_TRUE; + res = consume_signal(set, uio, block); - /* - * After consuming one signal we won't block trying to consume - * further signals. - */ - block = B_FALSE; + if (res == 0) { + /* + * After consuming one signal, do not block while + * trying to consume more. + */ + got_one = B_TRUE; + block = B_FALSE; + + /* + * Refresh the matching signal set in case it was + * updated during the wait. 
+ */ + mutex_enter(&state->sfd_lock); + set = state->sfd_set; + mutex_exit(&state->sfd_lock); + if (sigisempty(&set)) + break; + } } while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t)); if (got_one) @@ -503,13 +529,14 @@ static int signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp, struct pollhead **phpp) { - signalfd_state_t *state; + signalfd_state_t *state, **sstate; minor_t minor = getminor(dev); kthread_t *t = curthread; proc_t *p = ttoproc(t); short revents = 0; - state = ddi_get_soft_state(signalfd_softstate, minor); + sstate = ddi_get_soft_state(signalfd_softstate, minor); + state = *sstate; mutex_enter(&state->sfd_lock); @@ -519,39 +546,36 @@ signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp, mutex_exit(&state->sfd_lock); if (!(*reventsp = revents & events) && !anyyet) { - *phpp = &state->sfd_pollhd; + sigfd_proc_state_t *pstate; + sigfd_poll_waiter_t *pw; /* * Enable pollwakeup handling. */ - if (p->p_sigfd == NULL) { - sigfd_proc_state_t *pstate; + mutex_enter(&p->p_lock); + if ((pstate = (sigfd_proc_state_t *)p->p_sigfd) == NULL) { - pstate = kmem_zalloc(sizeof (sigfd_proc_state_t), - KM_SLEEP); + mutex_exit(&p->p_lock); + pstate = kmem_zalloc(sizeof (*pstate), KM_SLEEP); list_create(&pstate->sigfd_list, - sizeof (sigfd_wake_list_t), - offsetof(sigfd_wake_list_t, sigfd_wl_lst)); + sizeof (sigfd_poll_waiter_t), + offsetof(sigfd_poll_waiter_t, spw_list)); + pstate->sigfd_pollwake_cb = signalfd_pollwake_cb; + /* Check again, after blocking for the alloc. 
*/ mutex_enter(&p->p_lock); - /* check again now that we're locked */ if (p->p_sigfd == NULL) { p->p_sigfd = pstate; } else { /* someone beat us to it */ list_destroy(&pstate->sigfd_list); - kmem_free(pstate, sizeof (sigfd_proc_state_t)); + kmem_free(pstate, sizeof (*pstate)); + pstate = p->p_sigfd; } - mutex_exit(&p->p_lock); } - mutex_enter(&p->p_lock); - if (((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb == - NULL) { - ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb = - signalfd_pollwake_cb; - } - signalfd_wake_list_add(state); + pw = signalfd_wake_list_add(pstate, state); + *phpp = &pw->spw_pollhd; mutex_exit(&p->p_lock); } @@ -562,11 +586,12 @@ _NOTE(ARGSUSED(4)) static int signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) { - signalfd_state_t *state; + signalfd_state_t *state, **sstate; minor_t minor = getminor(dev); sigset_t mask; - state = ddi_get_soft_state(signalfd_softstate, minor); + sstate = ddi_get_soft_state(signalfd_softstate, minor); + state = *sstate; switch (cmd) { case SIGNALFDIOC_MASK: @@ -591,33 +616,42 @@ _NOTE(ARGSUSED(1)) static int signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p) { - signalfd_state_t *state, **sp; + signalfd_state_t *state, **sstate; + sigfd_poll_waiter_t *pw = NULL; minor_t minor = getminor(dev); proc_t *p = curproc; - state = ddi_get_soft_state(signalfd_softstate, minor); - - if (state->sfd_pollhd.ph_list != NULL) { - pollwakeup(&state->sfd_pollhd, POLLERR); - pollhead_clean(&state->sfd_pollhd); - } + sstate = ddi_get_soft_state(signalfd_softstate, minor); + state = *sstate; - /* Make sure our state is removed from our proc's pollwake list. */ + /* Make sure state is removed from this proc's pollwake list. 
*/ mutex_enter(&p->p_lock); - signalfd_wake_list_rm(p, state); - mutex_exit(&p->p_lock); + if (p->p_sigfd != NULL) { + sigfd_proc_state_t *pstate = p->p_sigfd; - mutex_enter(&signalfd_lock); + pw = signalfd_wake_list_rm(pstate, state); + if (list_is_empty(&pstate->sigfd_list)) { + signalfd_wake_list_cleanup(p); + } + } + mutex_exit(&p->p_lock); - /* Remove our state from our global list. */ - for (sp = &signalfd_state; *sp != state; sp = &((*sp)->sfd_next)) - VERIFY(*sp != NULL); + if (pw != NULL) { + pollwakeup(&pw->spw_pollhd, POLLERR); + pollhead_clean(&pw->spw_pollhd); + kmem_free(pw, sizeof (*pw)); + } - *sp = (*sp)->sfd_next; + mutex_enter(&signalfd_lock); + *sstate = NULL; ddi_soft_state_free(signalfd_softstate, minor); id_free(signalfd_minor, minor); + mutex_enter(&state->sfd_lock); + state->sfd_valid = B_FALSE; + signalfd_state_release(state, B_TRUE); + mutex_exit(&signalfd_lock); return (0); @@ -639,7 +673,7 @@ signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) } if (ddi_soft_state_init(&signalfd_softstate, - sizeof (signalfd_state_t), 0) != 0) { + sizeof (signalfd_state_t *), 0) != 0) { cmn_err(CE_WARN, "signalfd failed to create soft state"); id_space_destroy(signalfd_minor); mutex_exit(&signalfd_lock); @@ -660,6 +694,9 @@ signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) sigfd_exit_helper = signalfd_exit_helper; + list_create(&signalfd_state, sizeof (signalfd_state_t), + offsetof(signalfd_state_t, sfd_list)); + mutex_exit(&signalfd_lock); return (DDI_SUCCESS); @@ -677,10 +714,19 @@ signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) return (DDI_FAILURE); } - /* list should be empty */ - VERIFY(signalfd_state == NULL); - mutex_enter(&signalfd_lock); + + if (!list_is_empty(&signalfd_state)) { + /* + * There are dangling poll waiters holding signalfd_state_t + * entries on the global list. Detach is not possible until + * they purge themselves. 
+ */ + mutex_exit(&signalfd_lock); + return (DDI_FAILURE); + } + list_destroy(&signalfd_state); + id_space_destroy(signalfd_minor); ddi_remove_minor_node(signalfd_devi, NULL); diff --git a/usr/src/uts/common/sys/signalfd.h b/usr/src/uts/common/sys/signalfd.h index 2661d5a05f..89d0647020 100644 --- a/usr/src/uts/common/sys/signalfd.h +++ b/usr/src/uts/common/sys/signalfd.h @@ -10,7 +10,7 @@ */ /* - * Copyright 2015 Joyent, Inc. + * Copyright 2016 Joyent, Inc. */ /* @@ -75,13 +75,9 @@ extern int signalfd(int, const sigset_t *, int); #define SIGNALFDMNRN_SIGNALFD 0 #define SIGNALFDMNRN_CLONE 1 -typedef struct sigfd_wake_list { - list_node_t sigfd_wl_lst; - void *sigfd_wl_state; -} sigfd_wake_list_t; - /* * This holds the proc_t state for a process which is using signalfd. + * Its presence and contents are protected by p_lock. */ typedef struct sigfd_proc_state { void (*sigfd_pollwake_cb)(void *, int); |