Diffstat (limited to 'usr/src/uts/common/io/devpoll.c')

 usr/src/uts/common/io/devpoll.c | 490 ++++++++++++++++++++++++++++++++-----
 1 file changed, 444 insertions(+), 46 deletions(-)
diff --git a/usr/src/uts/common/io/devpoll.c b/usr/src/uts/common/io/devpoll.c
index a3fcbbba03..764636e218 100644
--- a/usr/src/uts/common/io/devpoll.c
+++ b/usr/src/uts/common/io/devpoll.c
@@ -25,6 +25,7 @@
 /*
  * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -45,6 +46,8 @@
 #include <sys/devpoll.h>
 #include <sys/rctl.h>
 #include <sys/resource.h>
+#include <sys/schedctl.h>
+#include <sys/epoll.h>
 
 #define RESERVED 1
@@ -237,7 +240,8 @@ dpinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
  * stale entries!
  */
 static int
-dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp)
+dp_pcache_poll(dp_entry_t *dpep, void *dpbuf,
+    pollcache_t *pcp, nfds_t nfds, int *fdcntp)
 {
     int       start, ostart, end;
     int       fdcnt, fd;
@@ -247,7 +251,10 @@ dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp)
     boolean_t no_wrap;
     pollhead_t *php;
     polldat_t *pdp;
+    pollfd_t  *pfdp;
+    epoll_event_t *epoll;
     int       error = 0;
+    short     mask = POLLRDHUP | POLLWRBAND;
 
     ASSERT(MUTEX_HELD(&pcp->pc_lock));
     if (pcp->pc_bitmap == NULL) {
@@ -257,6 +264,14 @@ dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp)
          */
        return (error);
     }
+
+    if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+        pfdp = NULL;
+        epoll = (epoll_event_t *)dpbuf;
+    } else {
+        pfdp = (pollfd_t *)dpbuf;
+        epoll = NULL;
+    }
 retry:
     start = ostart = pcp->pc_mapstart;
     end = pcp->pc_mapend;
@@ -316,11 +331,32 @@ repoll:
                 * polling a closed fd. Hope this will remind
                 * user to do a POLLREMOVE.
                 */
-                pfdp[fdcnt].fd = fd;
-                pfdp[fdcnt].revents = POLLNVAL;
-                fdcnt++;
+                if (pfdp != NULL) {
+                    pfdp[fdcnt].fd = fd;
+                    pfdp[fdcnt].revents = POLLNVAL;
+                    fdcnt++;
+                    continue;
+                }
+
+                /*
+                 * In the epoll compatibility case, we actually
+                 * perform the implicit removal to remain
+                 * closer to the epoll semantics.
+                 */
+                ASSERT(epoll != NULL);
+
+                pdp->pd_fp = NULL;
+                pdp->pd_events = 0;
+
+                if (php != NULL) {
+                    pollhead_delete(php, pdp);
+                    pdp->pd_php = NULL;
+                }
+
+                BT_CLEAR(pcp->pc_bitmap, fd);
                continue;
            }
+
            if (fp != pdp->pd_fp) {
                /*
                 * user is polling on a cached fd which was
@@ -376,9 +412,69 @@ repoll:
            }
 
            if (revent != 0) {
-                pfdp[fdcnt].fd = fd;
-                pfdp[fdcnt].events = pdp->pd_events;
-                pfdp[fdcnt].revents = revent;
+                if (pfdp != NULL) {
+                    pfdp[fdcnt].fd = fd;
+                    pfdp[fdcnt].events = pdp->pd_events;
+                    pfdp[fdcnt].revents = revent;
+                } else {
+                    epoll_event_t *ep = &epoll[fdcnt];
+
+                    ASSERT(epoll != NULL);
+                    ep->data.u64 = pdp->pd_epolldata;
+
+                    /*
+                     * If any of the event bits are set for
+                     * which poll and epoll representations
+                     * differ, swizzle in the native epoll
+                     * values.
+                     */
+                    if (revent & mask) {
+                        ep->events = (revent & ~mask) |
+                            ((revent & POLLRDHUP) ?
+                            EPOLLRDHUP : 0) |
+                            ((revent & POLLWRBAND) ?
+                            EPOLLWRBAND : 0);
+                    } else {
+                        ep->events = revent;
+                    }
+
+                    /*
+                     * We define POLLWRNORM to be POLLOUT,
+                     * but epoll has separate definitions
+                     * for them; if POLLOUT is set and the
+                     * user has asked for EPOLLWRNORM, set
+                     * that as well.
+                     */
+                    if ((revent & POLLOUT) &&
+                        (pdp->pd_events & EPOLLWRNORM)) {
+                        ep->events |= EPOLLWRNORM;
+                    }
+                }
+
+                /*
+                 * If POLLET is set, clear the bit in the
+                 * bitmap -- which effectively latches the
+                 * edge on a pollwakeup() from the driver.
+                 */
+                if (pdp->pd_events & POLLET)
+                    BT_CLEAR(pcp->pc_bitmap, fd);
+
+                /*
+                 * If POLLONESHOT is set, perform the implicit
+                 * POLLREMOVE.
+                 */
+                if (pdp->pd_events & POLLONESHOT) {
+                    pdp->pd_fp = NULL;
+                    pdp->pd_events = 0;
+
+                    if (php != NULL) {
+                        pollhead_delete(php, pdp);
+                        pdp->pd_php = NULL;
+                    }
+
+                    BT_CLEAR(pcp->pc_bitmap, fd);
+                }
+
                fdcnt++;
            } else if (php != NULL) {
                /*
@@ -392,7 +488,7 @@ repoll:
                 * in bitmap.
                 */
                if ((pdp->pd_php != NULL) &&
-                    ((pcp->pc_flag & T_POLLWAKE) == 0)) {
+                    ((pcp->pc_flag & PC_POLLWAKE) == 0)) {
                    BT_CLEAR(pcp->pc_bitmap, fd);
                }
                if (pdp->pd_php == NULL) {
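The swizzle above exists because only two event bits differ between the poll and epoll encodings; everything else passes through untouched. For illustration, a user-level sketch of the same mapping (the helper name is hypothetical; the POLL*/EPOLL* constants come from <poll.h> and <sys/epoll.h>):

    #include <poll.h>
    #include <stdint.h>
    #include <sys/epoll.h>

    /*
     * Mirror of the kernel logic in dp_pcache_poll(): preserve the bits
     * that poll and epoll share, translate the two that differ.
     */
    static uint32_t
    revents_to_epoll(short revent)
    {
        short mask = POLLRDHUP | POLLWRBAND;

        if ((revent & mask) == 0)
            return ((uint32_t)revent);

        return ((uint32_t)(revent & ~mask) |
            ((revent & POLLRDHUP) ? EPOLLRDHUP : 0) |
            ((revent & POLLWRBAND) ? EPOLLWRBAND : 0));
    }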
@@ -473,11 +569,15 @@ dpopen(dev_t *devp, int flag, int otyp, cred_t *credp)
     /*
      * allocate a pollcache skeleton here. Delay allocating bitmap
      * structures until dpwrite() time, since we don't know the
-     * optimal size yet.
+     * optimal size yet. We also delay setting the pid until either
+     * dpwrite() or attempt to poll on the instance, allowing parents
+     * to create instances of /dev/poll for their children. (In the
+     * epoll compatibility case, this check isn't performed to maintain
+     * semantic compatibility.)
      */
     pcp = pcache_alloc();
     dpep->dpe_pcache = pcp;
-    pcp->pc_pid = curproc->p_pid;
+    pcp->pc_pid = -1;
     *devp = makedevice(getmajor(*devp), minordev);  /* clone the driver */
     mutex_enter(&devpoll_lock);
     ASSERT(minordev < dptblsize);
@@ -499,7 +599,9 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
     dp_entry_t *dpep;
     pollcache_t *pcp;
     pollfd_t  *pollfdp, *pfdp;
-    int       error;
+    dvpoll_epollfd_t *epfdp;
+    uintptr_t limit;
+    int       error, size;
     ssize_t   uiosize;
     nfds_t    pollfdnum;
     struct pollhead *php = NULL;
@@ -515,11 +617,23 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
     ASSERT(dpep != NULL);
     mutex_exit(&devpoll_lock);
     pcp = dpep->dpe_pcache;
-    if (curproc->p_pid != pcp->pc_pid) {
-        return (EACCES);
+
+    if (!(dpep->dpe_flag & DP_ISEPOLLCOMPAT) &&
+        curproc->p_pid != pcp->pc_pid) {
+        if (pcp->pc_pid != -1)
+            return (EACCES);
+
+        pcp->pc_pid = curproc->p_pid;
     }
+
+    if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+        size = sizeof (dvpoll_epollfd_t);
+    } else {
+        size = sizeof (pollfd_t);
+    }
+
     uiosize = uiop->uio_resid;
-    pollfdnum = uiosize / sizeof (pollfd_t);
+    pollfdnum = uiosize / size;
     mutex_enter(&curproc->p_lock);
     if (pollfdnum > (uint_t)rctl_enforced_value(
         rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc)) {
@@ -534,6 +648,7 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
      * each polled fd to the cached set.
      */
     pollfdp = kmem_alloc(uiosize, KM_SLEEP);
+    limit = (uintptr_t)pollfdp + (pollfdnum * size);
 
     /*
      * Although /dev/poll uses the write(2) interface to cache fds, it's
@@ -555,9 +670,27 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
     mutex_enter(&dpep->dpe_lock);
     dpep->dpe_writerwait++;
     while (dpep->dpe_refcnt != 0) {
+        /*
+         * We need to do a bit of a dance here: we need to drop
+         * our dpe_lock and grab the pc_lock to broadcast the pc_cv
+         * to kick any DP_POLL/DP_PPOLL sleepers.
+         */
+        mutex_exit(&dpep->dpe_lock);
+        mutex_enter(&pcp->pc_lock);
+        pcp->pc_flag |= PC_WRITEWANTED;
+        cv_broadcast(&pcp->pc_cv);
+        mutex_exit(&pcp->pc_lock);
+        mutex_enter(&dpep->dpe_lock);
+
+        if (dpep->dpe_refcnt == 0)
+            break;
+
        if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
            dpep->dpe_writerwait--;
            mutex_exit(&dpep->dpe_lock);
+            mutex_enter(&pcp->pc_lock);
+            pcp->pc_flag &= ~PC_WRITEWANTED;
+            mutex_exit(&pcp->pc_lock);
            kmem_free(pollfdp, uiosize);
            return (set_errno(EINTR));
        }
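Deferring pc_pid (now initialized to -1 and latched on first write or poll) is what lets a parent create /dev/poll instances for a child: ownership binds to whichever process touches the instance first, not to the opener. A sketch of the newly possible pattern (error handling elided):

    #include <sys/devpoll.h>
    #include <fcntl.h>
    #include <poll.h>
    #include <unistd.h>

    int
    main(void)
    {
        /* Parent creates the instance but never writes to it... */
        int dpfd = open("/dev/poll", O_RDWR);
        pollfd_t pfd = { 0, POLLIN, 0 };

        if (fork() == 0) {
            /*
             * ...so this first write() binds pc_pid to the child.
             * Before this change it would have failed with EACCES.
             */
            (void) write(dpfd, &pfd, sizeof (pfd));
            _exit(0);
        }

        return (0);
    }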
@@ -565,24 +698,103 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
     dpep->dpe_writerwait--;
     dpep->dpe_flag |= DP_WRITER_PRESENT;
     dpep->dpe_refcnt++;
+
     mutex_exit(&dpep->dpe_lock);
 
     mutex_enter(&pcp->pc_lock);
+    pcp->pc_flag &= ~PC_WRITEWANTED;
+
     if (pcp->pc_bitmap == NULL) {
        pcache_create(pcp, pollfdnum);
     }
-    for (pfdp = pollfdp; pfdp < pollfdp + pollfdnum; pfdp++) {
+    for (pfdp = pollfdp; (uintptr_t)pfdp < limit;
+        pfdp = (pollfd_t *)((uintptr_t)pfdp + size)) {
        fd = pfdp->fd;
-        if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles)
+        if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles) {
+            /*
+             * epoll semantics demand that we return EBADF if our
+             * specified fd is invalid.
+             */
+            if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+                error = EBADF;
+                break;
+            }
+
            continue;
+        }
+
        pdp = pcache_lookup_fd(pcp, fd);
        if (pfdp->events != POLLREMOVE) {
+
+            fp = NULL;
+
            if (pdp == NULL) {
+                /*
+                 * If we're in epoll compatibility mode, check
+                 * that the fd is valid before allocating
+                 * anything for it; epoll semantics demand that
+                 * we return EBADF if our specified fd is
+                 * invalid.
+                 */
+                if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+                    if ((fp = getf(fd)) == NULL) {
+                        error = EBADF;
+                        break;
+                    }
+
+                    /*
+                     * To (greatly) reduce EEXIST false
+                     * positives, we denote that this
+                     * fp has been epoll()'d; see below.
+                     */
+                    fp->f_flag2 |= FEPOLLED;
+                }
+
                pdp = pcache_alloc_fd(0);
                pdp->pd_fd = fd;
                pdp->pd_pcache = pcp;
                pcache_insert_fd(pcp, pdp, pollfdnum);
+            } else {
+                /*
+                 * epoll semantics demand that we error out if
+                 * a file descriptor is added twice, which we
+                 * check (imperfectly) by checking if we both
+                 * have the file descriptor cached and the
+                 * file pointer that corresponds to the file
+                 * descriptor matches our cached value. (If
+                 * there is a pointer mismatch, the file
+                 * descriptor was closed without being removed.
+                 * The converse is clearly not true, however,
+                 * so to narrow the window by which a spurious
+                 * EEXIST may be returned, we also check if
+                 * this fp has been added to an epoll control
+                 * descriptor in the past; if it hasn't, we
+                 * know that this is due to fp reuse -- it's
+                 * not a true EEXIST case. (By performing this
+                 * additional check, we limit the window of
+                 * spurious EEXIST to situations where a single
+                 * file descriptor is being used across two or
+                 * more epoll control descriptors -- and even
+                 * then, the file descriptor must be closed and
+                 * reused in a relatively tight time span.)
+                 */
+                if ((dpep->dpe_flag & DP_ISEPOLLCOMPAT) &&
+                    pdp->pd_fp != NULL &&
+                    (fp = getf(fd)) != NULL &&
+                    fp == pdp->pd_fp &&
+                    (fp->f_flag2 & FEPOLLED)) {
+                    error = EEXIST;
+                    releasef(fd);
+                    break;
+                }
+            }
+
+            if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+                /* LINTED pointer alignment */
+                epfdp = (dvpoll_epollfd_t *)pfdp;
+                pdp->pd_epolldata = epfdp->dpep_data;
+            }
+
            ASSERT(pdp->pd_fd == fd);
            ASSERT(pdp->pd_pcache == pcp);
            if (fd >= pcp->pc_mapsize) {
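Note how the loop above converts conditions that classic /dev/poll quietly tolerates into hard errors when DP_ISEPOLLCOMPAT is set: an invalid fd fails the write() with EBADF, and re-adding an fd whose cached fp still matches (and carries FEPOLLED) fails with EEXIST. A hedged user-level illustration, assuming dvpoll_epollfd_t is laid out as a pollfd plus the 64-bit cookie (dpep_pollfd/dpep_data per <sys/devpoll.h> as extended by this change):

    #include <sys/devpoll.h>
    #include <errno.h>
    #include <poll.h>
    #include <stdint.h>
    #include <unistd.h>

    /* Add fd twice to an epoll-compat /dev/poll handle; expect EEXIST. */
    static int
    add_twice(int dpfd, int fd)
    {
        dvpoll_epollfd_t epfd;

        epfd.dpep_pollfd.fd = fd;
        epfd.dpep_pollfd.events = POLLIN;
        epfd.dpep_data = (uint64_t)fd;    /* returned in data.u64 */

        if (write(dpfd, &epfd, sizeof (epfd)) != sizeof (epfd))
            return (-1);    /* EBADF if fd is invalid */

        if (write(dpfd, &epfd, sizeof (epfd)) == -1 && errno == EEXIST)
            return (0);     /* duplicate add correctly rejected */

        return (-1);
    }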
@@ -593,7 +805,7 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
            if (fd > pcp->pc_mapend) {
                pcp->pc_mapend = fd;
            }
-            if ((fp = getf(fd)) == NULL) {
+            if (fp == NULL && (fp = getf(fd)) == NULL) {
                /*
                 * The fd is not valid. Since we can't pass
                 * this error back in the write() call, set
@@ -609,7 +821,7 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
             * same poll events.
             */
            if ((pdp->pd_events == pfdp->events) &&
-                (pdp->pd_fp != NULL)) {
+                (pdp->pd_fp == fp)) {
                /*
                 * the events are already cached
                 */
@@ -665,7 +877,17 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
            }
            releasef(fd);
        } else {
-            if (pdp == NULL) {
+            if (pdp == NULL || pdp->pd_fp == NULL) {
+                if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+                    /*
+                     * As with the add case (above), epoll
+                     * semantics demand that we error out
+                     * in this case.
+                     */
+                    error = ENOENT;
+                    break;
+                }
+
                continue;
            }
            ASSERT(pdp->pd_fd == fd);
@@ -690,6 +912,17 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
     return (error);
 }
 
+#define DP_SIGMASK_RESTORE(ksetp) { \
+    if (ksetp != NULL) { \
+        mutex_enter(&p->p_lock); \
+        if (lwp->lwp_cursig == 0) { \
+            t->t_hold = lwp->lwp_sigoldmask; \
+            t->t_flag &= ~T_TOMASK; \
+        } \
+        mutex_exit(&p->p_lock); \
+    } \
+}
+
 /*ARGSUSED*/
 static int
 dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
@@ -701,7 +934,7 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
     int       error = 0;
     STRUCT_DECL(dvpoll, dvpoll);
 
-    if (cmd == DP_POLL) {
+    if (cmd == DP_POLL || cmd == DP_PPOLL) {
        /* do this now, before we sleep on DP_WRITER_PRESENT */
        now = gethrtime();
     }
@@ -713,10 +946,39 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
     mutex_exit(&devpoll_lock);
     ASSERT(dpep != NULL);
     pcp = dpep->dpe_pcache;
-    if (curproc->p_pid != pcp->pc_pid)
-        return (EACCES);
 
     mutex_enter(&dpep->dpe_lock);
+
+    if (cmd == DP_EPOLLCOMPAT) {
+        if (dpep->dpe_refcnt != 0) {
+            /*
+             * We can't turn on epoll compatibility while there
+             * are outstanding operations.
+             */
+            mutex_exit(&dpep->dpe_lock);
+            return (EBUSY);
+        }
+
+        /*
+         * epoll compatibility is a one-way street: there's no way
+         * to turn it off for a particular open.
+         */
+        dpep->dpe_flag |= DP_ISEPOLLCOMPAT;
+        mutex_exit(&dpep->dpe_lock);
+
+        return (0);
+    }
+
+    if (!(dpep->dpe_flag & DP_ISEPOLLCOMPAT) &&
+        curproc->p_pid != pcp->pc_pid) {
+        if (pcp->pc_pid != -1) {
+            mutex_exit(&dpep->dpe_lock);
+            return (EACCES);
+        }
+
+        pcp->pc_pid = curproc->p_pid;
+    }
+
     while ((dpep->dpe_flag & DP_WRITER_PRESENT) ||
         (dpep->dpe_writerwait != 0)) {
        if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
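Because the EBUSY check requires dpe_refcnt to be zero and the flag can never be cleared, DP_EPOLLCOMPAT is effectively a property of the open(): it has to be the first operation issued on the descriptor. Sketched (helper name hypothetical):

    #include <sys/devpoll.h>
    #include <fcntl.h>
    #include <unistd.h>

    int
    epoll_compat_open(void)
    {
        int dpfd = open("/dev/poll", O_RDWR);

        if (dpfd == -1)
            return (-1);

        /* First operation on the fresh open: flip to epoll semantics. */
        if (ioctl(dpfd, DP_EPOLLCOMPAT, 0) == -1) {
            (void) close(dpfd);
            return (-1);    /* EBUSY if anything touched it first */
        }

        return (dpfd);
    }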
@@ -729,15 +991,36 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
 
     switch (cmd) {
     case DP_POLL:
+    case DP_PPOLL:
     {
        pollstate_t *ps;
        nfds_t      nfds;
        int         fdcnt = 0;
+        size_t      size, fdsize, dpsize;
        hrtime_t    deadline = 0;
+        k_sigset_t  *ksetp = NULL;
+        k_sigset_t  kset;
+        sigset_t    set;
+        kthread_t   *t = curthread;
+        klwp_t      *lwp = ttolwp(t);
+        struct proc *p = ttoproc(curthread);
 
        STRUCT_INIT(dvpoll, mode);
-        error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll),
-            STRUCT_SIZE(dvpoll));
+
+        /*
+         * The dp_setp member is only required/consumed for DP_PPOLL,
+         * which otherwise uses the same structure as DP_POLL.
+         */
+        if (cmd == DP_POLL) {
+            dpsize = (uintptr_t)STRUCT_FADDR(dvpoll, dp_setp) -
+                (uintptr_t)STRUCT_FADDR(dvpoll, dp_fds);
+        } else {
+            ASSERT(cmd == DP_PPOLL);
+            dpsize = STRUCT_SIZE(dvpoll);
+        }
+
+        error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll), dpsize);
+
        if (error) {
            DP_REFRELE(dpep);
            return (EFAULT);
        }
@@ -755,6 +1038,52 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
            deadline += now;
        }
 
+        if (cmd == DP_PPOLL) {
+            void *setp = STRUCT_FGETP(dvpoll, dp_setp);
+
+            if (setp != NULL) {
+                if (copyin(setp, &set, sizeof (set))) {
+                    DP_REFRELE(dpep);
+                    return (EFAULT);
+                }
+
+                sigutok(&set, &kset);
+                ksetp = &kset;
+
+                mutex_enter(&p->p_lock);
+                schedctl_finish_sigblock(t);
+                lwp->lwp_sigoldmask = t->t_hold;
+                t->t_hold = *ksetp;
+                t->t_flag |= T_TOMASK;
+
+                /*
+                 * Like ppoll() with a non-NULL sigset, we'll
+                 * call cv_reltimedwait_sig() just to check for
+                 * signals. This call will return immediately
+                 * with either 0 (signalled) or -1 (no signal).
+                 * There are some conditions whereby we can
+                 * get 0 from cv_reltimedwait_sig() without
+                 * a true signal (e.g., a directed stop), so
+                 * we restore our signal mask in the unlikely
+                 * event that lwp_cursig is 0.
+                 */
+                if (!cv_reltimedwait_sig(&t->t_delay_cv,
+                    &p->p_lock, 0, TR_CLOCK_TICK)) {
+                    if (lwp->lwp_cursig == 0) {
+                        t->t_hold = lwp->lwp_sigoldmask;
+                        t->t_flag &= ~T_TOMASK;
+                    }
+
+                    mutex_exit(&p->p_lock);
+
+                    DP_REFRELE(dpep);
+                    return (EINTR);
+                }
+
+                mutex_exit(&p->p_lock);
+            }
+        }
+
        if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) {
            /*
             * We are just using DP_POLL to sleep, so
@@ -762,17 +1091,29 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
             * Do not check for signals if we have a zero timeout.
             */
            DP_REFRELE(dpep);
-            if (deadline == 0)
+            if (deadline == 0) {
+                DP_SIGMASK_RESTORE(ksetp);
                return (0);
+            }
+
            mutex_enter(&curthread->t_delay_lock);
            while ((error =
                cv_timedwait_sig_hrtime(&curthread->t_delay_cv,
                &curthread->t_delay_lock, deadline)) > 0)
                continue;
            mutex_exit(&curthread->t_delay_lock);
+
+            DP_SIGMASK_RESTORE(ksetp);
+
            return (error == 0 ? EINTR : 0);
        }
 
+        if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+            size = nfds * (fdsize = sizeof (epoll_event_t));
+        } else {
+            size = nfds * (fdsize = sizeof (pollfd_t));
+        }
+
        /*
         * XXX It would be nice not to have to alloc each time, but it
         * requires another per thread structure hook. This can be
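DP_PPOLL is ppoll() for /dev/poll: dp_setp is installed as the thread's signal mask (via t_hold/T_TOMASK) before the wait and restored by DP_SIGMASK_RESTORE afterward, unless a signal is actually pending. From user level it is DP_POLL plus a mask; a sketch (dvpoll field names per <sys/devpoll.h>, with dp_setp consumed only by DP_PPOLL):

    #include <sys/devpoll.h>
    #include <poll.h>
    #include <signal.h>
    #include <unistd.h>

    /* Wait for events with SIGINT held off for the duration of the wait. */
    static int
    dp_ppoll_block_sigint(int dpfd, pollfd_t *fds, nfds_t nfds, int tmo_ms)
    {
        struct dvpoll dvp;
        sigset_t set;

        (void) sigemptyset(&set);
        (void) sigaddset(&set, SIGINT);

        dvp.dp_fds = fds;
        dvp.dp_nfds = nfds;
        dvp.dp_timeout = tmo_ms;
        dvp.dp_setp = &set;    /* ignored by plain DP_POLL */

        return (ioctl(dpfd, DP_PPOLL, &dvp));
    }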
@@ -782,37 +1123,45 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
            curthread->t_pollstate = pollstate_create();
            ps = curthread->t_pollstate;
        }
-        if (ps->ps_dpbufsize < nfds) {
-            struct proc *p = ttoproc(curthread);
+
+        if (ps->ps_dpbufsize < size) {
            /*
-             * The maximum size should be no large than
-             * current maximum open file count.
+             * If nfds is larger than twice the current maximum
+             * open file count, we'll silently clamp it. This
+             * only limits our exposure to allocating an
+             * inordinate amount of kernel memory; it doesn't
+             * otherwise affect the semantics. (We have this
+             * check at twice the maximum instead of merely the
+             * maximum because some applications pass an nfds that
+             * is only slightly larger than their limit.)
             */
            mutex_enter(&p->p_lock);
-            if (nfds > p->p_fno_ctl) {
-                mutex_exit(&p->p_lock);
-                DP_REFRELE(dpep);
-                return (EINVAL);
+            if ((nfds >> 1) > p->p_fno_ctl) {
+                nfds = p->p_fno_ctl;
+                size = nfds * fdsize;
            }
            mutex_exit(&p->p_lock);
-            kmem_free(ps->ps_dpbuf, sizeof (pollfd_t) *
-                ps->ps_dpbufsize);
-            ps->ps_dpbuf = kmem_zalloc(sizeof (pollfd_t) *
-                nfds, KM_SLEEP);
-            ps->ps_dpbufsize = nfds;
+
+            if (ps->ps_dpbufsize < size) {
+                kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize);
+                ps->ps_dpbuf = kmem_zalloc(size, KM_SLEEP);
+                ps->ps_dpbufsize = size;
+            }
        }
 
        mutex_enter(&pcp->pc_lock);
        for (;;) {
-            pcp->pc_flag = 0;
-            error = dp_pcache_poll(ps->ps_dpbuf, pcp, nfds, &fdcnt);
+            pcp->pc_flag &= ~PC_POLLWAKE;
+
+            error = dp_pcache_poll(dpep, ps->ps_dpbuf,
+                pcp, nfds, &fdcnt);
            if (fdcnt > 0 || error != 0)
                break;
 
            /*
             * A pollwake has happened since we polled cache.
             */
-            if (pcp->pc_flag & T_POLLWAKE)
+            if (pcp->pc_flag & PC_POLLWAKE)
                continue;
 
            /*
@@ -822,8 +1171,40 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
                /* immediate timeout; do not check signals */
                break;
            }
-            error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
-                &pcp->pc_lock, deadline);
+
+            if (!(pcp->pc_flag & PC_WRITEWANTED)) {
+                error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
+                    &pcp->pc_lock, deadline);
+            } else {
+                error = 1;
+            }
+
+            if (error > 0 && (pcp->pc_flag & PC_WRITEWANTED)) {
+                /*
+                 * We've been kicked off of our cv because a
+                 * writer wants in. We're going to drop our
+                 * reference count and then wait until the
+                 * writer is gone -- at which point we'll
+                 * reacquire the pc_lock and call into
+                 * dp_pcache_poll() to get the updated state.
+                 */
+                mutex_exit(&pcp->pc_lock);
+
+                mutex_enter(&dpep->dpe_lock);
+                dpep->dpe_refcnt--;
+                cv_broadcast(&dpep->dpe_cv);
+
+                while ((dpep->dpe_flag & DP_WRITER_PRESENT) ||
+                    (dpep->dpe_writerwait != 0)) {
+                    error = cv_wait_sig_swap(&dpep->dpe_cv,
+                        &dpep->dpe_lock);
+                }
+
+                dpep->dpe_refcnt++;
+                mutex_exit(&dpep->dpe_lock);
+                mutex_enter(&pcp->pc_lock);
+            }
+
            /*
             * If we were awakened by a signal or timeout
             * then break the loop, else poll again.
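The resize logic above also changes a failure mode: an oversized dp_nfds no longer draws EINVAL. Anything beyond twice p_fno_ctl is silently clamped to the open-file limit, bounding the kernel allocation without breaking callers that pass a count slightly above their limit. Illustratively (a sketch; the effective cap comes from the process's fd resource control):

    #include <sys/devpoll.h>
    #include <poll.h>
    #include <unistd.h>

    /*
     * Ask for far more result slots than any fd limit would allow.
     * Previously this failed with EINVAL; now the kernel caps its own
     * buffer and returns at most the clamped number of events, so the
     * result buffer only needs to cover the process's fd limit.
     */
    static int
    dp_poll_huge(int dpfd, pollfd_t *fds)
    {
        struct dvpoll dvp;

        dvp.dp_fds = fds;
        dvp.dp_nfds = 1 << 20;
        dvp.dp_timeout = 0;

        return (ioctl(dpfd, DP_POLL, &dvp));
    }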
@@ -837,9 +1218,11 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
        }
        mutex_exit(&pcp->pc_lock);
 
+        DP_SIGMASK_RESTORE(ksetp);
+
        if (error == 0 && fdcnt > 0) {
-            if (copyout(ps->ps_dpbuf, STRUCT_FGETP(dvpoll,
-                dp_fds), sizeof (pollfd_t) * fdcnt)) {
+            if (copyout(ps->ps_dpbuf,
+                STRUCT_FGETP(dvpoll, dp_fds), fdcnt * fdsize)) {
                DP_REFRELE(dpep);
                return (EFAULT);
            }
@@ -901,10 +1284,25 @@ static int
 dppoll(dev_t dev, short events, int anyyet, short *reventsp,
     struct pollhead **phpp)
 {
+    minor_t    minor;
+    dp_entry_t *dpep;
+
+    minor = getminor(dev);
+
+    mutex_enter(&devpoll_lock);
+    dpep = devpolltbl[minor];
+    ASSERT(dpep != NULL);
+    mutex_exit(&devpoll_lock);
+
     /*
      * Polling on a /dev/poll fd is not fully supported yet.
      */
-    *reventsp = POLLERR;
+    if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) {
+        /* no error in epoll compat. mode */
+        *reventsp = 0;
+    } else {
+        *reventsp = POLLERR;
+    }
 
     return (0);
 }
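End to end, an epoll-compat consumer combines the pieces above: enable compatibility first, add fds by writing dvpoll_epollfd_t records, then reap epoll_event_t results (carrying the u64 cookie stored at write() time) through DP_POLL. A sketch under the same header and field-name assumptions as the earlier examples:

    #include <sys/devpoll.h>
    #include <sys/epoll.h>
    #include <fcntl.h>
    #include <poll.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main(void)
    {
        int dpfd = open("/dev/poll", O_RDWR);
        dvpoll_epollfd_t add;
        epoll_event_t evs[8];
        struct dvpoll dvp;
        int n;

        if (dpfd == -1 || ioctl(dpfd, DP_EPOLLCOMPAT, 0) == -1)
            return (1);

        add.dpep_pollfd.fd = STDIN_FILENO;
        add.dpep_pollfd.events = POLLIN;
        add.dpep_data = 42;    /* comes back in data.u64 */
        if (write(dpfd, &add, sizeof (add)) != sizeof (add))
            return (1);

        /* In compat mode the result buffer holds epoll_event_t. */
        dvp.dp_fds = (pollfd_t *)(void *)evs;
        dvp.dp_nfds = 8;
        dvp.dp_timeout = -1;

        if ((n = ioctl(dpfd, DP_POLL, &dvp)) > 0)
            (void) printf("events 0x%x, data %llu\n", evs[0].events,
                (unsigned long long)evs[0].data.u64);

        return (n >= 0 ? 0 : 1);
    }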