diff options
Diffstat (limited to 'usr/src/uts/common/io/eventfd.c')
-rw-r--r-- | usr/src/uts/common/io/eventfd.c | 414 |
1 files changed, 414 insertions, 0 deletions
diff --git a/usr/src/uts/common/io/eventfd.c b/usr/src/uts/common/io/eventfd.c new file mode 100644 index 0000000000..6683a9ca8e --- /dev/null +++ b/usr/src/uts/common/io/eventfd.c @@ -0,0 +1,414 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2015 Joyent, Inc. All rights reserved. + */ + +/* + * Support for the eventfd facility, a Linux-borne facility for user-generated + * file descriptor-based events. + */ + +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/eventfd.h> +#include <sys/conf.h> +#include <sys/vmem.h> +#include <sys/sysmacros.h> +#include <sys/filio.h> +#include <sys/stat.h> +#include <sys/file.h> + +struct eventfd_state; +typedef struct eventfd_state eventfd_state_t; + +struct eventfd_state { + kmutex_t efd_lock; /* lock protecting state */ + boolean_t efd_semaphore; /* boolean: sema. semantics */ + kcondvar_t efd_cv; /* condvar */ + pollhead_t efd_pollhd; /* poll head */ + uint64_t efd_value; /* value */ + eventfd_state_t *efd_next; /* next state on global list */ +}; + +/* + * Internal global variables. + */ +static kmutex_t eventfd_lock; /* lock protecting state */ +static dev_info_t *eventfd_devi; /* device info */ +static vmem_t *eventfd_minor; /* minor number arena */ +static void *eventfd_softstate; /* softstate pointer */ +static eventfd_state_t *eventfd_state; /* global list of state */ + +/*ARGSUSED*/ +static int +eventfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) +{ + eventfd_state_t *state; + major_t major = getemajor(*devp); + minor_t minor = getminor(*devp); + + if (minor != EVENTFDMNRN_EVENTFD) + return (ENXIO); + + mutex_enter(&eventfd_lock); + + minor = (minor_t)(uintptr_t)vmem_alloc(eventfd_minor, 1, + VM_BESTFIT | VM_SLEEP); + + if (ddi_soft_state_zalloc(eventfd_softstate, minor) != DDI_SUCCESS) { + vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1); + mutex_exit(&eventfd_lock); + return (NULL); + } + + state = ddi_get_soft_state(eventfd_softstate, minor); + *devp = makedevice(major, minor); + + state->efd_next = eventfd_state; + eventfd_state = state; + + mutex_exit(&eventfd_lock); + + return (0); +} + +/*ARGSUSED*/ +static int +eventfd_read(dev_t dev, uio_t *uio, cred_t *cr) +{ + eventfd_state_t *state; + minor_t minor = getminor(dev); + uint64_t val, oval; + int err; + + if (uio->uio_resid < sizeof (val)) + return (EINVAL); + + state = ddi_get_soft_state(eventfd_softstate, minor); + + mutex_enter(&state->efd_lock); + + while (state->efd_value == 0) { + if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) { + mutex_exit(&state->efd_lock); + return (EAGAIN); + } + + if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) { + mutex_exit(&state->efd_lock); + return (EINTR); + } + } + + /* + * We have a non-zero value and we own the lock; our behavior now + * depends on whether or not EFD_SEMAPHORE was set when the eventfd + * was created. + */ + val = oval = state->efd_value; + + if (state->efd_semaphore) { + state->efd_value--; + val = 1; + } else { + state->efd_value = 0; + } + + err = uiomove(&val, sizeof (val), UIO_READ, uio); + + mutex_exit(&state->efd_lock); + + if (oval == EVENTFD_VALMAX) { + cv_broadcast(&state->efd_cv); + pollwakeup(&state->efd_pollhd, POLLWRNORM | POLLOUT); + } + + return (err); +} + +/*ARGSUSED*/ +static int +eventfd_write(dev_t dev, struct uio *uio, cred_t *credp) +{ + eventfd_state_t *state; + minor_t minor = getminor(dev); + uint64_t val, oval; + int err; + + if (uio->uio_resid < sizeof (val)) + return (EINVAL); + + if ((err = uiomove(&val, sizeof (val), UIO_WRITE, uio)) != 0) + return (err); + + if (val > EVENTFD_VALMAX) + return (EINVAL); + + state = ddi_get_soft_state(eventfd_softstate, minor); + + mutex_enter(&state->efd_lock); + + while (val > EVENTFD_VALMAX - state->efd_value) { + if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) { + mutex_exit(&state->efd_lock); + return (EAGAIN); + } + + if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) { + mutex_exit(&state->efd_lock); + return (EINTR); + } + } + + /* + * We now know that we can add the value without overflowing. + */ + state->efd_value = (oval = state->efd_value) + val; + + mutex_exit(&state->efd_lock); + + if (oval == 0) { + cv_broadcast(&state->efd_cv); + pollwakeup(&state->efd_pollhd, POLLRDNORM | POLLIN); + } + + return (0); +} + +/*ARGSUSED*/ +static int +eventfd_poll(dev_t dev, short events, int anyyet, short *reventsp, + struct pollhead **phpp) +{ + eventfd_state_t *state; + minor_t minor = getminor(dev); + short revents = 0; + + state = ddi_get_soft_state(eventfd_softstate, minor); + + mutex_enter(&state->efd_lock); + + if (state->efd_value > 0) + revents |= POLLRDNORM | POLLIN; + + if (state->efd_value < EVENTFD_VALMAX) + revents |= POLLWRNORM | POLLOUT; + + if (!(*reventsp = revents & events) && !anyyet) + *phpp = &state->efd_pollhd; + + mutex_exit(&state->efd_lock); + + return (0); +} + +/*ARGSUSED*/ +static int +eventfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) +{ + eventfd_state_t *state; + minor_t minor = getminor(dev); + + state = ddi_get_soft_state(eventfd_softstate, minor); + + switch (cmd) { + case EVENTFDIOC_SEMAPHORE: { + mutex_enter(&state->efd_lock); + state->efd_semaphore ^= 1; + mutex_exit(&state->efd_lock); + + return (0); + } + + default: + break; + } + + return (ENOTTY); +} + +/*ARGSUSED*/ +static int +eventfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p) +{ + eventfd_state_t *state, **sp; + minor_t minor = getminor(dev); + + state = ddi_get_soft_state(eventfd_softstate, minor); + + if (state->efd_pollhd.ph_list != NULL) { + pollwakeup(&state->efd_pollhd, POLLERR); + pollhead_clean(&state->efd_pollhd); + } + + mutex_enter(&eventfd_lock); + + /* + * Remove our state from our global list. + */ + for (sp = &eventfd_state; *sp != state; sp = &((*sp)->efd_next)) + VERIFY(*sp != NULL); + + *sp = (*sp)->efd_next; + + ddi_soft_state_free(eventfd_softstate, minor); + vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1); + + mutex_exit(&eventfd_lock); + + return (0); +} + +/*ARGSUSED*/ +static int +eventfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) +{ + mutex_enter(&eventfd_lock); + + if (ddi_soft_state_init(&eventfd_softstate, + sizeof (eventfd_state_t), 0) != 0) { + cmn_err(CE_NOTE, "/dev/eventfd failed to create soft state"); + mutex_exit(&eventfd_lock); + return (DDI_FAILURE); + } + + if (ddi_create_minor_node(devi, "eventfd", S_IFCHR, + EVENTFDMNRN_EVENTFD, DDI_PSEUDO, NULL) == DDI_FAILURE) { + cmn_err(CE_NOTE, "/dev/eventfd couldn't create minor node"); + ddi_soft_state_fini(&eventfd_softstate); + mutex_exit(&eventfd_lock); + return (DDI_FAILURE); + } + + ddi_report_dev(devi); + eventfd_devi = devi; + + eventfd_minor = vmem_create("eventfd_minor", (void *)EVENTFDMNRN_CLONE, + UINT32_MAX - EVENTFDMNRN_CLONE, 1, NULL, NULL, NULL, 0, + VM_SLEEP | VMC_IDENTIFIER); + + mutex_exit(&eventfd_lock); + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +eventfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + switch (cmd) { + case DDI_DETACH: + break; + + case DDI_SUSPEND: + return (DDI_SUCCESS); + + default: + return (DDI_FAILURE); + } + + mutex_enter(&eventfd_lock); + vmem_destroy(eventfd_minor); + + ddi_remove_minor_node(eventfd_devi, NULL); + eventfd_devi = NULL; + + ddi_soft_state_fini(&eventfd_softstate); + mutex_exit(&eventfd_lock); + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +eventfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) +{ + int error; + + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + *result = (void *)eventfd_devi; + error = DDI_SUCCESS; + break; + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)0; + error = DDI_SUCCESS; + break; + default: + error = DDI_FAILURE; + } + return (error); +} + +static struct cb_ops eventfd_cb_ops = { + eventfd_open, /* open */ + eventfd_close, /* close */ + nulldev, /* strategy */ + nulldev, /* print */ + nodev, /* dump */ + eventfd_read, /* read */ + eventfd_write, /* write */ + eventfd_ioctl, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + eventfd_poll, /* poll */ + ddi_prop_op, /* cb_prop_op */ + 0, /* streamtab */ + D_NEW | D_MP /* Driver compatibility flag */ +}; + +static struct dev_ops eventfd_ops = { + DEVO_REV, /* devo_rev */ + 0, /* refcnt */ + eventfd_info, /* get_dev_info */ + nulldev, /* identify */ + nulldev, /* probe */ + eventfd_attach, /* attach */ + eventfd_detach, /* detach */ + nodev, /* reset */ + &eventfd_cb_ops, /* driver operations */ + NULL, /* bus operations */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ +}; + +static struct modldrv modldrv = { + &mod_driverops, /* module type (this is a pseudo driver) */ + "eventfd support", /* name of module */ + &eventfd_ops, /* driver ops */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, + (void *)&modldrv, + NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} |