summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--usr/src/cmd/devfsadm/misc_link.c3
-rw-r--r--usr/src/lib/libc/amd64/Makefile1
-rw-r--r--usr/src/lib/libc/i386/Makefile.com1
-rw-r--r--usr/src/lib/libc/port/mapfile-vers7
-rw-r--r--usr/src/lib/libc/port/sys/eventfd.c67
-rw-r--r--usr/src/lib/libc/sparc/Makefile.com1
-rw-r--r--usr/src/lib/libc/sparcv9/Makefile.com1
-rw-r--r--usr/src/man/man3c/Makefile1
-rw-r--r--usr/src/man/man3c/eventfd.3c184
-rw-r--r--usr/src/man/man5/Makefile1
-rw-r--r--usr/src/man/man5/eventfd.532
-rw-r--r--usr/src/man/man9e/chpoll.9e7
-rw-r--r--usr/src/pkg/manifests/SUNWcs.mf3
-rw-r--r--usr/src/pkg/manifests/system-header.mf1
-rw-r--r--usr/src/pkg/manifests/system-library.man3c.inc1
-rw-r--r--usr/src/pkg/manifests/system-library.man5.inc1
-rw-r--r--usr/src/uts/common/Makefile.files2
-rw-r--r--usr/src/uts/common/io/eventfd.c424
-rw-r--r--usr/src/uts/common/io/eventfd.conf16
-rw-r--r--usr/src/uts/common/sys/Makefile1
-rw-r--r--usr/src/uts/common/sys/eventfd.h68
-rw-r--r--usr/src/uts/intel/Makefile.intel1
-rw-r--r--usr/src/uts/intel/eventfd/Makefile68
-rw-r--r--usr/src/uts/sparc/Makefile.sparc1
-rw-r--r--usr/src/uts/sparc/eventfd/Makefile68
25 files changed, 955 insertions, 6 deletions
diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c
index b7aef8b00d..abb133bc6d 100644
--- a/usr/src/cmd/devfsadm/misc_link.c
+++ b/usr/src/cmd/devfsadm/misc_link.c
@@ -89,6 +89,9 @@ static devfsadm_create_t misc_cbt[] = {
{ "pseudo", "ddi_pseudo", "consms",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, consms
},
+ { "pseudo", "ddi_pseudo", "eventfd",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
+ },
{ "pseudo", "ddi_pseudo", "rsm",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
},
diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile
index 3d649dfe9d..0c1421bbf2 100644
--- a/usr/src/lib/libc/amd64/Makefile
+++ b/usr/src/lib/libc/amd64/Makefile
@@ -867,6 +867,7 @@ PORTSYS= \
execl.o \
execle.o \
execv.o \
+ eventfd.o \
fcntl.o \
getpagesizes.o \
getpeerucred.o \
diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com
index beb568ebf6..9a76280c0a 100644
--- a/usr/src/lib/libc/i386/Makefile.com
+++ b/usr/src/lib/libc/i386/Makefile.com
@@ -903,6 +903,7 @@ PORTSYS= \
chmod.o \
chown.o \
corectl.o \
+ eventfd.o \
exacctsys.o \
execl.o \
execle.o \
diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers
index e133407665..c4571ef2f1 100644
--- a/usr/src/lib/libc/port/mapfile-vers
+++ b/usr/src/lib/libc/port/mapfile-vers
@@ -93,6 +93,13 @@ $if _x86 && _ELF64
$add amd64
$endif
+SYMBOL_VERSION ILLUMOS_0.13 { # eventfd
+ protected:
+ eventfd;
+ eventfd_read;
+ eventfd_write;
+} ILLUMOS_0.12;
+
SYMBOL_VERSION ILLUMOS_0.12 { # arc4random and friends
protected:
arc4random;
diff --git a/usr/src/lib/libc/port/sys/eventfd.c b/usr/src/lib/libc/port/sys/eventfd.c
new file mode 100644
index 0000000000..f165491cc1
--- /dev/null
+++ b/usr/src/lib/libc/port/sys/eventfd.c
@@ -0,0 +1,67 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/eventfd.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+
+int
+eventfd(unsigned int initval, int flags)
+{
+ int oflags = O_RDWR;
+ uint64_t val = initval;
+ int fd;
+
+ if (flags & ~(EFD_NONBLOCK | EFD_CLOEXEC | EFD_SEMAPHORE)) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (flags & EFD_NONBLOCK)
+ oflags |= O_NONBLOCK;
+
+ if (flags & EFD_CLOEXEC)
+ oflags |= O_CLOEXEC;
+
+ if ((fd = open("/dev/eventfd", oflags)) < 0)
+ return (-1);
+
+ if ((flags & EFD_SEMAPHORE) &&
+ ioctl(fd, EVENTFDIOC_SEMAPHORE, 0) != 0) {
+ (void) close(fd);
+ return (-1);
+ }
+
+ if (write(fd, &val, sizeof (val)) != sizeof (val)) {
+ (void) close(fd);
+ return (-1);
+ }
+
+ return (fd);
+}
+
+int
+eventfd_read(int fd, eventfd_t *valp)
+{
+ return (read(fd, valp, sizeof (*valp)) != sizeof (*valp) ? -1 : 0);
+}
+
+int
+eventfd_write(int fd, eventfd_t val)
+{
+ return (write(fd, &val, sizeof (val)) != sizeof (val) ? -1 : 0);
+}
diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com
index 2991bb2d4d..3856c5332c 100644
--- a/usr/src/lib/libc/sparc/Makefile.com
+++ b/usr/src/lib/libc/sparc/Makefile.com
@@ -937,6 +937,7 @@ PORTSYS= \
chmod.o \
chown.o \
corectl.o \
+ eventfd.o \
exacctsys.o \
execl.o \
execle.o \
diff --git a/usr/src/lib/libc/sparcv9/Makefile.com b/usr/src/lib/libc/sparcv9/Makefile.com
index 255121cab6..1a65ab7680 100644
--- a/usr/src/lib/libc/sparcv9/Makefile.com
+++ b/usr/src/lib/libc/sparcv9/Makefile.com
@@ -881,6 +881,7 @@ PORTSYS= \
chmod.o \
chown.o \
corectl.o \
+ eventfd.o \
exacctsys.o \
execl.o \
execle.o \
diff --git a/usr/src/man/man3c/Makefile b/usr/src/man/man3c/Makefile
index 9e7f7fbff5..a31d024d15 100644
--- a/usr/src/man/man3c/Makefile
+++ b/usr/src/man/man3c/Makefile
@@ -112,6 +112,7 @@ MANFILES= __fbufsize.3c \
end.3c \
err.3c \
euclen.3c \
+ eventfd.3c \
exit.3c \
fattach.3c \
fclose.3c \
diff --git a/usr/src/man/man3c/eventfd.3c b/usr/src/man/man3c/eventfd.3c
new file mode 100644
index 0000000000..3a9f8be284
--- /dev/null
+++ b/usr/src/man/man3c/eventfd.3c
@@ -0,0 +1,184 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\"
+.Dd Dec 3, 2014
+.Dt EVENTFD 3C
+.Os
+.Sh NAME
+.Nm eventfd
+.Nd create a file descriptor for event notification
+.Sh SYNOPSIS
+.In sys/eventfd.h
+.Ft int
+.Fo eventfd
+.Fa "unsigned int initval"
+.Fa "int flags"
+.Fc
+.Sh DESCRIPTION
+The
+.Fn eventfd
+function creates an
+.Xr eventfd 5
+instance that has an associated 64-bit unsigned counter. It returns a file
+descriptor that can be operated upon via
+.Xr read 2 ,
+.Xr write 2
+and the facilities that notify of file descriptor activity (e.g.,
+.Xr poll 2 ,
+.Xr port_get 3C ,
+.Xr epoll_wait 3C Ns ).
+To dispose of the instance,
+.Xr close 2
+should be called on the file descriptor.
+.Pp
+The
+.Fa initval
+argument specifies the initial value of the 64-bit counter associated with the
+instance. (Note that this limits the initial value to be a 32-bit quantity
+despite the fact that the underlying counter is 64-bit.)
+.Pp
+The \fIflags\fR argument specifies additional parameters for the
+instance, and can have any of the following values:
+.Bl -hang -width Ds
+.It Sy EFD_CLOEXEC
+.Bd -filled -compact
+Instance will be closed upon an
+.Xr exec 2 ;
+see
+.Xr open 2 Ns 's
+description of
+.Sy O_CLOEXEC .
+.Ed
+.It Sy EFD_NONBLOCK
+.Bd -filled -compact
+Instance will be set to be non-blocking. A
+.Xr read 2
+on an
+.Sy eventfd
+instance that has been initialized with
+.Sy EFD_NONBLOCK
+will return
+.Sy EAGAIN
+in lieu of blocking if the count associated with the instance is zero.
+.Ed
+.It Sy EFD_SEMAPHORE
+.Bd -filled -compact
+Provide counting semaphore semantics whereby a
+.Xr read 2
+will atomically decrement rather than atomically clear the count when it
+becomes non-zero. See below for details on
+.Xr read 2
+semantics.
+.Ed
+.El
+.Pp
+The following operations can be performed upon an
+.Sy eventfd
+instance:
+.Bl -hang -width Ds
+.It Sy read(2)
+.Bd -filled -compact
+Atomically reads and modifies the value of the 64-bit counter associated
+with the instance. The precise semantics
+of
+.Xr read 2
+depend on the disposition of
+.Sy EFD_SEMAPHORE
+with
+respect to the instance: if
+.Sy EFD_SEMAPHORE
+was set when the instance was created,
+.Xr read 2
+will
+.Em atomically decrement
+the counter if (and when) it is non-zero, copying the value 1 to the eight
+byte buffer passed to the system call; if
+.Sy EFD_SEMAPHORE
+was not set,
+.Xr read 2
+will
+.Em atomically clear
+the counter if (and when) it is non-zero, copying the former value of the
+counter to the eight byte buffer passed to the
+system call. In either case,
+.Xr read 2
+will block if the counter is
+zero (or return
+.Sy EAGAIN
+if the instance was created with
+.Sy EFD_NONBLOCK Ns ).
+If the buffer specified to
+.Xr read 2
+is less than
+eight bytes in length,
+.Sy EINVAL
+will be returned.
+.Ed
+.It Sy write(2)
+.Bd -filled -compact
+Atomically adds the 64-bit value pointed to by the buffer to the 64-bit
+counter associated with the instance. If the resulting value would overflow,
+the
+.Xr write 2
+will block until the value would not overflow
+(or return
+.Sy EAGAIN
+if the instance was created with
+.Sy EFD_NONBLOCK Ns ).
+If the buffer specified to
+.Xr write 2
+is less than eight bytes in length,
+.Sy EINVAL
+will be returned.
+.Ed
+.It Sy poll(2), port_get(3C), epoll_wait(3C)
+.Bd -filled -compact
+Provide notification when the 64-bit counter associated
+with the instance is ready for reading or writing, as specified.
+If the 64-bit value associated with the instance is non-zero,
+.Sy POLLIN
+and
+.Sy POLLRDNORM
+will be set; if the value 1 can be added to the value
+without blocking,
+.Sy POLLOUT
+and
+.Sy POLLWRNORM
+will be set.
+.Ed
+.El
+.Sh RETURN VALUES
+Upon successful completion, a file descriptor associated with the instance
+is returned. Otherwise,
+.Sy -1 No is returned and
+.Sy errno
+is set to indicate the error.
+.Sh ERRORS
+The
+.Fn eventfd
+function will fail if:
+.Bl -tag -width Er
+.It Er EINVAL
+The
+.Fa flags
+are invalid.
+.It Er EMFILE
+There are currently
+.Pf { Sy OPEN_MAX Ns }
+file descriptors open in the calling process.
+.El
+.Sh SEE ALSO
+.Xr poll 2 ,
+.Xr port_get 3C ,
+.Xr epoll_wait 3C ,
+.Xr eventfd 5
diff --git a/usr/src/man/man5/Makefile b/usr/src/man/man5/Makefile
index 84d3f6aab6..7c928f3473 100644
--- a/usr/src/man/man5/Makefile
+++ b/usr/src/man/man5/Makefile
@@ -43,6 +43,7 @@ MANFILES= Intro.5 \
environ.5 \
eqn.5 \
eqnchar.5 \
+ eventfd.5 \
extendedFILE.5 \
filesystem.5 \
fnmatch.5 \
diff --git a/usr/src/man/man5/eventfd.5 b/usr/src/man/man5/eventfd.5
new file mode 100644
index 0000000000..bcf9f58347
--- /dev/null
+++ b/usr/src/man/man5/eventfd.5
@@ -0,0 +1,32 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\"
+.Dd Dec 3, 2014
+.Dt EVENTFD 5
+.Os
+.Sh NAME
+.Nm eventfd
+.Nd Linux-compatible user event notification facility
+.Sh SYNOPSIS
+.In sys/eventfd.h
+.Sh DESCRIPTION
+.Nm
+is a Linux-borne facility for sending and receiving user
+events via a file descriptor. While the facility itself is somewhat dubious
+(it can be mimicked in an entirely portable way with a pipe), it is
+small and straightforward and this implementation is entirely compatible
+with its Linux antecedent; see
+.Xr eventfd 3C
+for details.
+.Sh SEE ALSO
+.Xr eventfd 3C
diff --git a/usr/src/man/man9e/chpoll.9e b/usr/src/man/man9e/chpoll.9e
index a2adaf7a9c..27fe2a20e9 100644
--- a/usr/src/man/man9e/chpoll.9e
+++ b/usr/src/man/man9e/chpoll.9e
@@ -22,11 +22,9 @@ chpoll \- poll entry point for a non-STREAMS character driver
.fi
.SH INTERFACE LEVEL
-.sp
.LP
This entry point is optional. Architecture independent level 1 (DDI/DKI).
.SH PARAMETERS
-.sp
.ne 2
.na
\fB\fIdev\fR\fR
@@ -156,7 +154,6 @@ A pointer to a pointer to a \fBpollhead\fR structure.
.RE
.SH DESCRIPTION
-.sp
.LP
The \fBchpoll()\fR entry point routine is used by non-STREAMS character device
drivers that wish to support polling. The driver must implement the polling
@@ -170,7 +167,7 @@ called:
.sp
.in +2
.nf
-if (events_are_satisfied_now) {
+if (specified_events_are_satisfied_now) {
*reventsp = satisfied_events & events;
} else {
*reventsp = 0;
@@ -203,12 +200,10 @@ hold any mutex across the call to \fBpollwakeup\fR(9F) that is acquired in its
\fBchpoll()\fR entry point, or a deadlock may result.
.RE
.SH RETURN VALUES
-.sp
.LP
\fBchpoll()\fR should return \fB0\fR for success, or the appropriate error
number.
.SH SEE ALSO
-.sp
.LP
\fBpoll\fR(2), \fBnochpoll\fR(9F), \fBpollwakeup\fR(9F)
.sp
diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf
index 2a0a5486a5..71bd1cefdf 100644
--- a/usr/src/pkg/manifests/SUNWcs.mf
+++ b/usr/src/pkg/manifests/SUNWcs.mf
@@ -861,6 +861,7 @@ file path=usr/has/man/man1has/ex.1has
file path=usr/has/man/man1has/sh.1has
file path=usr/has/man/man1has/vi.1has
file path=usr/kernel/drv/$(ARCH64)/dump group=sys
+file path=usr/kernel/drv/$(ARCH64)/eventfd group=sys
file path=usr/kernel/drv/$(ARCH64)/fssnap group=sys
file path=usr/kernel/drv/$(ARCH64)/kstat group=sys
file path=usr/kernel/drv/$(ARCH64)/ksyms group=sys
@@ -869,6 +870,8 @@ file path=usr/kernel/drv/$(ARCH64)/ptm group=sys
file path=usr/kernel/drv/$(ARCH64)/pts group=sys
$(i386_ONLY)file path=usr/kernel/drv/dump group=sys
file path=usr/kernel/drv/dump.conf group=sys
+$(i386_ONLY)file path=usr/kernel/drv/eventfd group=sys
+file path=usr/kernel/drv/eventfd.conf group=sys
$(i386_ONLY)file path=usr/kernel/drv/fssnap group=sys
file path=usr/kernel/drv/fssnap.conf group=sys
$(i386_ONLY)file path=usr/kernel/drv/kstat group=sys
diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf
index eba786e9d0..18d7611554 100644
--- a/usr/src/pkg/manifests/system-header.mf
+++ b/usr/src/pkg/manifests/system-header.mf
@@ -958,6 +958,7 @@ file path=usr/include/sys/esunddi.h
file path=usr/include/sys/ethernet.h
file path=usr/include/sys/euc.h
file path=usr/include/sys/eucioctl.h
+file path=usr/include/sys/eventfd.h
file path=usr/include/sys/exacct.h
file path=usr/include/sys/exacct_catalog.h
file path=usr/include/sys/exacct_impl.h
diff --git a/usr/src/pkg/manifests/system-library.man3c.inc b/usr/src/pkg/manifests/system-library.man3c.inc
index ee9934f801..85b54e1430 100644
--- a/usr/src/pkg/manifests/system-library.man3c.inc
+++ b/usr/src/pkg/manifests/system-library.man3c.inc
@@ -107,6 +107,7 @@ file path=usr/share/man/man3c/encrypt.3c
file path=usr/share/man/man3c/end.3c
file path=usr/share/man/man3c/err.3c
file path=usr/share/man/man3c/euclen.3c
+file path=usr/share/man/man3c/eventfd.3c
file path=usr/share/man/man3c/exit.3c
file path=usr/share/man/man3c/fattach.3c
file path=usr/share/man/man3c/fclose.3c
diff --git a/usr/src/pkg/manifests/system-library.man5.inc b/usr/src/pkg/manifests/system-library.man5.inc
index 1344b918e2..fd222bcec4 100644
--- a/usr/src/pkg/manifests/system-library.man5.inc
+++ b/usr/src/pkg/manifests/system-library.man5.inc
@@ -27,6 +27,7 @@ file path=usr/share/man/man5/crypt_sha512.5
file path=usr/share/man/man5/crypt_sunmd5.5
file path=usr/share/man/man5/crypt_unix.5
file path=usr/share/man/man5/environ.5
+file path=usr/share/man/man5/eventfd.5
file path=usr/share/man/man5/extendedFILE.5
file path=usr/share/man/man5/fnmatch.5
file path=usr/share/man/man5/isalist.5
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 9c833b684a..39135c3a6e 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -987,6 +987,8 @@ DEVPOLL_OBJS += devpoll.o
DEVPOOL_OBJS += devpool.o
+EVENTFD_OBJS += eventfd.o
+
I8042_OBJS += i8042.o
KB8042_OBJS += \
diff --git a/usr/src/uts/common/io/eventfd.c b/usr/src/uts/common/io/eventfd.c
new file mode 100644
index 0000000000..e5082b49b6
--- /dev/null
+++ b/usr/src/uts/common/io/eventfd.c
@@ -0,0 +1,424 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Support for the eventfd facility, a Linux-borne facility for user-generated
+ * file descriptor-based events.
+ */
+
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/eventfd.h>
+#include <sys/conf.h>
+#include <sys/vmem.h>
+#include <sys/sysmacros.h>
+#include <sys/filio.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+
+struct eventfd_state;
+typedef struct eventfd_state eventfd_state_t;
+
+struct eventfd_state {
+ kmutex_t efd_lock; /* lock protecting state */
+ boolean_t efd_semaphore; /* boolean: sema. semantics */
+ kcondvar_t efd_cv; /* condvar */
+ pollhead_t efd_pollhd; /* poll head */
+ uint64_t efd_value; /* value */
+ eventfd_state_t *efd_next; /* next state on global list */
+};
+
+/*
+ * Internal global variables.
+ */
+static kmutex_t eventfd_lock; /* lock protecting state */
+static dev_info_t *eventfd_devi; /* device info */
+static vmem_t *eventfd_minor; /* minor number arena */
+static void *eventfd_softstate; /* softstate pointer */
+static eventfd_state_t *eventfd_state; /* global list of state */
+
+/*ARGSUSED*/
+static int
+eventfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+ eventfd_state_t *state;
+ major_t major = getemajor(*devp);
+ minor_t minor = getminor(*devp);
+
+ if (minor != EVENTFDMNRN_EVENTFD)
+ return (ENXIO);
+
+ mutex_enter(&eventfd_lock);
+
+ minor = (minor_t)(uintptr_t)vmem_alloc(eventfd_minor, 1,
+ VM_BESTFIT | VM_SLEEP);
+
+ if (ddi_soft_state_zalloc(eventfd_softstate, minor) != DDI_SUCCESS) {
+ vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1);
+ mutex_exit(&eventfd_lock);
+ return (ENXIO);
+ }
+
+ state = ddi_get_soft_state(eventfd_softstate, minor);
+ *devp = makedevice(major, minor);
+
+ state->efd_next = eventfd_state;
+ eventfd_state = state;
+
+ mutex_exit(&eventfd_lock);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+eventfd_read(dev_t dev, uio_t *uio, cred_t *cr)
+{
+ eventfd_state_t *state;
+ minor_t minor = getminor(dev);
+ uint64_t val, oval;
+ int err;
+
+ if (uio->uio_resid < sizeof (val))
+ return (EINVAL);
+
+ state = ddi_get_soft_state(eventfd_softstate, minor);
+
+ mutex_enter(&state->efd_lock);
+
+ while (state->efd_value == 0) {
+ if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
+ mutex_exit(&state->efd_lock);
+ return (EAGAIN);
+ }
+
+ if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
+ mutex_exit(&state->efd_lock);
+ return (EINTR);
+ }
+ }
+
+ /*
+ * We have a non-zero value and we own the lock; our behavior now
+ * depends on whether or not EFD_SEMAPHORE was set when the eventfd
+ * was created.
+ */
+ val = oval = state->efd_value;
+
+ if (state->efd_semaphore) {
+ state->efd_value--;
+ val = 1;
+ } else {
+ state->efd_value = 0;
+ }
+
+ err = uiomove(&val, sizeof (val), UIO_READ, uio);
+
+ mutex_exit(&state->efd_lock);
+
+ if (oval == EVENTFD_VALMAX) {
+ cv_broadcast(&state->efd_cv);
+ pollwakeup(&state->efd_pollhd, POLLWRNORM | POLLOUT);
+ }
+
+ return (err);
+}
+
+/*ARGSUSED*/
+static int
+eventfd_write(dev_t dev, struct uio *uio, cred_t *credp)
+{
+ eventfd_state_t *state;
+ minor_t minor = getminor(dev);
+ uint64_t val, oval;
+ int err;
+
+ if (uio->uio_resid < sizeof (val))
+ return (EINVAL);
+
+ if ((err = uiomove(&val, sizeof (val), UIO_WRITE, uio)) != 0)
+ return (err);
+
+ if (val > EVENTFD_VALMAX)
+ return (EINVAL);
+
+ state = ddi_get_soft_state(eventfd_softstate, minor);
+
+ mutex_enter(&state->efd_lock);
+
+ while (val > EVENTFD_VALMAX - state->efd_value) {
+ if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
+ mutex_exit(&state->efd_lock);
+ return (EAGAIN);
+ }
+
+ if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
+ mutex_exit(&state->efd_lock);
+ return (EINTR);
+ }
+ }
+
+ /*
+ * We now know that we can add the value without overflowing.
+ */
+ state->efd_value = (oval = state->efd_value) + val;
+
+ mutex_exit(&state->efd_lock);
+
+ if (oval == 0) {
+ cv_broadcast(&state->efd_cv);
+ pollwakeup(&state->efd_pollhd, POLLRDNORM | POLLIN);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+eventfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp)
+{
+ eventfd_state_t *state;
+ minor_t minor = getminor(dev);
+ short revents = 0;
+
+ state = ddi_get_soft_state(eventfd_softstate, minor);
+
+ mutex_enter(&state->efd_lock);
+
+ if (state->efd_value > 0)
+ revents |= POLLRDNORM | POLLIN;
+
+ if (state->efd_value < EVENTFD_VALMAX)
+ revents |= POLLWRNORM | POLLOUT;
+
+ if (!(*reventsp = revents & events) && !anyyet)
+ *phpp = &state->efd_pollhd;
+
+ mutex_exit(&state->efd_lock);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+eventfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
+{
+ eventfd_state_t *state;
+ minor_t minor = getminor(dev);
+
+ state = ddi_get_soft_state(eventfd_softstate, minor);
+
+ switch (cmd) {
+ case EVENTFDIOC_SEMAPHORE: {
+ mutex_enter(&state->efd_lock);
+ state->efd_semaphore ^= 1;
+ mutex_exit(&state->efd_lock);
+
+ return (0);
+ }
+
+ default:
+ break;
+ }
+
+ return (ENOTTY);
+}
+
+/*ARGSUSED*/
+static int
+eventfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
+{
+ eventfd_state_t *state, **sp;
+ minor_t minor = getminor(dev);
+
+ state = ddi_get_soft_state(eventfd_softstate, minor);
+
+ if (state->efd_pollhd.ph_list != NULL) {
+ pollwakeup(&state->efd_pollhd, POLLERR);
+ pollhead_clean(&state->efd_pollhd);
+ }
+
+ mutex_enter(&eventfd_lock);
+
+ /*
+ * Remove our state from our global list.
+ */
+ for (sp = &eventfd_state; *sp != state; sp = &((*sp)->efd_next))
+ VERIFY(*sp != NULL);
+
+ *sp = (*sp)->efd_next;
+
+ ddi_soft_state_free(eventfd_softstate, minor);
+ vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1);
+
+ mutex_exit(&eventfd_lock);
+
+ return (0);
+}
+
+static int
+eventfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+{
+ switch (cmd) {
+ case DDI_ATTACH:
+ break;
+
+ case DDI_RESUME:
+ return (DDI_SUCCESS);
+
+ default:
+ return (DDI_FAILURE);
+ }
+
+ mutex_enter(&eventfd_lock);
+
+ if (ddi_soft_state_init(&eventfd_softstate,
+ sizeof (eventfd_state_t), 0) != 0) {
+ cmn_err(CE_NOTE, "/dev/eventfd failed to create soft state");
+ mutex_exit(&eventfd_lock);
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_create_minor_node(devi, "eventfd", S_IFCHR,
+ EVENTFDMNRN_EVENTFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
+ cmn_err(CE_NOTE, "/dev/eventfd couldn't create minor node");
+ ddi_soft_state_fini(&eventfd_softstate);
+ mutex_exit(&eventfd_lock);
+ return (DDI_FAILURE);
+ }
+
+ ddi_report_dev(devi);
+ eventfd_devi = devi;
+
+ eventfd_minor = vmem_create("eventfd_minor", (void *)EVENTFDMNRN_CLONE,
+ UINT32_MAX - EVENTFDMNRN_CLONE, 1, NULL, NULL, NULL, 0,
+ VM_SLEEP | VMC_IDENTIFIER);
+
+ mutex_exit(&eventfd_lock);
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+eventfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ switch (cmd) {
+ case DDI_DETACH:
+ break;
+
+ case DDI_SUSPEND:
+ return (DDI_SUCCESS);
+
+ default:
+ return (DDI_FAILURE);
+ }
+
+ mutex_enter(&eventfd_lock);
+ vmem_destroy(eventfd_minor);
+
+ ddi_remove_minor_node(eventfd_devi, NULL);
+ eventfd_devi = NULL;
+
+ ddi_soft_state_fini(&eventfd_softstate);
+ mutex_exit(&eventfd_lock);
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+eventfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
+{
+ int error;
+
+ switch (infocmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ *result = (void *)eventfd_devi;
+ error = DDI_SUCCESS;
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)0;
+ error = DDI_SUCCESS;
+ break;
+ default:
+ error = DDI_FAILURE;
+ }
+ return (error);
+}
+
+static struct cb_ops eventfd_cb_ops = {
+ eventfd_open, /* open */
+ eventfd_close, /* close */
+ nulldev, /* strategy */
+ nulldev, /* print */
+ nodev, /* dump */
+ eventfd_read, /* read */
+ eventfd_write, /* write */
+ eventfd_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ eventfd_poll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_NEW | D_MP /* Driver compatibility flag */
+};
+
+static struct dev_ops eventfd_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ eventfd_info, /* get_dev_info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ eventfd_attach, /* attach */
+ eventfd_detach, /* detach */
+ nodev, /* reset */
+ &eventfd_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* module type (this is a pseudo driver) */
+ "eventfd support", /* name of module */
+ &eventfd_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modldrv,
+ NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ return (mod_remove(&modlinkage));
+}
diff --git a/usr/src/uts/common/io/eventfd.conf b/usr/src/uts/common/io/eventfd.conf
new file mode 100644
index 0000000000..f9c6dc11b2
--- /dev/null
+++ b/usr/src/uts/common/io/eventfd.conf
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+name="eventfd" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index c99ef8accb..fe76386127 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -219,6 +219,7 @@ CHKHDRS= \
ethernet.h \
euc.h \
eucioctl.h \
+ eventfd.h \
exacct.h \
exacct_catalog.h \
exacct_impl.h \
diff --git a/usr/src/uts/common/sys/eventfd.h b/usr/src/uts/common/sys/eventfd.h
new file mode 100644
index 0000000000..1b0d961b0b
--- /dev/null
+++ b/usr/src/uts/common/sys/eventfd.h
@@ -0,0 +1,68 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Header file to support the eventfd facility. Note that this facility
+ * is designed to be binary compatible with the Linux eventfd facility; values
+ * for constants here should therefore exactly match those found in Linux, and
+ * this facility shouldn't be extended independently of Linux.
+ */
+
+#ifndef _SYS_EVENTFD_H
+#define _SYS_EVENTFD_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef uint64_t eventfd_t;
+
+/*
+ * To assure binary compatibility with Linux, these values are fixed at their
+ * Linux equivalents, not their native ones.
+ */
+#define EFD_CLOEXEC 02000000 /* LX_O_CLOEXEC */
+#define EFD_NONBLOCK 04000 /* LX_O_NONBLOCK */
+#define EFD_SEMAPHORE 1
+
+/*
+ * These ioctl values are specific to the native implementation; applications
+ * shouldn't be using them directly, and they should therefore be safe to
+ * change without breaking apps.
+ */
+#define EVENTFDIOC (('e' << 24) | ('f' << 16) | ('d' << 8))
+#define EVENTFDIOC_SEMAPHORE (EVENTFDIOC | 1) /* toggle sem state */
+
+#ifndef _KERNEL
+
+extern int eventfd(unsigned int, int);
+extern int eventfd_read(int, eventfd_t *);
+extern int eventfd_write(int, eventfd_t);
+
+#else
+
+#define EVENTFDMNRN_EVENTFD 0
+#define EVENTFDMNRN_CLONE 1
+#define EVENTFD_VALMAX (ULLONG_MAX - 1ULL)
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_EVENTFD_H */
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index 06ca811f41..3809a6c45e 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -239,6 +239,7 @@ DRV_KMODS_32 += dnet
DRV_KMODS += dump
DRV_KMODS += ecpp
DRV_KMODS += emlxs
+DRV_KMODS += eventfd
DRV_KMODS += fd
DRV_KMODS += fdc
DRV_KMODS += fm
diff --git a/usr/src/uts/intel/eventfd/Makefile b/usr/src/uts/intel/eventfd/Makefile
new file mode 100644
index 0000000000..6edff0931a
--- /dev/null
+++ b/usr/src/uts/intel/eventfd/Makefile
@@ -0,0 +1,68 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = eventfd
+OBJECTS = $(EVENTFD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(EVENTFD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+CERRWARN += -_gcc=-Wno-parentheses
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/sparc/Makefile.sparc b/usr/src/uts/sparc/Makefile.sparc
index e0913ce81f..a42fa0be04 100644
--- a/usr/src/uts/sparc/Makefile.sparc
+++ b/usr/src/uts/sparc/Makefile.sparc
@@ -236,6 +236,7 @@ DRV_KMODS += nulldriver
DRV_KMODS += bridge trill
DRV_KMODS += bpf
DRV_KMODS += dca
+DRV_KMODS += eventfd
#
# Hardware Drivers in common space
diff --git a/usr/src/uts/sparc/eventfd/Makefile b/usr/src/uts/sparc/eventfd/Makefile
new file mode 100644
index 0000000000..063aa29b78
--- /dev/null
+++ b/usr/src/uts/sparc/eventfd/Makefile
@@ -0,0 +1,68 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = eventfd
+OBJECTS = $(EVENTFD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(EVENTFD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+CERRWARN += -_gcc=-Wno-parentheses
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ