summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2015-06-15 20:09:10 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2015-06-15 20:09:10 +0000
commit83b20b0e7fcfb37f08095fad70d955aece09b8f4 (patch)
tree33219e80479c1f1ed7669e47eea9f04f202d1249
parentb6b487396033090d1de5616ac688feb1734b7204 (diff)
downloadillumos-joyent-83b20b0e7fcfb37f08095fad70d955aece09b8f4.tar.gz
OS-4406 native signalfd support
-rw-r--r--manifest4
-rw-r--r--usr/src/cmd/devfsadm/misc_link.c3
-rw-r--r--usr/src/lib/libc/amd64/Makefile1
-rw-r--r--usr/src/lib/libc/i386/Makefile.com1
-rw-r--r--usr/src/lib/libc/port/mapfile-vers1
-rw-r--r--usr/src/lib/libc/port/sys/signalfd.c60
-rw-r--r--usr/src/lib/libc/sparc/Makefile.com1
-rw-r--r--usr/src/man/man3c/Makefile1
-rw-r--r--usr/src/man/man3c/signalfd.3c192
-rw-r--r--usr/src/uts/common/Makefile.files2
-rw-r--r--usr/src/uts/common/io/signalfd.c762
-rw-r--r--usr/src/uts/common/io/signalfd.conf16
-rw-r--r--usr/src/uts/common/os/exit.c8
-rw-r--r--usr/src/uts/common/os/fork.c8
-rw-r--r--usr/src/uts/common/os/sig.c17
-rw-r--r--usr/src/uts/common/sys/Makefile1
-rw-r--r--usr/src/uts/common/sys/proc.h2
-rw-r--r--usr/src/uts/common/sys/signalfd.h101
-rw-r--r--usr/src/uts/common/sys/thread.h2
-rw-r--r--usr/src/uts/intel/Makefile.intel1
-rw-r--r--usr/src/uts/intel/signalfd/Makefile68
-rw-r--r--usr/src/uts/sparc/Makefile.sparc1
-rw-r--r--usr/src/uts/sparc/signalfd/Makefile68
23 files changed, 1320 insertions, 1 deletions
diff --git a/manifest b/manifest
index fd04ed0ebf..1c6e3a09c8 100644
--- a/manifest
+++ b/manifest
@@ -4196,6 +4196,7 @@ f usr/include/sys/shm_impl.h 0644 root bin
f usr/include/sys/sid.h 0644 root bin
f usr/include/sys/siginfo.h 0644 root bin
f usr/include/sys/signal.h 0644 root bin
+f usr/include/sys/signalfd.h 0644 root bin
f usr/include/sys/sleepq.h 0644 root bin
f usr/include/sys/smbios.h 0644 root bin
f usr/include/sys/smbios_impl.h 0644 root bin
@@ -4529,6 +4530,7 @@ f usr/kernel/drv/amd64/pm 0755 root sys
f usr/kernel/drv/amd64/pool 0755 root sys
f usr/kernel/drv/amd64/ptm 0755 root sys
f usr/kernel/drv/amd64/pts 0755 root sys
+f usr/kernel/drv/amd64/signalfd 0755 root sys
f usr/kernel/drv/amd64/smbsrv 0755 root sys
f usr/kernel/drv/amd64/sppp 0755 root sys
f usr/kernel/drv/amd64/sppptun 0755 root sys
@@ -4553,6 +4555,7 @@ f usr/kernel/drv/pm.conf 0644 root sys
f usr/kernel/drv/pool.conf 0644 root sys
f usr/kernel/drv/ptm.conf 0644 root sys
f usr/kernel/drv/pts.conf 0644 root sys
+f usr/kernel/drv/signalfd.conf 0644 root sys
f usr/kernel/drv/smbsrv.conf 0644 root sys
f usr/kernel/drv/sppp.conf 0644 root sys
f usr/kernel/drv/sppptun.conf 0644 root sys
@@ -13513,6 +13516,7 @@ f usr/share/man/man3c/siginterrupt.3c 0444 root bin
s usr/share/man/man3c/sigismember.3c=sigsetops.3c
s usr/share/man/man3c/siglongjmp.3c=setjmp.3c
f usr/share/man/man3c/signal.3c 0444 root bin
+f usr/share/man/man3c/signalfd.3c 0444 root bin
s usr/share/man/man3c/sigpause.3c=signal.3c
f usr/share/man/man3c/sigqueue.3c 0444 root bin
s usr/share/man/man3c/sigrelse.3c=signal.3c
diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c
index fa55eb401f..55aff1e4f7 100644
--- a/usr/src/cmd/devfsadm/misc_link.c
+++ b/usr/src/cmd/devfsadm/misc_link.c
@@ -94,6 +94,9 @@ static devfsadm_create_t misc_cbt[] = {
{ "pseudo", "ddi_pseudo", "eventfd",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
},
+ { "pseudo", "ddi_pseudo", "signalfd",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
+ },
{ "pseudo", "ddi_pseudo", "rsm",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
},
diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile
index dc86e1a197..af70e66123 100644
--- a/usr/src/lib/libc/amd64/Makefile
+++ b/usr/src/lib/libc/amd64/Makefile
@@ -904,6 +904,7 @@ PORTSYS= \
sidsys.o \
siginterrupt.o \
signal.o \
+ signalfd.o \
sigpending.o \
sigstack.o \
stat.o \
diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com
index 9896614023..241afc45cb 100644
--- a/usr/src/lib/libc/i386/Makefile.com
+++ b/usr/src/lib/libc/i386/Makefile.com
@@ -944,6 +944,7 @@ PORTSYS= \
sidsys.o \
siginterrupt.o \
signal.o \
+ signalfd.o \
sigpending.o \
sigstack.o \
stat.o \
diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers
index 56f372fe80..296e49fe3b 100644
--- a/usr/src/lib/libc/port/mapfile-vers
+++ b/usr/src/lib/libc/port/mapfile-vers
@@ -3003,6 +3003,7 @@ $endif
_so_shutdown;
_so_socket;
_so_socketpair;
+ signalfd;
str2group;
str2passwd;
str2spwd;
diff --git a/usr/src/lib/libc/port/sys/signalfd.c b/usr/src/lib/libc/port/sys/signalfd.c
new file mode 100644
index 0000000000..0080c52bdf
--- /dev/null
+++ b/usr/src/lib/libc/port/sys/signalfd.c
@@ -0,0 +1,60 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015, Joyent, Inc.
+ */
+
+#include <sys/signalfd.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+
+/*
+ * Create a new signalfd descriptor (fd == -1) or update the signal mask of
+ * an existing one (fd != -1).
+ *
+ * flags may contain only SFD_NONBLOCK and SFD_CLOEXEC; any other bit fails
+ * with EINVAL, whether or not a new descriptor is being created. The flags
+ * are only applied when a new descriptor is opened.
+ *
+ * Returns the descriptor on success. On failure returns -1 with errno set;
+ * a descriptor opened by this call is closed again before returning.
+ */
+int
+signalfd(int fd, const sigset_t *mask, int flags)
+{
+	int origfd = fd;
+
+	/*
+	 * Validate flags up front so that bad flags are rejected even when
+	 * the caller is only updating the mask of an existing descriptor.
+	 */
+	if (flags & ~(SFD_NONBLOCK | SFD_CLOEXEC)) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	if (fd == -1) {
+		int oflags = O_RDONLY;
+
+		if (flags & SFD_NONBLOCK)
+			oflags |= O_NONBLOCK;
+
+		if (flags & SFD_CLOEXEC)
+			oflags |= O_CLOEXEC;
+
+		if ((fd = open("/dev/signalfd", oflags)) < 0)
+			return (-1);
+	}
+
+	if (ioctl(fd, SIGNALFDIOC_MASK, mask) != 0) {
+		/*
+		 * If we opened the descriptor ourselves, don't leak it;
+		 * preserve the ioctl's errno across the close.
+		 *
+		 * Otherwise we were asked to modify an existing sigfd, so
+		 * the failure means it's not a valid fd or not a sigfd, and
+		 * ioctl has already set the errno for those cases.
+		 */
+		if (origfd == -1) {
+			int old = errno;
+			(void) close(fd);
+			errno = old;
+		}
+		return (-1);
+	}
+
+	return (fd);
+}
diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com
index 1e54306c85..2091b1bd3c 100644
--- a/usr/src/lib/libc/sparc/Makefile.com
+++ b/usr/src/lib/libc/sparc/Makefile.com
@@ -978,6 +978,7 @@ PORTSYS= \
sidsys.o \
siginterrupt.o \
signal.o \
+ signalfd.o \
sigpending.o \
sigstack.o \
stat.o \
diff --git a/usr/src/man/man3c/Makefile b/usr/src/man/man3c/Makefile
index 47b3014510..975c56e921 100644
--- a/usr/src/man/man3c/Makefile
+++ b/usr/src/man/man3c/Makefile
@@ -415,6 +415,7 @@ MANFILES= __fbufsize.3c \
sigfpe.3c \
siginterrupt.3c \
signal.3c \
+ signalfd.3c \
sigqueue.3c \
sigsetops.3c \
sigstack.3c \
diff --git a/usr/src/man/man3c/signalfd.3c b/usr/src/man/man3c/signalfd.3c
new file mode 100644
index 0000000000..43699a50a5
--- /dev/null
+++ b/usr/src/man/man3c/signalfd.3c
@@ -0,0 +1,192 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\" Copyright 2015, Joyent, Inc.
+.\"
+.Dd "Jun 15, 2015"
+.Dt SIGNALFD 3C
+.Os
+.Sh NAME
+.Nm signalfd
+.Nd create or modify a file descriptor for signal handling
+.Sh SYNOPSIS
+.In sys/signalfd.h
+.
+.Ft int
+.Fo signalfd
+.Fa "int fd"
+.Fa "const sigset_t *mask"
+.Fa "int flags"
+.Fc
+.
+.Sh DESCRIPTION
+The
+.Fn signalfd
+function returns a file descriptor that can be used
+for synchronous consumption of signals. The file descriptor can be operated
+upon via
+.Xr read 2
+and the facilities that notify of file descriptor activity (e.g.
+.Xr poll 2 ,
+.Xr port_get 3C ,
+.Xr epoll_wait 3C
+). To dispose of the instance
+.Xr close 2
+should be called on the file descriptor.
+.Pp
+If the
+.Va fd
+argument is -1, a new signalfd file descriptor will be
+returned, otherwise the
+.Va fd
+argument should be an existing signalfd file descriptor whose signal mask will
+be updated.
+.Pp
+The
+.Va mask
+argument specifies the set of signals that are relevant to the file descriptor.
+It may be manipulated with the standard signal set manipulation functions
+documented in
+.Xr sigsetops 3C .
+Signals in the mask which cannot be caught (e.g.
+.Fa SIGKILL )
+are ignored.
+.Pp
+The
+.Va flags
+argument specifies additional parameters for the instance, and can have any of
+the following values:
+.Bl -tag -width Dv
+.It Sy SFD_CLOEXEC
+Instance will be closed upon an
+.Xr exec 2 ;
+see description for
+.Fa O_CLOEXEC
+in
+.Xr open 2 .
+.It Sy SFD_NONBLOCK
+Instance will be set to be non-blocking. A
+.Xr read 2
+on a signalfd instance that has been initialized with
+.Fa SFD_NONBLOCK ,
+or made non-blocking in other ways, will return
+.Er EAGAIN
+in lieu of blocking if there are no signals from the
+.Va mask
+that are pending.
+.El
+.Pp
+As with
+.Xr sigwait 2 ,
+reading a signal from the file descriptor will consume the signal. The signals
+used with signalfd file descriptors are normally first blocked so that their
+handler does not run when a signal arrives. If the signal is not blocked the
+behavior matches that of
+.Xr sigwait 2 ;
+if a
+.Xr read 2
+is pending then the signal is consumed by the read, otherwise the signal is
+consumed by the handler.
+.Pp
+The following operations can be performed upon a signalfd file descriptor:
+.Bl -tag -width Dv
+.It Sy read(2)
+Reads and consumes one or more of the pending signals that match the file
+descriptor's
+.Va mask .
+The read buffer must be large enough to hold one or more
+.Vt signalfd_siginfo
+structures, which is described below.
+.Xr read 2
+will block if there are no matching signals pending, or return
+.Er EAGAIN
+if the instance was created with
+.Fa SFD_NONBLOCK .
+After a
+.Xr fork 2 ,
+if the child reads from the descriptor it will only consume signals from itself.
+.It Sy poll(2)
+Provide notification when one of the signals from the
+.Va mask
+arrives.
+.Fa POLLIN
+and
+.Fa POLLRDNORM
+will be set.
+.It Sy close(2)
+Closes the descriptor.
+.El
+.Pp
+The
+.Vt signalfd_siginfo
+structure returned from
+.Xr read 2
+is a fixed size 128 byte structure defined as follows:
+.Bd -literal
+typedef struct signalfd_siginfo {
+ uint32_t ssi_signo; /* signal from signal.h */
+ int32_t ssi_errno; /* error from errno.h */
+ int32_t ssi_code; /* signal code */
+ uint32_t ssi_pid; /* PID of sender */
+ uint32_t ssi_uid; /* real UID of sender */
+ int32_t ssi_fd; /* file descriptor (SIGIO) */
+ uint32_t ssi_tid; /* unused */
+ uint32_t ssi_band; /* band event (SIGIO) */
+ uint32_t ssi_overrun; /* unused */
+ uint32_t ssi_trapno; /* trap number that caused signal */
+ int32_t ssi_status; /* exit status or signal (SIGCHLD) */
+ int32_t ssi_int; /* unused */
+ uint64_t ssi_ptr; /* unused */
+ uint64_t ssi_utime; /* user CPU time consumed (SIGCHLD) */
+ uint64_t ssi_stime; /* system CPU time consumed (SIGCHLD) */
+ uint64_t ssi_addr; /* address that generated signal */
+ uint8_t ssi_pad[48]; /* pad size to 128 bytes */
+} signalfd_siginfo_t;
+.Ed
+.Sh RETURN VALUES
+Upon successful completion, a file descriptor associated with the instance
+is returned. Otherwise, -1 is returned and errno is set to indicate the error.
+When
+.Va fd
+is not -1 and there is no error, the value of
+.Va fd
+is returned.
+.Sh ERRORS
+The
+.Fn signalfd
+function will fail if:
+.Bl -tag -width Er
+.It Er EBADF
+The
+.Va fd
+descriptor is invalid.
+.It Er EFAULT
+The
+.Va mask
+address is invalid.
+.It Er EINVAL
+The
+.Va fd
+descriptor is not a signalfd descriptor or the
+.Va flags
+are invalid.
+.It Er EMFILE
+There are currently
+.Va OPEN_MAX
+file descriptors open in the calling process.
+.It Er ENODEV
+Unable to allocate state for the file descriptor.
+.El
+.Sh SEE ALSO
+.Xr poll 2 ,
+.Xr sigwait 2 ,
+.Xr sigsetops 3C ,
+.Xr sigwaitinfo 3C ,
+.Xr signal.h 3HEAD
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index d7a58d0529..50ab92b081 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -1011,6 +1011,8 @@ DEVPOOL_OBJS += devpool.o
EVENTFD_OBJS += eventfd.o
+SIGNALFD_OBJS += signalfd.o
+
I8042_OBJS += i8042.o
INOTIFY_OBJS += inotify.o
diff --git a/usr/src/uts/common/io/signalfd.c b/usr/src/uts/common/io/signalfd.c
new file mode 100644
index 0000000000..3b9b76e7f4
--- /dev/null
+++ b/usr/src/uts/common/io/signalfd.c
@@ -0,0 +1,762 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Support for the signalfd facility, a Linux-borne facility for
+ * file descriptor-based synchronous signal consumption.
+ *
+ * As described on the signalfd(3C) man page, the general idea behind these
+ * file descriptors is that they can be used to synchronously consume signals
+ * via the read(2) syscall. That capability already exists with the
+ * sigwaitinfo(3C) function but the key advantage of signalfd is that, because
+ * it is file descriptor based, poll(2) can be used to determine when signals
+ * are available to be consumed.
+ *
+ * The general implementation uses signalfd_state to hold both the signal set
+ * and poll head for an open file descriptor. Because a process can be using
+ * different sigfds with different signal sets, each signalfd_state poll head
+ * can be thought of as an independent signal stream and the thread(s) waiting
+ * on that stream will get poll notification when any signal in the
+ * corresponding set is received.
+ *
+ * The sigfd_proc_state_t struct lives on the proc_t and maintains per-proc
+ * state for function callbacks and data when the proc needs to do work during
+ * signal delivery for pollwakeup.
+ *
+ * The read side of the implementation is straightforward and mimics the
+ * kernel behavior for sigtimedwait(). Signals continue to live on either
+ * the proc's p_sig, or thread's t_sig, member. Read consumes the signal so
+ * that it is no longer pending.
+ *
+ * The poll side is more complex since all of the sigfds on the process need
+ * to be examined every time a signal is delivered to the process in order to
+ * determine if any thread is waiting in poll for that signal.
+ *
+ * Because it is likely that a process will only be using a few sigfds, but
+ * perhaps many total file descriptors, we maintain a list of sigfds (which
+ * may need pollwakeup) that lives on the proc's p_sigfd struct. In this way
+ * only a few of the state structs will need to be examined every time a signal
+ * is delivered to the process, instead of having to examine all of the file
+ * descriptors to find the state structs.
+ *
+ * When a state struct with a matching signal set is found, if there are any
+ * threads waiting in poll for that signal, then pollwakeup is called.
+ *
+ * Forking causes some complications with sigfd polling because now two
+ * processes have a fd that references the same signalfd_state, but signals go
+ * to only one of those processes. Because the state struct is referenced by
+ * both file descriptors, and the state struct represents a signal stream to be
+ * polled, it can be confusing as to which processes should get a pollwakeup.
+ * Fortunately this is not a common problem in practice, but the implementation
+ * goes to some length to mitigate unexpected behavior.
+ *
+ * When the parent process forks (or forkall), if any thread is in poll then
+ * both the parent and child will return from poll with EINTR. This means
+ * that if either process wants to re-poll on a sigfd then it needs to re-run
+ * poll. Our fork helper function will cleanup all of the poll state on the
+ * parent process and null-out the state pointers on the child process. In this
+ * way the state will only get reestablished on either process when one of them
+ * does another poll on the sigfd. Under normal circumstances the child will
+ * close the sigfd, so it never does a re-poll, and signal delivery for the
+ * child will never come into our code path.
+ *
+ * This leaves only one odd corner case. If the parent and child both use
+ * the dup-ed sigfd to poll then when a signal is delivered to either process
+ * there is no way to determine which one should get the pollwakeup (since
+ * both processes will be queued on the same signal stream poll head). What
+ * happens in this case is that both processes will return from poll, but only
+ * one of them will actually have a signal to read. The other will return
+ * from read with EAGAIN, or block. This case is actually similar to the
+ * situation within a single process which got two different sigfd's with the
+ * same mask (or poll on two fd's that are dup-ed). Both would return from poll
+ * when a signal arrives but only one read would consume the signal and the
+ * other read would fail or block. Applications which poll on shared fd's
+ * cannot assume that a subsequent read will actually obtain data.
+ */
+
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/signalfd.h>
+#include <sys/conf.h>
+#include <sys/sysmacros.h>
+#include <sys/filio.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <sys/schedctl.h>
+#include <sys/id_space.h>
+#include <sys/sdt.h>
+
+/*
+ * Per-open state for a signalfd instance. One of these is allocated as the
+ * driver soft state for each minor number handed out by signalfd_open().
+ */
+typedef struct signalfd_state signalfd_state_t;
+
+struct signalfd_state {
+ kmutex_t sfd_lock; /* lock protecting state */
+ pollhead_t sfd_pollhd; /* poll head */
+ k_sigset_t sfd_set; /* signals for this fd */
+ signalfd_state_t *sfd_next; /* next state on global list */
+};
+
+/*
+ * Internal global variables.
+ */
+static kmutex_t signalfd_lock; /* lock protecting state */
+static dev_info_t *signalfd_devi; /* device info */
+static major_t signalfd_major; /* driver major number, set at attach */
+static id_space_t *signalfd_minor; /* minor number arena */
+static void *signalfd_softstate; /* softstate pointer */
+static signalfd_state_t *signalfd_state; /* global list of state */
+
+/*
+ * If we don't already have an entry in the proc's list for this state, add one.
+ *
+ * Operates on curproc. The caller must hold p_lock and must already have
+ * set up p->p_sigfd (both are asserted).
+ *
+ * NOTE(review): the KM_SLEEP allocation below is made while p_lock is held;
+ * confirm that sleeping here is acceptable.
+ */
+static void
+signalfd_wake_list_add(signalfd_state_t *state)
+{
+ proc_t *p = curproc;
+ list_t *lst;
+ sigfd_wake_list_t *wlp;
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(p->p_sigfd != NULL);
+
+ /* Look for an existing entry that already refers to this state. */
+ lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
+ for (wlp = list_head(lst); wlp != NULL; wlp = list_next(lst, wlp)) {
+ if (wlp->sigfd_wl_state == state)
+ break;
+ }
+
+ /* wlp is NULL only if the search ran off the end of the list. */
+ if (wlp == NULL) {
+ wlp = kmem_zalloc(sizeof (sigfd_wake_list_t), KM_SLEEP);
+ wlp->sigfd_wl_state = state;
+ list_insert_head(lst, wlp);
+ }
+}
+
+/*
+ * Unlink a single wake list entry from lst and free it.
+ */
+static void
+signalfd_wake_rm(list_t *lst, sigfd_wake_list_t *wlp)
+{
+ list_remove(lst, wlp);
+ kmem_free(wlp, sizeof (sigfd_wake_list_t));
+}
+
+/*
+ * Remove the wake list entry for 'state' (if there is one) from process p.
+ * The caller must hold p->p_lock. It is not an error for the process to have
+ * no signalfd state at all, or for the state not to be on the list.
+ */
+static void
+signalfd_wake_list_rm(proc_t *p, signalfd_state_t *state)
+{
+ sigfd_wake_list_t *wlp;
+ list_t *lst;
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+
+ if (p->p_sigfd == NULL)
+ return;
+
+ lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
+ for (wlp = list_head(lst); wlp != NULL; wlp = list_next(lst, wlp)) {
+ if (wlp->sigfd_wl_state == state) {
+ /* Stop at the first match; wlp is freed by the call. */
+ signalfd_wake_rm(lst, wlp);
+ break;
+ }
+ }
+}
+
+/*
+ * Clear the pollwakeup callback on process p and free every entry on its
+ * wake list. The list itself and the sigfd_proc_state_t are left in place.
+ *
+ * The caller must hold p->p_lock. p->p_sigfd is dereferenced without a NULL
+ * check, so the caller must also guarantee that it is set.
+ */
+static void
+signalfd_wake_list_cleanup(proc_t *p)
+{
+ sigfd_wake_list_t *wlp;
+ list_t *lst;
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+
+ ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb = NULL;
+
+ lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
+ while (!list_is_empty(lst)) {
+ wlp = (sigfd_wake_list_t *)list_remove_head(lst);
+ kmem_free(wlp, sizeof (sigfd_wake_list_t));
+ }
+}
+
+/*
+ * Process-exit hook, installed as sigfd_exit_helper by signalfd_attach().
+ * Tears down all of curproc's signalfd pollwakeup state: the callback, every
+ * wake list entry, the list itself and finally the sigfd_proc_state_t.
+ */
+static void
+signalfd_exit_helper(void)
+{
+	proc_t *p = curproc;
+	list_t *lst;
+
+	/* This being non-null is the only way we can get here */
+	ASSERT(p->p_sigfd != NULL);
+
+	mutex_enter(&p->p_lock);
+	lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
+
+	/* Empty the list before destroying it and freeing its container. */
+	signalfd_wake_list_cleanup(p);
+	list_destroy(lst);
+	kmem_free(p->p_sigfd, sizeof (sigfd_proc_state_t));
+	p->p_sigfd = NULL;
+	mutex_exit(&p->p_lock);
+}
+
+/*
+ * Clear the parent's signal state list and pollwakeup callback. The child
+ * starts with no signal pollwakeup state. That will be added when needed if
+ * the child needs pollwakeup later.
+ *
+ * p is the forking (parent) process and cp is the new child. Installed as
+ * sigfd_fork_helper by signalfd_attach().
+ */
+static void
+signalfd_fork_helper(struct proc *p, struct proc *cp)
+{
+ /* This being non-null is the only way we can get here */
+ ASSERT(p->p_sigfd != NULL);
+
+ mutex_enter(&p->p_lock);
+ signalfd_wake_list_cleanup(p);
+ mutex_exit(&p->p_lock);
+ /* The child's pointer is cleared without taking the child's p_lock. */
+ cp->p_sigfd = NULL;
+}
+
+/*
+ * Called every time a signal is delivered to the process so that we can
+ * see if any signal stream needs a pollwakeup. We maintain a list of
+ * signal state elements so that we don't have to look at every file descriptor
+ * on the process. If necessary, a further optimization would be to maintain a
+ * signal set mask that is a union of all of the sets in the list so that
+ * we don't even traverse the list if the signal is not in one of the elements.
+ * However, since the list is likely to be very short, this is not currently
+ * being done. A more complex data structure might also be used, but it is
+ * unclear what that would be since each signal set needs to be checked for a
+ * match.
+ *
+ * arg0 is the proc_t receiving the signal and sig is the signal number. The
+ * caller must hold that process's p_lock.
+ *
+ * An entry is removed from the wake list once its pollers have been woken;
+ * signalfd_poll() adds it back the next time the descriptor is polled.
+ */
+static void
+signalfd_pollwake_cb(void *arg0, int sig)
+{
+ proc_t *p = (proc_t *)arg0;
+ list_t *lst;
+ sigfd_wake_list_t *wlp;
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+
+ if (p->p_sigfd == NULL)
+ return;
+
+ lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
+ wlp = list_head(lst);
+ while (wlp != NULL) {
+ signalfd_state_t *state = wlp->sigfd_wl_state;
+
+ mutex_enter(&state->sfd_lock);
+
+ /* Only wake streams that want this signal and have pollers. */
+ if (sigismember(&state->sfd_set, sig) &&
+ state->sfd_pollhd.ph_list != NULL) {
+ sigfd_wake_list_t *tmp = wlp;
+
+ /* remove it from the list */
+ /* Advance first: tmp is freed by signalfd_wake_rm(). */
+ wlp = list_next(lst, wlp);
+ signalfd_wake_rm(lst, tmp);
+
+ /* Drop sfd_lock before calling pollwakeup(). */
+ mutex_exit(&state->sfd_lock);
+ pollwakeup(&state->sfd_pollhd, POLLRDNORM | POLLIN);
+ } else {
+ mutex_exit(&state->sfd_lock);
+ wlp = list_next(lst, wlp);
+ }
+ }
+}
+
+/*
+ * open(9E) entry point. Only the SIGNALFDMNRN_SIGNALFD minor may be opened.
+ * Each open is given its own freshly allocated minor number and soft state:
+ * *devp is rewritten to the new device and the state is linked onto the
+ * global signalfd_state list.
+ *
+ * Returns 0 on success, ENXIO for any other minor, or ENODEV if the soft
+ * state cannot be allocated.
+ */
+/*ARGSUSED*/
+static int
+signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+ signalfd_state_t *state;
+ major_t major = getemajor(*devp);
+ minor_t minor = getminor(*devp);
+
+ if (minor != SIGNALFDMNRN_SIGNALFD)
+ return (ENXIO);
+
+ mutex_enter(&signalfd_lock);
+
+ /* From here on 'minor' is the new per-open minor number. */
+ minor = (minor_t)id_allocff(signalfd_minor);
+
+ if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
+ id_free(signalfd_minor, minor);
+ mutex_exit(&signalfd_lock);
+ return (ENODEV);
+ }
+
+ state = ddi_get_soft_state(signalfd_softstate, minor);
+ *devp = makedevice(major, minor);
+
+ /* Push the new state onto the head of the global list. */
+ state->sfd_next = signalfd_state;
+ signalfd_state = state;
+
+ mutex_exit(&signalfd_lock);
+
+ return (0);
+}
+
+/*
+ * Consume one signal from our set in a manner similar to sigtimedwait().
+ * The block parameter is used to control whether we wait for a signal or
+ * return immediately if no signal is pending. We use the thread's t_sigwait
+ * member in the same way that it is used by sigtimedwait.
+ *
+ * On success the signal is converted to a signalfd_siginfo_t and copied out
+ * through uio.
+ *
+ * Return 0 if we successfully consumed a signal or an errno if not:
+ * EAGAIN if the wait ended with no signal pending, EINTR if we were woken
+ * without a current signal from our set, or the error from uiomove().
+ */
+static int
+consume_signal(k_sigset_t set, uio_t *uio, boolean_t block)
+{
+ k_sigset_t oldmask;
+ kthread_t *t = curthread;
+ klwp_t *lwp = ttolwp(t);
+ proc_t *p = ttoproc(t);
+ timespec_t now;
+ timespec_t *rqtp = NULL; /* null means blocking */
+ int timecheck = 0;
+ int ret = 0;
+ k_siginfo_t info, *infop;
+ signalfd_siginfo_t ssi, *ssp = &ssi;
+
+ if (block == B_FALSE) {
+ timecheck = timechanged;
+ gethrestime(&now);
+ rqtp = &now; /* non-blocking check for pending signals */
+ }
+
+ t->t_sigwait = set;
+
+ mutex_enter(&p->p_lock);
+ /*
+ * set the thread's signal mask to unmask those signals in the
+ * specified set.
+ */
+ schedctl_finish_sigblock(t);
+ oldmask = t->t_hold;
+ sigdiffset(&t->t_hold, &t->t_sigwait);
+
+ /*
+ * Based on rqtp, wait indefinitely until we take a signal in our set
+ * or return immediately if there are no signals pending from our set.
+ */
+ while ((ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock, rqtp,
+ timecheck)) > 0)
+ continue;
+
+ /* Restore thread's signal mask to its previous value. */
+ t->t_hold = oldmask;
+ t->t_sig_check = 1; /* so post_syscall sees new t_hold mask */
+
+ if (ret == -1) {
+ /* no signals pending */
+ mutex_exit(&p->p_lock);
+ sigemptyset(&t->t_sigwait);
+ return (EAGAIN); /* no signals pending */
+ }
+
+ /* Don't bother with signal if it is not in request set. */
+ if (lwp->lwp_cursig == 0 ||
+ !sigismember(&t->t_sigwait, lwp->lwp_cursig)) {
+ mutex_exit(&p->p_lock);
+ /*
+ * lwp_cursig is zero if pokelwps() awakened cv_wait_sig().
+ * This happens if some other thread in this process called
+ * forkall() or exit().
+ */
+ sigemptyset(&t->t_sigwait);
+ return (EINTR);
+ }
+
+ /*
+ * Use the queued siginfo if there is one; otherwise synthesize a
+ * minimal one that carries only the signal number.
+ */
+ if (lwp->lwp_curinfo) {
+ infop = &lwp->lwp_curinfo->sq_info;
+ } else {
+ infop = &info;
+ bzero(infop, sizeof (info));
+ infop->si_signo = lwp->lwp_cursig;
+ infop->si_code = SI_NOINFO;
+ }
+
+ lwp->lwp_ru.nsignals++;
+
+ DTRACE_PROC2(signal__clear, int, ret, ksiginfo_t *, infop);
+ /* Clearing the current signal is what consumes it. */
+ lwp->lwp_cursig = 0;
+ lwp->lwp_extsig = 0;
+ mutex_exit(&p->p_lock);
+
+ /* Convert k_siginfo into external, datamodel independent, struct. */
+ bzero(ssp, sizeof (*ssp));
+ ssp->ssi_signo = infop->si_signo;
+ ssp->ssi_errno = infop->si_errno;
+ ssp->ssi_code = infop->si_code;
+ ssp->ssi_pid = infop->si_pid;
+ ssp->ssi_uid = infop->si_uid;
+ ssp->ssi_fd = infop->si_fd;
+ ssp->ssi_band = infop->si_band;
+ ssp->ssi_trapno = infop->si_trapno;
+ ssp->ssi_status = infop->si_status;
+ ssp->ssi_utime = infop->si_utime;
+ ssp->ssi_stime = infop->si_stime;
+ ssp->ssi_addr = (uint64_t)(intptr_t)infop->si_addr;
+
+ /*
+ * NOTE(review): the signal has already been cleared from the lwp
+ * above, so it stays consumed even if this copyout fails.
+ */
+ ret = uiomove(ssp, sizeof (*ssp), UIO_READ, uio);
+
+ /* infop may point into lwp_curinfo, so free it only after the copy. */
+ if (lwp->lwp_curinfo) {
+ siginfofree(lwp->lwp_curinfo);
+ lwp->lwp_curinfo = NULL;
+ }
+ sigemptyset(&t->t_sigwait);
+ return (ret);
+}
+
+/*
+ * This is similar to sigtimedwait. Based on the fd mode we may wait until a
+ * signal within our specified set is posted. We consume as many available
+ * signals within our set as we can.
+ *
+ * The descriptor's signal set is snapshotted once, under sfd_lock, and that
+ * same snapshot is used for the whole read so that the emptiness check and
+ * every consume_signal() call agree on the mask.
+ *
+ * Returns 0 if at least one signal was copied out. Otherwise returns EINVAL
+ * if the buffer cannot hold one signalfd_siginfo_t or the set is empty, or
+ * the error from the first consume_signal() call.
+ */
+/*ARGSUSED*/
+static int
+signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
+{
+	signalfd_state_t *state;
+	minor_t minor = getminor(dev);
+	boolean_t block = B_TRUE;
+	k_sigset_t set;
+	boolean_t got_one = B_FALSE;
+	int res;
+
+	if (uio->uio_resid < sizeof (signalfd_siginfo_t))
+		return (EINVAL);
+
+	state = ddi_get_soft_state(signalfd_softstate, minor);
+
+	if (uio->uio_fmode & (FNDELAY|FNONBLOCK))
+		block = B_FALSE;
+
+	mutex_enter(&state->sfd_lock);
+	set = state->sfd_set;
+	mutex_exit(&state->sfd_lock);
+
+	if (sigisempty(&set))
+		return (EINVAL);
+
+	do {
+		/* Use the locked snapshot, not the live state->sfd_set. */
+		res = consume_signal(set, uio, block);
+		if (res == 0)
+			got_one = B_TRUE;
+
+		/*
+		 * After consuming one signal we won't block trying to consume
+		 * further signals.
+		 */
+		block = B_FALSE;
+	} while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));
+
+	/* A later failure does not undo signals already copied out. */
+	if (got_one)
+		res = 0;
+
+	return (res);
+}
+
+/*
+ * Report whether any signal in 'set' is pending on either the process p or
+ * the thread t. The result is non-zero if so and zero otherwise.
+ *
+ * If k_sigset_t's were a single word, we would do:
+ *	return (((p->p_sig | t->t_sig) & set) & fillset);
+ */
+static int
+signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
+{
+	const k_sigset_t *psig = &p->p_sig;
+	const k_sigset_t *tsig = &t->t_sig;
+
+	/* Only the last word is trimmed to the valid signal bits. */
+	return (((psig->__sigbits[0] | tsig->__sigbits[0]) &
+	    set.__sigbits[0]) |
+	    ((psig->__sigbits[1] | tsig->__sigbits[1]) &
+	    set.__sigbits[1]) |
+	    (((psig->__sigbits[2] | tsig->__sigbits[2]) &
+	    set.__sigbits[2]) & FILLSET2));
+}
+
+/*
+ * poll entry point. Reports POLLRDNORM | POLLIN when a signal in this
+ * descriptor's set is pending on the calling process or thread. When none of
+ * the requested events is ready and anyyet is zero, hands back our pollhead
+ * and puts this state on the process's wake list so that
+ * signalfd_pollwake_cb() will examine it when a signal is delivered.
+ *
+ * Always returns 0.
+ */
+/*ARGSUSED*/
+static int
+signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp)
+{
+ signalfd_state_t *state;
+ minor_t minor = getminor(dev);
+ kthread_t *t = curthread;
+ proc_t *p = ttoproc(t);
+ short revents = 0;
+
+ state = ddi_get_soft_state(signalfd_softstate, minor);
+
+ mutex_enter(&state->sfd_lock);
+
+ /*
+ * NOTE(review): only sfd_lock is held here, not p_lock, so the
+ * pending sets are sampled without the process lock.
+ */
+ if (signalfd_sig_pending(p, t, state->sfd_set) != 0)
+ revents |= POLLRDNORM | POLLIN;
+
+ mutex_exit(&state->sfd_lock);
+
+ if (!(*reventsp = revents & events) && !anyyet) {
+ *phpp = &state->sfd_pollhd;
+
+ /*
+ * Enable pollwakeup handling.
+ */
+ if (p->p_sigfd == NULL) {
+ sigfd_proc_state_t *pstate;
+
+ /* Allocate before taking p_lock; install it under the lock. */
+ pstate = kmem_zalloc(sizeof (sigfd_proc_state_t),
+ KM_SLEEP);
+ list_create(&pstate->sigfd_list,
+ sizeof (sigfd_wake_list_t),
+ offsetof(sigfd_wake_list_t, sigfd_wl_lst));
+
+ mutex_enter(&p->p_lock);
+ /* check again now that we're locked */
+ if (p->p_sigfd == NULL) {
+ p->p_sigfd = pstate;
+ } else {
+ /* someone beat us to it */
+ list_destroy(&pstate->sigfd_list);
+ kmem_free(pstate, sizeof (sigfd_proc_state_t));
+ }
+ mutex_exit(&p->p_lock);
+ }
+
+ mutex_enter(&p->p_lock);
+ /* (Re)install the callback; fork cleanup clears it. */
+ if (((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb ==
+ NULL) {
+ ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb =
+ signalfd_pollwake_cb;
+ }
+ signalfd_wake_list_add(state);
+ mutex_exit(&p->p_lock);
+ }
+
+ return (0);
+}
+
+/*
+ * ioctl entry point. The only supported command is SIGNALFDIOC_MASK, whose
+ * argument is the address of a sigset_t that replaces the signal set of this
+ * descriptor.
+ *
+ * Returns 0 on success, EFAULT if the mask cannot be copied in, or ENOTTY
+ * for any other command.
+ */
+/*ARGSUSED*/
+static int
+signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
+{
+	signalfd_state_t *state;
+	minor_t minor = getminor(dev);
+	sigset_t mask;
+
+	state = ddi_get_soft_state(signalfd_softstate, minor);
+
+	switch (cmd) {
+	case SIGNALFDIOC_MASK:
+		/*
+		 * Use ddi_copyin() with the ioctl mode so that the copy is
+		 * done correctly whether the request came from userland or
+		 * from within the kernel.
+		 */
+		if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t),
+		    md) != 0)
+			return (EFAULT);
+
+		/* Convert to the kernel representation under the lock. */
+		mutex_enter(&state->sfd_lock);
+		sigutok(&mask, &state->sfd_set);
+		mutex_exit(&state->sfd_lock);
+
+		return (0);
+
+	default:
+		break;
+	}
+
+	return (ENOTTY);
+}
+
+/*
+ * close entry point. Wakes and detaches any pollers, drops this state from
+ * the closing process's wake list and from the global state list, then frees
+ * the soft state and releases the minor number. Always returns 0.
+ */
+/*ARGSUSED*/
+static int
+signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
+{
+ signalfd_state_t *state, **sp;
+ minor_t minor = getminor(dev);
+ proc_t *p = curproc;
+
+ state = ddi_get_soft_state(signalfd_softstate, minor);
+
+ /* Kick anyone still polling on this descriptor before it goes away. */
+ if (state->sfd_pollhd.ph_list != NULL) {
+ pollwakeup(&state->sfd_pollhd, POLLERR);
+ pollhead_clean(&state->sfd_pollhd);
+ }
+
+ /*
+ * Make sure our state is removed from our proc's pollwake list.
+ *
+ * NOTE(review): only curproc's list is searched; confirm that no other
+ * process can still hold a wake list entry for this state.
+ */
+ mutex_enter(&p->p_lock);
+ signalfd_wake_list_rm(p, state);
+ mutex_exit(&p->p_lock);
+
+ mutex_enter(&signalfd_lock);
+
+ /*
+ * Remove our state from our global list. sp walks the link pointers
+ * until it reaches the one that points at our state; the VERIFY
+ * trips if the end of the list is reached without finding it.
+ */
+ for (sp = &signalfd_state; *sp != state; sp = &((*sp)->sfd_next))
+ VERIFY(*sp != NULL);
+
+ *sp = (*sp)->sfd_next;
+
+ ddi_soft_state_free(signalfd_softstate, minor);
+ id_free(signalfd_minor, minor);
+
+ mutex_exit(&signalfd_lock);
+
+ return (0);
+}
+
+/*
+ * attach entry point. Only DDI_ATTACH of a single instance is supported.
+ * Creates the minor number arena, the soft state anchor and the "signalfd"
+ * minor node, then publishes the fork and exit helpers so that the rest of
+ * the kernel can call into this driver.
+ *
+ * Returns DDI_SUCCESS, or DDI_FAILURE with everything created so far undone
+ * and signalfd_lock released.
+ */
+/*ARGSUSED*/
+static int
+signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+{
+	if (cmd != DDI_ATTACH || signalfd_devi != NULL)
+		return (DDI_FAILURE);
+
+	mutex_enter(&signalfd_lock);
+
+	signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1);
+	if (!signalfd_minor) {
+		/* Don't return with signalfd_lock still held. */
+		mutex_exit(&signalfd_lock);
+		return (DDI_FAILURE);
+	}
+
+	if (ddi_soft_state_init(&signalfd_softstate,
+	    sizeof (signalfd_state_t), 0) != 0) {
+		cmn_err(CE_NOTE, "/dev/signalfd failed to create soft state");
+		id_space_destroy(signalfd_minor);
+		mutex_exit(&signalfd_lock);
+		return (DDI_FAILURE);
+	}
+
+	if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
+	    SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
+		cmn_err(CE_NOTE, "/dev/signalfd couldn't create minor node");
+		ddi_soft_state_fini(&signalfd_softstate);
+		id_space_destroy(signalfd_minor);
+		mutex_exit(&signalfd_lock);
+		return (DDI_FAILURE);
+	}
+
+	ddi_report_dev(devi);
+	signalfd_devi = devi;
+	signalfd_major = ddi_driver_major(signalfd_devi);
+
+	/* Hooks invoked from fork and process exit. */
+	sigfd_fork_helper = signalfd_fork_helper;
+	sigfd_exit_helper = signalfd_exit_helper;
+
+	mutex_exit(&signalfd_lock);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * detach entry point. DDI_SUSPEND succeeds without doing anything. For
+ * DDI_DETACH, undoes signalfd_attach(): destroys the minor arena, removes
+ * the minor node, clears the fork and exit helpers and tears down the soft
+ * state anchor. Any other command fails.
+ */
+/*ARGSUSED*/
+static int
+signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ switch (cmd) {
+ case DDI_DETACH:
+ break;
+
+ case DDI_SUSPEND:
+ return (DDI_SUCCESS);
+
+ default:
+ return (DDI_FAILURE);
+ }
+
+ /* list should be empty */
+ /* NOTE(review): this check is made before signalfd_lock is taken. */
+ VERIFY(signalfd_state == NULL);
+
+ mutex_enter(&signalfd_lock);
+ id_space_destroy(signalfd_minor);
+
+ ddi_remove_minor_node(signalfd_devi, NULL);
+ signalfd_devi = NULL;
+ sigfd_fork_helper = NULL;
+ sigfd_exit_helper = NULL;
+
+ ddi_soft_state_fini(&signalfd_softstate);
+ mutex_exit(&signalfd_lock);
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * getinfo entry point. Answers the two standard queries: the dev_info_t for
+ * the device (the single attached instance) and its instance number, which
+ * is always 0. Any other query fails.
+ */
+/*ARGSUSED*/
+static int
+signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
+{
+	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
+		*result = (void *)signalfd_devi;
+		return (DDI_SUCCESS);
+	}
+
+	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
+		*result = (void *)0;
+		return (DDI_SUCCESS);
+	}
+
+	return (DDI_FAILURE);
+}
+
+/*
+ * Character device entry points. Only open, close, read, ioctl and poll are
+ * implemented by this driver.
+ */
+static struct cb_ops signalfd_cb_ops = {
+ signalfd_open, /* open */
+ signalfd_close, /* close */
+ nulldev, /* strategy */
+ nulldev, /* print */
+ nodev, /* dump */
+ signalfd_read, /* read */
+ nodev, /* write */
+ signalfd_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ signalfd_poll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_NEW | D_MP /* Driver compatibility flag */
+};
+
+/*
+ * Device operations: getinfo, attach and detach are provided by this driver,
+ * and the character entry points come from signalfd_cb_ops.
+ */
+static struct dev_ops signalfd_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ signalfd_info, /* get_dev_info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ signalfd_attach, /* attach */
+ signalfd_detach, /* detach */
+ nodev, /* reset */
+ &signalfd_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+/* Module linkage information for the driver itself. */
+static struct modldrv modldrv = {
+ &mod_driverops, /* module type (this is a pseudo driver) */
+ "signalfd support", /* name of module */
+ &signalfd_ops, /* driver ops */
+};
+
+/* Top-level module linkage; its only element is the driver's modldrv. */
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modldrv,
+ NULL
+};
+
+/* Module load entry point: installs the module described by modlinkage. */
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+/* Module information entry point: reports on modlinkage via mod_info(). */
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/* Module unload entry point: removes the module described by modlinkage. */
+int
+_fini(void)
+{
+ return (mod_remove(&modlinkage));
+}
diff --git a/usr/src/uts/common/io/signalfd.conf b/usr/src/uts/common/io/signalfd.conf
new file mode 100644
index 0000000000..de44738a14
--- /dev/null
+++ b/usr/src/uts/common/io/signalfd.conf
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# Configure a single signalfd device node, instance 0, whose parent is
+# "pseudo".
+#
+name="signalfd" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c
index 06a3856332..6e3884946c 100644
--- a/usr/src/uts/common/os/exit.c
+++ b/usr/src/uts/common/os/exit.c
@@ -491,6 +491,14 @@ proc_exit(int why, int what)
(*dtrace_helpers_cleanup)();
}
+ /*
+ * Clean up any signalfd state for the process.
+ */
+ if (p->p_sigfd != NULL) {
+ VERIFY(sigfd_exit_helper != NULL);
+ (*sigfd_exit_helper)();
+ }
+
/* untimeout the realtime timers */
if (p->p_itimer != NULL)
timer_exit();
diff --git a/usr/src/uts/common/os/fork.c b/usr/src/uts/common/os/fork.c
index fcd33a7788..522731f5e2 100644
--- a/usr/src/uts/common/os/fork.c
+++ b/usr/src/uts/common/os/fork.c
@@ -331,6 +331,14 @@ cfork(int isvfork, int isfork1, int flags)
}
/*
+ * Setup signalfd state.
+ */
+ if (p->p_sigfd != NULL) {
+ VERIFY(sigfd_fork_helper != NULL);
+ (*sigfd_fork_helper)(p, cp);
+ }
+
+ /*
* Duplicate parent's resource controls.
*/
dup_set = rctl_set_create();
diff --git a/usr/src/uts/common/os/sig.c b/usr/src/uts/common/os/sig.c
index bc48c6e6e8..cc2e753526 100644
--- a/usr/src/uts/common/os/sig.c
+++ b/usr/src/uts/common/os/sig.c
@@ -61,6 +61,7 @@
#include <sys/dtrace.h>
#include <sys/sdt.h>
#include <sys/brand.h>
+#include <sys/signalfd.h>
const k_sigset_t nullsmask = {0, 0, 0};
@@ -95,6 +96,13 @@ const k_sigset_t holdvfork =
static int isjobstop(int);
static void post_sigcld(proc_t *, sigqueue_t *);
+
+/*
+ * signalfd helper functions which are set when the signalfd driver loads.
+ *
+ * cfork() calls sigfd_fork_helper(parent, child) and proc_exit() calls
+ * sigfd_exit_helper(), in both cases only for a process whose p_sigfd is
+ * non-NULL, and each VERIFYs that the pointer has been set before calling
+ * through it.
+ *
+ * sigfd_exit_helper takes no arguments; it is declared with an explicit
+ * (void) prototype so the compiler can check calls made through it.
+ */
+void (*sigfd_fork_helper)(struct proc *, struct proc *);
+void (*sigfd_exit_helper)(void);
+
/*
* Internal variables for counting number of user thread stop requests posted.
* They may not be accurate at some special situation such as that a virtually
@@ -323,6 +331,11 @@ sigtoproc(proc_t *p, kthread_t *t, int sig)
(void) eat_signal(t, sig);
thread_unlock(t);
DTRACE_PROC2(signal__send, kthread_t *, t, int, sig);
+ if (p->p_sigfd != NULL && ((sigfd_proc_state_t *)
+ (p->p_sigfd))->sigfd_pollwake_cb != NULL)
+ (*((sigfd_proc_state_t *)(p->p_sigfd))->
+ sigfd_pollwake_cb)(p, sig);
+
} else if ((tt = p->p_tlist) != NULL) {
/*
* Make sure that some lwp that already exists
@@ -361,6 +374,10 @@ sigtoproc(proc_t *p, kthread_t *t, int sig)
}
DTRACE_PROC2(signal__send, kthread_t *, tt, int, sig);
+ if (p->p_sigfd != NULL && ((sigfd_proc_state_t *)
+ (p->p_sigfd))->sigfd_pollwake_cb != NULL)
+ (*((sigfd_proc_state_t *)(p->p_sigfd))->
+ sigfd_pollwake_cb)(p, sig);
}
}
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index a6537c9db3..b646989821 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -512,6 +512,7 @@ CHKHDRS= \
sid.h \
siginfo.h \
signal.h \
+ signalfd.h \
sleepq.h \
smbios.h \
smbios_impl.h \
diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h
index e811bebf25..9704748b71 100644
--- a/usr/src/uts/common/sys/proc.h
+++ b/usr/src/uts/common/sys/proc.h
@@ -48,6 +48,7 @@
#include <sys/list.h>
#include <sys/avl.h>
#include <sys/door_impl.h>
+#include <sys/signalfd.h>
#ifdef __cplusplus
extern "C" {
@@ -198,6 +199,7 @@ typedef struct proc {
k_sigset_t p_extsig; /* signals sent from another contract */
k_sigset_t p_ignore; /* ignore when generated */
k_sigset_t p_siginfo; /* gets signal info with signal */
+ void *p_sigfd; /* signalfd support state */
struct sigqueue *p_sigqueue; /* queued siginfo structures */
struct sigqhdr *p_sigqhdr; /* hdr to sigqueue structure pool */
struct sigqhdr *p_signhdr; /* hdr to signotify structure pool */
diff --git a/usr/src/uts/common/sys/signalfd.h b/usr/src/uts/common/sys/signalfd.h
new file mode 100644
index 0000000000..11a0483652
--- /dev/null
+++ b/usr/src/uts/common/sys/signalfd.h
@@ -0,0 +1,101 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Header file to support the signalfd facility. Note that this facility
+ * is designed to be binary compatible with the Linux signalfd facility, modulo
+ * the signals themselves; values for constants here should therefore exactly
+ * match those found in Linux, and this facility shouldn't be extended
+ * independently of Linux.
+ */
+
+#ifndef _SYS_SIGNALFD_H
+#define _SYS_SIGNALFD_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * To assure binary compatibility with Linux, these values are fixed at their
+ * Linux equivalents, not their native ones.
+ *
+ * Both values are octal literals.  NOTE(review): presumably these are the
+ * bits accepted in the flags argument of signalfd() -- confirm against the
+ * libc wrapper.
+ */
+#define SFD_CLOEXEC 02000000 /* LX_O_CLOEXEC */
+#define SFD_NONBLOCK 04000 /* LX_O_NONBLOCK */
+
+/*
+ * These ioctl values are specific to the native implementation; applications
+ * shouldn't be using them directly, and they should therefore be safe to
+ * change without breaking apps.
+ *
+ * SIGNALFDIOC packs the ASCII characters 's', 'f' and 'd' into the upper
+ * three bytes of the command word; the low byte selects the individual
+ * command (1 for SIGNALFDIOC_MASK).
+ */
+#define SIGNALFDIOC (('s' << 24) | ('f' << 16) | ('d' << 8))
+#define SIGNALFDIOC_MASK (SIGNALFDIOC | 1) /* set mask */
+
+/*
+ * Fixed-layout description of a single signal.  Only fixed-width integer
+ * types are used: twelve 32-bit fields (48 bytes), four 64-bit fields
+ * (32 bytes) and 48 bytes of padding, for 128 bytes in total.
+ *
+ * NOTE(review): presumably this is the record handed back by read(2) on a
+ * signalfd descriptor -- confirm against the driver's read routine.
+ */
+typedef struct signalfd_siginfo {
+ uint32_t ssi_signo; /* signal from signal.h */
+ int32_t ssi_errno; /* error from errno.h */
+ int32_t ssi_code; /* signal code */
+ uint32_t ssi_pid; /* PID of sender */
+ uint32_t ssi_uid; /* real UID of sender */
+ int32_t ssi_fd; /* File descriptor (SIGIO) */
+ uint32_t ssi_tid; /* unused */
+ uint32_t ssi_band; /* band event (SIGIO) */
+ uint32_t ssi_overrun; /* unused */
+ uint32_t ssi_trapno; /* trap number that caused signal */
+ int32_t ssi_status; /* exit status or signal (SIGCHLD) */
+ int32_t ssi_int; /* unused */
+ uint64_t ssi_ptr; /* unused */
+ uint64_t ssi_utime; /* user CPU time consumed (SIGCHLD) */
+ uint64_t ssi_stime; /* system CPU time consumed (SIGCHLD) */
+ uint64_t ssi_addr; /* address that generated signal */
+ uint8_t ssi_pad[48]; /* Pad size to 128 bytes to allow for */
+ /* additional fields in the future. */
+} signalfd_siginfo_t;
+
+#ifndef _KERNEL
+
+/*
+ * Userland prototype; only visible when _KERNEL is not defined.
+ * NOTE(review): the parameters are unnamed here -- presumably (fd, mask,
+ * flags) as in the Linux interface this header mirrors; confirm against
+ * signalfd(3C).
+ */
+extern int signalfd(int, const sigset_t *, int);
+
+#else
+
+/*
+ * NOTE(review): judging by the MNRN infix these look like device minor
+ * numbers (the base node and the first clone) -- confirm against the
+ * driver source.
+ */
+#define SIGNALFDMNRN_SIGNALFD 0
+#define SIGNALFDMNRN_CLONE 1
+
+/*
+ * Linkable list element carrying one opaque state pointer.
+ * NOTE(review): presumably these are the entries kept on
+ * sigfd_proc_state_t's sigfd_list, with sigfd_wl_state pointing at the
+ * driver's per-open state -- confirm against the driver source.  list_node_t
+ * is not declared by this header's own includes, so <sys/list.h> must
+ * already have been included by the consumer.
+ */
+typedef struct sigfd_wake_list {
+ list_node_t sigfd_wl_lst;
+ void *sigfd_wl_state;
+} sigfd_wake_list_t;
+
+/*
+ * This holds the proc_t state for a process which is using signalfd.
+ *
+ * It is reached through the untyped p_sigfd pointer in proc_t.  When
+ * sigtoproc() posts a signal to a process whose p_sigfd is set and whose
+ * sigfd_pollwake_cb is non-NULL, it invokes the callback with the proc_t
+ * pointer and the signal number.
+ * NOTE(review): sigfd_list presumably links sigfd_wake_list_t entries --
+ * confirm against the driver source.
+ */
+typedef struct sigfd_proc_state {
+ void (*sigfd_pollwake_cb)(void *, int);
+ list_t sigfd_list;
+} sigfd_proc_state_t;
+
+
+/*
+ * Hooks called from the fork and exit paths for a process that has signalfd
+ * state (p_sigfd != NULL).  The fork hook receives the parent and child
+ * proc pointers; the exit hook takes no arguments, which is stated with an
+ * explicit (void) prototype so that calls through it are type-checked.
+ */
+extern void (*sigfd_fork_helper)(struct proc *, struct proc *);
+extern void (*sigfd_exit_helper)(void);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SIGNALFD_H */
diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h
index 41ea2331df..53deb90e23 100644
--- a/usr/src/uts/common/sys/thread.h
+++ b/usr/src/uts/common/sys/thread.h
@@ -170,7 +170,7 @@ typedef struct _kthread {
k_sigset_t t_sig; /* signals pending to this process */
k_sigset_t t_extsig; /* signals sent from another contract */
k_sigset_t t_hold; /* hold signal bit mask */
- k_sigset_t t_sigwait; /* sigtimedwait() is accepting these */
+ k_sigset_t t_sigwait; /* sigtimedwait/sigfd accepting these */
struct _kthread *t_forw; /* process's forward thread link */
struct _kthread *t_back; /* process's backward thread link */
struct _kthread *t_thlink; /* tid (lwpid) lookup hash link */
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index cdc0da3940..7f0e97a5b3 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -327,6 +327,7 @@ DRV_KMODS += sd
DRV_KMODS += sdhost
DRV_KMODS += sgen
DRV_KMODS += si3124
+DRV_KMODS += signalfd
DRV_KMODS += smbios
DRV_KMODS += skd
DRV_KMODS += softmac
diff --git a/usr/src/uts/intel/signalfd/Makefile b/usr/src/uts/intel/signalfd/Makefile
new file mode 100644
index 0000000000..d1a461c2f1
--- /dev/null
+++ b/usr/src/uts/intel/signalfd/Makefile
@@ -0,0 +1,68 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+# OBJECTS places each name in SIGNALFD_OBJS under $(OBJS_DIR); LINTS maps
+# each .o name to a .ln file under $(LINTS_DIR).  SIGNALFD_OBJS is not
+# defined in this file -- presumably it comes from the common Makefile.files
+# (TODO confirm).  ROOTMODULE is the installed module path and CONF_SRCDIR
+# is the directory holding the driver's signalfd.conf source.
+#
+MODULE = signalfd
+OBJECTS = $(SIGNALFD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(SIGNALFD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Add -Wno-parentheses to the warning flags used for this module.
+# NOTE(review): the -_gcc= prefix presumably restricts the option to gcc
+# builds -- confirm against the compiler wrapper's documentation.
+#
+CERRWARN += -_gcc=-Wno-parentheses
+
+#
+# Define targets
+#
+# The driver's .conf file is part of both the build and the install sets.
+# SRC_CONFFILE and ROOT_CONFFILE are not defined in this file; they are
+# expected to be provided by the included common uts rules, derived from
+# CONF_SRCDIR and MODULE (NOTE(review): confirm against Makefile.uts).
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Default build targets.
+#
+# Each target below only names a *_DEPS list as its prerequisite; neither
+# those lists nor any recipes are defined in this file (presumably they are
+# supplied by the included common Makefiles -- TODO confirm).
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/sparc/Makefile.sparc b/usr/src/uts/sparc/Makefile.sparc
index 3e790ed1ed..e8458de8e3 100644
--- a/usr/src/uts/sparc/Makefile.sparc
+++ b/usr/src/uts/sparc/Makefile.sparc
@@ -240,6 +240,7 @@ DRV_KMODS += bpf
DRV_KMODS += dca
DRV_KMODS += inotify
DRV_KMODS += eventfd
+DRV_KMODS += signalfd
DRV_KMODS += timerfd
#
diff --git a/usr/src/uts/sparc/signalfd/Makefile b/usr/src/uts/sparc/signalfd/Makefile
new file mode 100644
index 0000000000..a60bc617e1
--- /dev/null
+++ b/usr/src/uts/sparc/signalfd/Makefile
@@ -0,0 +1,68 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+# OBJECTS places each name in SIGNALFD_OBJS under $(OBJS_DIR); LINTS maps
+# each .o name to a .ln file under $(LINTS_DIR).  SIGNALFD_OBJS is not
+# defined in this file -- presumably it comes from the common Makefile.files
+# (TODO confirm).  ROOTMODULE is the installed module path and CONF_SRCDIR
+# is the directory holding the driver's signalfd.conf source.
+#
+MODULE = signalfd
+OBJECTS = $(SIGNALFD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(SIGNALFD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+# Add -Wno-parentheses to the warning flags used for this module.
+# NOTE(review): the -_gcc= prefix presumably restricts the option to gcc
+# builds -- confirm against the compiler wrapper's documentation.
+#
+CERRWARN += -_gcc=-Wno-parentheses
+
+#
+# Define targets
+#
+# The driver's .conf file is part of both the build and the install sets.
+# SRC_CONFFILE and ROOT_CONFFILE are not defined in this file; they are
+# expected to be provided by the included common uts rules, derived from
+# CONF_SRCDIR and MODULE (NOTE(review): confirm against Makefile.uts).
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Default build targets.
+#
+# Each target below only names a *_DEPS list as its prerequisite; neither
+# those lists nor any recipes are defined in this file (presumably they are
+# supplied by the included common Makefiles -- TODO confirm).
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ