summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2015-10-15 16:26:52 -0700
committerRobert Mustacchi <rm@joyent.com>2015-11-16 09:44:54 -0800
commit3d729aecc03ea6ebb9bd5d56b8dccd24f57daa41 (patch)
tree7c90a77f5265b35f475932b34c933c63b5664a4e
parentf9eb9fdf196b6ed476e4ffc69cecd8b0da3cb7e7 (diff)
downloadillumos-joyent-3d729aecc03ea6ebb9bd5d56b8dccd24f57daa41.tar.gz
6342 want signalfd support
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com> Reviewed by: Robert Mustacchi <rm@joyent.com> Reviewed by: Igor Kozhukhov <ikozhukhov@gmail.com> Reviewed by: Garrett D'Amore <garrett@damore.org> Approved by: Dan McDonald <danmcd@omniti.com>
-rw-r--r--usr/src/cmd/devfsadm/misc_link.c3
-rw-r--r--usr/src/lib/libc/amd64/Makefile1
-rw-r--r--usr/src/lib/libc/i386/Makefile.com1
-rw-r--r--usr/src/lib/libc/port/mapfile-vers5
-rw-r--r--usr/src/lib/libc/port/sys/signalfd.c60
-rw-r--r--usr/src/lib/libc/sparc/Makefile.com1
-rw-r--r--usr/src/man/man3c/Makefile1
-rw-r--r--usr/src/man/man3c/signalfd.3c192
-rw-r--r--usr/src/pkg/manifests/system-header.mf1
-rw-r--r--usr/src/pkg/manifests/system-kernel.mf7
-rw-r--r--usr/src/pkg/manifests/system-library.man3c.inc1
-rw-r--r--usr/src/uts/common/Makefile.files2
-rw-r--r--usr/src/uts/common/io/signalfd.c774
-rw-r--r--usr/src/uts/common/io/signalfd.conf16
-rw-r--r--usr/src/uts/common/os/exit.c8
-rw-r--r--usr/src/uts/common/os/sig.c16
-rw-r--r--usr/src/uts/common/sys/Makefile1
-rw-r--r--usr/src/uts/common/sys/proc.h2
-rw-r--r--usr/src/uts/common/sys/signalfd.h100
-rw-r--r--usr/src/uts/common/sys/thread.h2
-rw-r--r--usr/src/uts/intel/Makefile.intel1
-rw-r--r--usr/src/uts/intel/signalfd/Makefile68
-rw-r--r--usr/src/uts/sparc/Makefile.sparc1
-rw-r--r--usr/src/uts/sparc/signalfd/Makefile68
24 files changed, 1331 insertions, 1 deletions
diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c
index bf59fb5e6b..5f241df296 100644
--- a/usr/src/cmd/devfsadm/misc_link.c
+++ b/usr/src/cmd/devfsadm/misc_link.c
@@ -92,6 +92,9 @@ static devfsadm_create_t misc_cbt[] = {
{ "pseudo", "ddi_pseudo", "eventfd",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
},
+ { "pseudo", "ddi_pseudo", "signalfd",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
+ },
{ "pseudo", "ddi_pseudo", "rsm",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
},
diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile
index dbda6c0c31..a968fa45f7 100644
--- a/usr/src/lib/libc/amd64/Makefile
+++ b/usr/src/lib/libc/amd64/Makefile
@@ -902,6 +902,7 @@ PORTSYS= \
sidsys.o \
siginterrupt.o \
signal.o \
+ signalfd.o \
sigpending.o \
sigstack.o \
stat.o \
diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com
index 4ebd6473a9..c0f74678f8 100644
--- a/usr/src/lib/libc/i386/Makefile.com
+++ b/usr/src/lib/libc/i386/Makefile.com
@@ -942,6 +942,7 @@ PORTSYS= \
sidsys.o \
siginterrupt.o \
signal.o \
+ signalfd.o \
sigpending.o \
sigstack.o \
stat.o \
diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers
index f7f6e6a137..d62c86b364 100644
--- a/usr/src/lib/libc/port/mapfile-vers
+++ b/usr/src/lib/libc/port/mapfile-vers
@@ -93,6 +93,11 @@ $if _x86 && _ELF64
$add amd64
$endif
+SYMBOL_VERSION ILLUMOS_0.18 { # signalfd
+ protected:
+ signalfd;
+} ILLUMOS_0.17;
+
SYMBOL_VERSION ILLUMOS_0.17 { # glob(3C) LFS
$if lf64
protected:
diff --git a/usr/src/lib/libc/port/sys/signalfd.c b/usr/src/lib/libc/port/sys/signalfd.c
new file mode 100644
index 0000000000..0080c52bdf
--- /dev/null
+++ b/usr/src/lib/libc/port/sys/signalfd.c
@@ -0,0 +1,60 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015, Joyent, Inc.
+ */
+
+#include <sys/signalfd.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+
+/*
+ * Create a new signalfd descriptor (fd == -1) or replace the signal mask of
+ * an existing one (fd != -1).
+ *
+ * When creating, flags may contain only SFD_NONBLOCK and/or SFD_CLOEXEC;
+ * anything else fails with EINVAL.  The flags are not examined when an
+ * existing descriptor is supplied.
+ *
+ * Returns the descriptor on success, or -1 with errno set.
+ */
+int
+signalfd(int fd, const sigset_t *mask, int flags)
+{
+	const int created = (fd == -1);
+
+	if (created) {
+		int oflags;
+
+		if ((flags & ~(SFD_NONBLOCK | SFD_CLOEXEC)) != 0) {
+			errno = EINVAL;
+			return (-1);
+		}
+
+		oflags = O_RDONLY |
+		    ((flags & SFD_NONBLOCK) ? O_NONBLOCK : 0) |
+		    ((flags & SFD_CLOEXEC) ? O_CLOEXEC : 0);
+
+		fd = open("/dev/signalfd", oflags);
+		if (fd < 0)
+			return (-1);
+	}
+
+	if (ioctl(fd, SIGNALFDIOC_MASK, mask) == 0)
+		return (fd);
+
+	if (created) {
+		/*
+		 * Don't leak the descriptor we just opened, and make sure
+		 * close() cannot disturb the errno left by the ioctl.
+		 */
+		int saved = errno;
+
+		(void) close(fd);
+		errno = saved;
+	}
+
+	/*
+	 * If we were asked to modify an existing sigfd, the failure means
+	 * it is not a valid fd or not a sigfd; the errno set by ioctl is
+	 * passed through unchanged for those cases.
+	 */
+	return (-1);
+}
diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com
index 2228bc848a..dc50cf3b38 100644
--- a/usr/src/lib/libc/sparc/Makefile.com
+++ b/usr/src/lib/libc/sparc/Makefile.com
@@ -976,6 +976,7 @@ PORTSYS= \
sidsys.o \
siginterrupt.o \
signal.o \
+ signalfd.o \
sigpending.o \
sigstack.o \
stat.o \
diff --git a/usr/src/man/man3c/Makefile b/usr/src/man/man3c/Makefile
index c38d65a57a..9ba004eff7 100644
--- a/usr/src/man/man3c/Makefile
+++ b/usr/src/man/man3c/Makefile
@@ -411,6 +411,7 @@ MANFILES= __fbufsize.3c \
sigfpe.3c \
siginterrupt.3c \
signal.3c \
+ signalfd.3c \
sigqueue.3c \
sigsetops.3c \
sigstack.3c \
diff --git a/usr/src/man/man3c/signalfd.3c b/usr/src/man/man3c/signalfd.3c
new file mode 100644
index 0000000000..43699a50a5
--- /dev/null
+++ b/usr/src/man/man3c/signalfd.3c
@@ -0,0 +1,192 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\" Copyright 2015, Joyent, Inc.
+.\"
+.Dd "Jun 15, 2015"
+.Dt SIGNALFD 3C
+.Os
+.Sh NAME
+.Nm signalfd
+.Nd create or modify a file descriptor for signal handling
+.Sh SYNOPSIS
+.In sys/signalfd.h
+.
+.Ft int
+.Fo signalfd
+.Fa "int fd"
+.Fa "const sigset_t *mask"
+.Fa "int flags"
+.Fc
+.
+.Sh DESCRIPTION
+The
+.Fn signalfd
+function returns a file descriptor that can be used
+for synchronous consumption of signals. The file descriptor can be operated
+upon via
+.Xr read 2
+and the facilities that notify of file descriptor activity (e.g.
+.Xr poll 2 ,
+.Xr port_get 3C ,
+.Xr epoll_wait 3C
+). To dispose of the instance
+.Xr close 2
+should be called on the file descriptor.
+.Pp
+If the
+.Va fd
+argument is -1, a new signalfd file descriptor will be
+returned, otherwise the
+.Va fd
+argument should be an existing signalfd file descriptor whose signal mask will
+be updated.
+.Pp
+The
+.Va mask
+argument specifies the set of signals that are relevant to the file descriptor.
+It may be manipulated with the standard signal set manipulation functions
+documented in
+.Xr sigsetops 3C .
+Signals in the mask which cannot be caught (e.g.
+.Fa SIGKILL )
+are ignored.
+.Pp
+The
+.Va flags
+argument specifies additional parameters for the instance, and can have any of
+the following values:
+.Bl -tag -width Dv
+.It Sy SFD_CLOEXEC
+Instance will be closed upon an
+.Xr exec 2 ;
+see description for
+.Fa O_CLOEXEC
+in
+.Xr open 2 .
+.It Sy SFD_NONBLOCK
+Instance will be set to be non-blocking. A
+.Xr read 2
+on a signalfd instance that has been initialized with
+.Fa SFD_NONBLOCK ,
+or made non-blocking in other ways, will return
+.Er EAGAIN
+in lieu of blocking if there are no signals from the
+.Va mask
+that are pending.
+.El
+.Pp
+As with
+.Xr sigwait 2 ,
+reading a signal from the file descriptor will consume the signal. The signals
+used with signalfd file descriptors are normally first blocked so that their
+handler does not run when a signal arrives. If the signal is not blocked the
+behavior matches that of
+.Xr sigwait 2 ;
+if a
+.Xr read 2
+is pending then the signal is consumed by the read, otherwise the signal is
+consumed by the handler.
+.Pp
+The following operations can be performed upon a signalfd file descriptor:
+.Bl -tag -width Dv
+.It Sy read(2)
+Reads and consumes one or more of the pending signals that match the file
+descriptor's
+.Va mask .
+The read buffer must be large enough to hold one or more
+.Vt signalfd_siginfo
+structures, which is described below.
+.Xr read 2
+will block if there are no matching signals pending, or return
+.Er EAGAIN
+if the instance was created with
+.Fa SFD_NONBLOCK .
+After a
+.Xr fork 2 ,
+if the child reads from the descriptor it will only consume signals from itself.
+.It Sy poll(2)
+Provide notification when one of the signals from the
+.Va mask
+arrives.
+.Fa POLLIN
+and
+.Fa POLLRDNORM
+will be set.
+.It Sy close(2)
+Closes the descriptor.
+.El
+.Pp
+The
+.Vt signalfd_siginfo
+structure returned from
+.Xr read 2
+is a fixed size 128 byte structure defined as follows:
+.Bd -literal
+typedef struct signalfd_siginfo {
+ uint32_t ssi_signo; /* signal from signal.h */
+ int32_t ssi_errno; /* error from errno.h */
+ int32_t ssi_code; /* signal code */
+ uint32_t ssi_pid; /* PID of sender */
+ uint32_t ssi_uid; /* real UID of sender */
+ int32_t ssi_fd; /* file descriptor (SIGIO) */
+ uint32_t ssi_tid; /* unused */
+ uint32_t ssi_band; /* band event (SIGIO) */
+ uint32_t ssi_overrun; /* unused */
+ uint32_t ssi_trapno; /* trap number that caused signal */
+ int32_t ssi_status; /* exit status or signal (SIGCHLD) */
+ int32_t ssi_int; /* unused */
+ uint64_t ssi_ptr; /* unused */
+ uint64_t ssi_utime; /* user CPU time consumed (SIGCHLD) */
+ uint64_t ssi_stime; /* system CPU time consumed (SIGCHLD) */
+ uint64_t ssi_addr; /* address that generated signal */
+ uint8_t ssi_pad[48]; /* pad size to 128 bytes */
+} signalfd_siginfo_t;
+.Ed
+.Sh RETURN VALUES
+Upon successful completion, a file descriptor associated with the instance
+is returned. Otherwise, -1 is returned and errno is set to indicate the error.
+When
+.Va fd
+is not -1 and there is no error, the value of
+.Va fd
+is returned.
+.Sh ERRORS
+The
+.Fn signalfd
+function will fail if:
+.Bl -tag -width Er
+.It Er EBADF
+The
+.Va fd
+descriptor is invalid.
+.It Er EFAULT
+The
+.Va mask
+address is invalid.
+.It Er EINVAL
+The
+.Va fd
+descriptor is not a signalfd descriptor or the
+.Va flags
+are invalid.
+.It Er EMFILE
+There are currently
+.Va OPEN_MAX
+file descriptors open in the calling process.
+.It Er ENODEV
+Unable to allocate state for the file descriptor.
+.El
+.Sh SEE ALSO
+.Xr poll 2 ,
+.Xr sigwait 2 ,
+.Xr sigsetops 3C ,
+.Xr sigwaitinfo 3C ,
+.Xr signal.h 3HEAD
diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf
index 4551ca095c..697b75b6c2 100644
--- a/usr/src/pkg/manifests/system-header.mf
+++ b/usr/src/pkg/manifests/system-header.mf
@@ -1428,6 +1428,7 @@ file path=usr/include/sys/shm_impl.h
file path=usr/include/sys/sid.h
file path=usr/include/sys/siginfo.h
file path=usr/include/sys/signal.h
+file path=usr/include/sys/signalfd.h
file path=usr/include/sys/skein.h
file path=usr/include/sys/sleepq.h
file path=usr/include/sys/smbios.h
diff --git a/usr/src/pkg/manifests/system-kernel.mf b/usr/src/pkg/manifests/system-kernel.mf
index d3cf047cb9..a00bb109cc 100644
--- a/usr/src/pkg/manifests/system-kernel.mf
+++ b/usr/src/pkg/manifests/system-kernel.mf
@@ -94,6 +94,9 @@ dir path=lib/svc
dir path=lib/svc/manifest group=sys
dir path=lib/svc/manifest/system group=sys
dir path=lib/svc/method
+dir path=usr/kernel group=sys
+dir path=usr/kernel/drv group=sys
+dir path=usr/kernel/drv/$(ARCH64) group=sys
dir path=usr/share/man
dir path=usr/share/man/man1m
dir path=usr/share/man/man2
@@ -254,6 +257,7 @@ $(i386_ONLY)driver name=sd perms="* 0640 root sys" \
driver name=sgen perms="* 0600 root sys" \
alias=scsa,08.bfcp \
alias=scsa,08.bvhci
+driver name=signalfd perms="* 0666 root sys"
driver name=simnet clone_perms="simnet 0666 root sys" perms="* 0666 root sys"
$(i386_ONLY)driver name=smbios perms="smbios 0444 root sys"
driver name=softmac
@@ -820,6 +824,9 @@ file path=lib/svc/manifest/system/scheduler.xml group=sys mode=0444
file path=lib/svc/method/svc-dumpadm mode=0555
file path=lib/svc/method/svc-intrd mode=0555
file path=lib/svc/method/svc-scheduler mode=0555
+file path=usr/kernel/drv/$(ARCH64)/signalfd group=sys
+$(i386_ONLY)file path=usr/kernel/drv/signalfd group=sys
+file path=usr/kernel/drv/signalfd.conf group=sys
$(sparc_ONLY)file path=usr/share/man/man1m/monitor.1m
$(sparc_ONLY)file path=usr/share/man/man1m/obpsym.1m
# On SPARC driver/bscv is Serverblade1 specific, and in system/kernel/platform
diff --git a/usr/src/pkg/manifests/system-library.man3c.inc b/usr/src/pkg/manifests/system-library.man3c.inc
index ae061edac9..30999ee484 100644
--- a/usr/src/pkg/manifests/system-library.man3c.inc
+++ b/usr/src/pkg/manifests/system-library.man3c.inc
@@ -406,6 +406,7 @@ file path=usr/share/man/man3c/shm_unlink.3c
file path=usr/share/man/man3c/sigfpe.3c
file path=usr/share/man/man3c/siginterrupt.3c
file path=usr/share/man/man3c/signal.3c
+file path=usr/share/man/man3c/signalfd.3c
file path=usr/share/man/man3c/sigqueue.3c
file path=usr/share/man/man3c/sigsetops.3c
file path=usr/share/man/man3c/sigstack.3c
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index e64cf0db35..e0530c886f 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -993,6 +993,8 @@ DEVPOOL_OBJS += devpool.o
EVENTFD_OBJS += eventfd.o
+SIGNALFD_OBJS += signalfd.o
+
I8042_OBJS += i8042.o
KB8042_OBJS += \
diff --git a/usr/src/uts/common/io/signalfd.c b/usr/src/uts/common/io/signalfd.c
new file mode 100644
index 0000000000..32f8f85f7a
--- /dev/null
+++ b/usr/src/uts/common/io/signalfd.c
@@ -0,0 +1,774 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Support for the signalfd facility, a Linux-borne facility for
+ * file descriptor-based synchronous signal consumption.
+ *
+ * As described on the signalfd(3C) man page, the general idea behind these
+ * file descriptors is that they can be used to synchronously consume signals
+ * via the read(2) syscall. That capability already exists with the
+ * sigwaitinfo(3C) function but the key advantage of signalfd is that, because
+ * it is file descriptor based, poll(2) can be used to determine when signals
+ * are available to be consumed.
+ *
+ * The general implementation uses signalfd_state to hold both the signal set
+ * and poll head for an open file descriptor. Because a process can be using
+ * different sigfds with different signal sets, each signalfd_state poll head
+ * can be thought of as an independent signal stream and the thread(s) waiting
+ * on that stream will get poll notification when any signal in the
+ * corresponding set is received.
+ *
+ * The sigfd_proc_state_t struct lives on the proc_t and maintains per-proc
+ * state for function callbacks and data when the proc needs to do work during
+ * signal delivery for pollwakeup.
+ *
+ * The read side of the implementation is straightforward and mimics the
+ * kernel behavior for sigtimedwait(). Signals continue to live on either
+ * the proc's p_sig, or thread's t_sig, member. Read consumes the signal so
+ * that it is no longer pending.
+ *
+ * The poll side is more complex since all of the sigfds on the process need
+ * to be examined every time a signal is delivered to the process in order to
+ * pollwake any thread waiting in poll for that signal.
+ *
+ * Because it is likely that a process will only be using one, or a few, sigfds,
+ * but many total file descriptors, we maintain a list of sigfds which need
+ * pollwakeup. The list lives on the proc's p_sigfd struct. In this way only
+ * zero, or a few, of the state structs will need to be examined every time a
+ * signal is delivered to the process, instead of having to examine all of the
+ * file descriptors to find the state structs. When a state struct with a
+ * matching signal set is found then pollwakeup is called.
+ *
+ * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list
+ * will clear out on its own. There is an exit helper (signalfd_exit_helper)
+ * which cleans up any remaining per-proc state when the process exits.
+ *
+ * The main complexity with signalfd is the interaction of forking and polling.
+ * This interaction is complex because now two processes have a fd that
+ * references the same dev_t (and its associated signalfd_state), but signals
+ * go to only one of those processes. Also, we don't know when one of the
+ * processes closes its fd because our 'close' entry point is only called when
+ * the last fd is closed (which could be by either process).
+ *
+ * Because the state struct is referenced by both file descriptors, and the
+ * state struct represents a signal stream needing a pollwakeup, if both
+ * processes were polling then both processes would get a pollwakeup when a
+ * signal arrives for either process (that is, the pollhead is associated with
+ * our dev_t so when a signal arrives the pollwakeup wakes up all waiters).
+ *
+ * Fortunately this is not a common problem in practice, but the implementation
+ * attempts to mitigate unexpected behavior. The typical behavior is that the
+ * parent has been polling the signalfd (which is why it was open in the first
+ * place) and the parent might have a pending signalfd_state (with the
+ * pollhead) on its per-process sigfd_list. After the fork the child will
+ * simply close that fd (among others) as part of the typical fork/close/exec
+ * pattern. Because the child will never poll that fd, it will never get any
+ * state onto its own sigfd_list (the child starts with a null list). The
+ * intention is that the child sees no pollwakeup activity for signals unless
+ * it explicitly reinvokes poll on the sigfd.
+ *
+ * As background, there are two primary polling cases to consider when the
+ * parent process forks:
+ * 1) If any thread is blocked in poll(2) then both the parent and child will
+ * return from the poll syscall with EINTR. This means that if either
+ * process wants to re-poll on a sigfd then it needs to re-run poll and
+ * would come back in to the signalfd_poll entry point. The parent would
+ * already have the dev_t's state on its sigfd_list and the child would not
+ * have anything there unless it called poll again on its fd.
+ * 2) If the process is using /dev/poll(7D) then the polling info is being
+ * cached by the poll device and the process might not currently be blocked
+ * on anything polling related. A subsequent DP_POLL ioctl will not invoke
+ * our signalfd_poll entry point again. Because the parent still has its
+ * sigfd_list setup, an incoming signal will hit our signalfd_pollwake_cb
+ * entry point, which in turn calls pollwake, and /dev/poll will do the
+ * right thing on DP_POLL. The child will not have a sigfd_list yet so the
+ * signal will not cause a pollwakeup. The dp code does its own handling for
+ * cleaning up its cache.
+ *
+ * This leaves only one odd corner case. If the parent and child both use
+ * the dup-ed sigfd to poll then when a signal is delivered to either process
+ * there is no way to determine which one should get the pollwakeup (since
+ * both processes will be queued on the same signal stream poll head). What
+ * happens in this case is that both processes will return from poll, but only
+ * one of them will actually have a signal to read. The other will return
+ * from read with EAGAIN, or block. This case is actually similar to the
+ * situation within a single process which got two different sigfd's with the
+ * same mask (or poll on two fd's that are dup-ed). Both would return from poll
+ * when a signal arrives but only one read would consume the signal and the
+ * other read would fail or block. Applications which poll on shared fd's
+ * cannot assume that a subsequent read will actually obtain data.
+ */
+
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/signalfd.h>
+#include <sys/conf.h>
+#include <sys/sysmacros.h>
+#include <sys/filio.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <sys/schedctl.h>
+#include <sys/id_space.h>
+#include <sys/sdt.h>
+
+/*
+ * Per-open state for one signalfd instance.  It is allocated as soft state
+ * keyed by minor number in signalfd_open() and released in signalfd_close().
+ */
+typedef struct signalfd_state signalfd_state_t;
+
+struct signalfd_state {
+ kmutex_t sfd_lock; /* taken around accesses to sfd_set */
+ pollhead_t sfd_pollhd; /* poll head handed back by signalfd_poll */
+ k_sigset_t sfd_set; /* signals for this fd (SIGNALFDIOC_MASK) */
+ signalfd_state_t *sfd_next; /* next on global list (signalfd_lock) */
+};
+
+/*
+ * Internal global variables.  signalfd_lock is held by open and close while
+ * they allocate or free a minor number and its soft state and while they
+ * edit the signalfd_state list.
+ */
+static kmutex_t signalfd_lock; /* protects the globals below */
+static dev_info_t *signalfd_devi; /* device info */
+static id_space_t *signalfd_minor; /* minor number arena */
+static void *signalfd_softstate; /* softstate pointer */
+static signalfd_state_t *signalfd_state; /* global list of state */
+
+/*
+ * Make sure the current process's wake list has an entry for the given
+ * state; if one is already present nothing is changed.  The caller must hold
+ * p_lock and must already have installed p_sigfd.
+ *
+ * NOTE(review): the KM_SLEEP allocation below is performed with p_lock held;
+ * confirm that this is acceptable.
+ */
+static void
+signalfd_wake_list_add(signalfd_state_t *state)
+{
+	proc_t *p = curproc;
+	sigfd_proc_state_t *pstate;
+	sigfd_wake_list_t *ent;
+
+	ASSERT(MUTEX_HELD(&p->p_lock));
+	ASSERT(p->p_sigfd != NULL);
+
+	pstate = (sigfd_proc_state_t *)p->p_sigfd;
+
+	for (ent = list_head(&pstate->sigfd_list); ent != NULL;
+	    ent = list_next(&pstate->sigfd_list, ent)) {
+		/* already queued for wakeup; nothing more to do */
+		if (ent->sigfd_wl_state == state)
+			return;
+	}
+
+	ent = kmem_zalloc(sizeof (sigfd_wake_list_t), KM_SLEEP);
+	ent->sigfd_wl_state = state;
+	list_insert_head(&pstate->sigfd_list, ent);
+}
+
+/*
+ * Unlink a single wake-list entry from the given list and free it.  The
+ * entry must not be referenced after this returns.
+ */
+static void
+signalfd_wake_rm(list_t *lst, sigfd_wake_list_t *wlp)
+{
+ list_remove(lst, wlp);
+ kmem_free(wlp, sizeof (sigfd_wake_list_t));
+}
+
+/*
+ * Drop the wake-list entry (if any) that refers to the given state from
+ * process p.  Once the list is empty the whole per-process signalfd state
+ * is torn down and p_sigfd is reset to NULL.  The caller must hold p_lock.
+ */
+static void
+signalfd_wake_list_rm(proc_t *p, signalfd_state_t *state)
+{
+	sigfd_proc_state_t *pstate;
+	sigfd_wake_list_t *ent;
+	list_t *lst;
+
+	ASSERT(MUTEX_HELD(&p->p_lock));
+
+	pstate = (sigfd_proc_state_t *)p->p_sigfd;
+	if (pstate == NULL)
+		return;
+
+	lst = &pstate->sigfd_list;
+
+	/* find the first entry for this state, if there is one */
+	ent = list_head(lst);
+	while (ent != NULL && ent->sigfd_wl_state != state)
+		ent = list_next(lst, ent);
+
+	if (ent != NULL)
+		signalfd_wake_rm(lst, ent);
+
+	if (!list_is_empty(lst))
+		return;
+
+	/* nothing left to wake: release the per-process state */
+	pstate->sigfd_pollwake_cb = NULL;
+	list_destroy(lst);
+	kmem_free(pstate, sizeof (sigfd_proc_state_t));
+	p->p_sigfd = NULL;
+}
+
+/*
+ * Disable the pollwake callback for process p and free every entry on its
+ * wake list.  The list head and the per-process state structure themselves
+ * are left for the caller to destroy.  The caller must hold p_lock and
+ * p_sigfd must be non-NULL.
+ */
+static void
+signalfd_wake_list_cleanup(proc_t *p)
+{
+	sigfd_proc_state_t *pstate;
+	sigfd_wake_list_t *ent;
+
+	ASSERT(MUTEX_HELD(&p->p_lock));
+
+	pstate = (sigfd_proc_state_t *)p->p_sigfd;
+	pstate->sigfd_pollwake_cb = NULL;
+
+	while ((ent = list_remove_head(&pstate->sigfd_list)) != NULL)
+		kmem_free(ent, sizeof (sigfd_wake_list_t));
+}
+
+/*
+ * Exit-time hook: release whatever per-process signalfd state is still
+ * attached to the current process.
+ */
+static void
+signalfd_exit_helper(void)
+{
+	proc_t *p = curproc;
+	sigfd_proc_state_t *pstate;
+
+	/* This being non-null is the only way we can get here */
+	ASSERT(p->p_sigfd != NULL);
+
+	mutex_enter(&p->p_lock);
+	pstate = (sigfd_proc_state_t *)p->p_sigfd;
+
+	/* empty the wake list first, then tear down the container */
+	signalfd_wake_list_cleanup(p);
+	list_destroy(&pstate->sigfd_list);
+	kmem_free(pstate, sizeof (sigfd_proc_state_t));
+	p->p_sigfd = NULL;
+	mutex_exit(&p->p_lock);
+}
+
+/*
+ * Signal-delivery callback, invoked with p_lock held each time a signal is
+ * delivered to the process.  The process's wake list is walked; every state
+ * whose signal set contains sig and which has pollers queued on its poll
+ * head is taken off the list and given a pollwakeup.
+ *
+ * Keeping a short per-process list avoids examining every file descriptor
+ * of the process.  A union mask over all list elements could let us skip the
+ * walk entirely for uninteresting signals, but since the list is expected to
+ * be very short that is not currently done.
+ */
+static void
+signalfd_pollwake_cb(void *arg0, int sig)
+{
+	proc_t *p = (proc_t *)arg0;
+	list_t *lst;
+	sigfd_wake_list_t *wlp, *next;
+
+	ASSERT(MUTEX_HELD(&p->p_lock));
+
+	if (p->p_sigfd == NULL)
+		return;
+
+	lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
+
+	for (wlp = list_head(lst); wlp != NULL; wlp = next) {
+		signalfd_state_t *state = wlp->sigfd_wl_state;
+		int wake;
+
+		/* fetch the successor first; wlp may be freed below */
+		next = list_next(lst, wlp);
+
+		mutex_enter(&state->sfd_lock);
+		wake = sigismember(&state->sfd_set, sig) &&
+		    state->sfd_pollhd.ph_list != NULL;
+		if (wake) {
+			/* remove it from the list */
+			signalfd_wake_rm(lst, wlp);
+		}
+		mutex_exit(&state->sfd_lock);
+
+		if (wake)
+			pollwakeup(&state->sfd_pollhd, POLLRDNORM | POLLIN);
+	}
+}
+
+/*
+ * open(9E) entry point.  Only the well-known SIGNALFDMNRN_SIGNALFD minor may
+ * be opened; each successful open is redirected to a freshly allocated minor
+ * whose soft state is pushed onto the global state list.
+ *
+ * Returns 0 on success, ENXIO for any other minor, or ENODEV if soft state
+ * could not be allocated.
+ */
+_NOTE(ARGSUSED(1))
+static int
+signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+	signalfd_state_t *state;
+	major_t major = getemajor(*devp);
+	minor_t minor;
+
+	if (getminor(*devp) != SIGNALFDMNRN_SIGNALFD)
+		return (ENXIO);
+
+	mutex_enter(&signalfd_lock);
+
+	minor = (minor_t)id_allocff(signalfd_minor);
+
+	if (ddi_soft_state_zalloc(signalfd_softstate, minor) == DDI_SUCCESS) {
+		state = ddi_get_soft_state(signalfd_softstate, minor);
+		*devp = makedevice(major, minor);
+
+		/* link the new instance at the head of the global list */
+		state->sfd_next = signalfd_state;
+		signalfd_state = state;
+
+		mutex_exit(&signalfd_lock);
+		return (0);
+	}
+
+	/* no soft state: hand the minor number back */
+	id_free(signalfd_minor, minor);
+	mutex_exit(&signalfd_lock);
+
+	return (ENODEV);
+}
+
+/*
+ * Consume one signal from our set in a manner similar to sigtimedwait().
+ * The block parameter is used to control whether we wait for a signal or
+ * return immediately if no signal is pending. We use the thread's t_sigwait
+ * member in the same way that it is used by sigtimedwait.
+ *
+ * set is the signal set to wait on, uio receives exactly one
+ * signalfd_siginfo_t describing the consumed signal.
+ *
+ * Return 0 if we successfully consumed a signal or an errno if not:
+ * EAGAIN when the wait reports that nothing is pending, EINTR when we were
+ * woken without a current signal from our set, or the error from uiomove().
+ *
+ * NOTE(review): lwp_cursig is cleared before uiomove() runs, so it looks
+ * like a signal is still consumed when the copy-out fails -- confirm that
+ * this is intended.
+ */
+static int
+consume_signal(k_sigset_t set, uio_t *uio, boolean_t block)
+{
+ k_sigset_t oldmask;
+ kthread_t *t = curthread;
+ klwp_t *lwp = ttolwp(t);
+ proc_t *p = ttoproc(t);
+ timespec_t now;
+ timespec_t *rqtp = NULL; /* null means blocking */
+ int timecheck = 0;
+ int ret = 0;
+ k_siginfo_t info, *infop;
+ signalfd_siginfo_t ssi, *ssp = &ssi;
+
+ /* For a non-blocking read the wait deadline is the current time. */
+ if (block == B_FALSE) {
+ timecheck = timechanged;
+ gethrestime(&now);
+ rqtp = &now; /* non-blocking check for pending signals */
+ }
+
+ t->t_sigwait = set;
+
+ mutex_enter(&p->p_lock);
+ /*
+ * set the thread's signal mask to unmask those signals in the
+ * specified set.
+ */
+ schedctl_finish_sigblock(t);
+ oldmask = t->t_hold;
+ sigdiffset(&t->t_hold, &t->t_sigwait);
+
+ /*
+ * Based on rqtp, wait indefinitely until we take a signal in our set
+ * or return immediately if there are no signals pending from our set.
+ * A positive return from the wait is simply retried.
+ */
+ while ((ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock, rqtp,
+ timecheck)) > 0)
+ continue;
+
+ /* Restore thread's signal mask to its previous value. */
+ t->t_hold = oldmask;
+ t->t_sig_check = 1; /* so post_syscall sees new t_hold mask */
+
+ if (ret == -1) {
+ /* no signals pending */
+ mutex_exit(&p->p_lock);
+ sigemptyset(&t->t_sigwait);
+ return (EAGAIN); /* no signals pending */
+ }
+
+ /* Don't bother with signal if it is not in request set. */
+ if (lwp->lwp_cursig == 0 ||
+ !sigismember(&t->t_sigwait, lwp->lwp_cursig)) {
+ mutex_exit(&p->p_lock);
+ /*
+ * lwp_cursig is zero if pokelwps() awakened cv_wait_sig().
+ * This happens if some other thread in this process called
+ * forkall() or exit().
+ */
+ sigemptyset(&t->t_sigwait);
+ return (EINTR);
+ }
+
+ /*
+ * Use the queued siginfo when there is one; otherwise synthesize a
+ * minimal one that carries only the signal number and SI_NOINFO.
+ */
+ if (lwp->lwp_curinfo) {
+ infop = &lwp->lwp_curinfo->sq_info;
+ } else {
+ infop = &info;
+ bzero(infop, sizeof (info));
+ infop->si_signo = lwp->lwp_cursig;
+ infop->si_code = SI_NOINFO;
+ }
+
+ lwp->lwp_ru.nsignals++;
+
+ /* Mark the signal as taken: the lwp no longer has a current signal. */
+ DTRACE_PROC2(signal__clear, int, ret, ksiginfo_t *, infop);
+ lwp->lwp_cursig = 0;
+ lwp->lwp_extsig = 0;
+ mutex_exit(&p->p_lock);
+
+ /* Convert k_siginfo into external, datamodel independent, struct. */
+ bzero(ssp, sizeof (*ssp));
+ ssp->ssi_signo = infop->si_signo;
+ ssp->ssi_errno = infop->si_errno;
+ ssp->ssi_code = infop->si_code;
+ ssp->ssi_pid = infop->si_pid;
+ ssp->ssi_uid = infop->si_uid;
+ ssp->ssi_fd = infop->si_fd;
+ ssp->ssi_band = infop->si_band;
+ ssp->ssi_trapno = infop->si_trapno;
+ ssp->ssi_status = infop->si_status;
+ ssp->ssi_utime = infop->si_utime;
+ ssp->ssi_stime = infop->si_stime;
+ ssp->ssi_addr = (uint64_t)(intptr_t)infop->si_addr;
+
+ ret = uiomove(ssp, sizeof (*ssp), UIO_READ, uio);
+
+ /* infop may point into lwp_curinfo, so free it only after the copy. */
+ if (lwp->lwp_curinfo) {
+ siginfofree(lwp->lwp_curinfo);
+ lwp->lwp_curinfo = NULL;
+ }
+ sigemptyset(&t->t_sigwait);
+ return (ret);
+}
+
+/*
+ * read(9E) entry point.  This is similar to sigtimedwait: based on the fd
+ * mode we may wait until a signal within our specified set is posted, and we
+ * then consume as many available signals within our set as fit in the
+ * caller's buffer.
+ *
+ * Returns 0 if at least one signalfd_siginfo_t was copied out.  Otherwise
+ * returns an errno value: EINVAL if the buffer cannot hold one structure or
+ * the descriptor's mask is empty, else the error from consume_signal().
+ *
+ * Errors are returned as plain errno values, as the other entry points in
+ * this driver do, rather than through set_errno().
+ */
+_NOTE(ARGSUSED(2))
+static int
+signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
+{
+	signalfd_state_t *state;
+	minor_t minor = getminor(dev);
+	boolean_t block = B_TRUE;
+	k_sigset_t set;
+	boolean_t got_one = B_FALSE;
+	int res;
+
+	if (uio->uio_resid < sizeof (signalfd_siginfo_t))
+		return (EINVAL);
+
+	state = ddi_get_soft_state(signalfd_softstate, minor);
+
+	if (uio->uio_fmode & (FNDELAY|FNONBLOCK))
+		block = B_FALSE;
+
+	/*
+	 * Take a snapshot of the mask under the lock and use only that
+	 * snapshot below, so that a concurrent SIGNALFDIOC_MASK cannot make
+	 * the emptiness check and the consumption disagree.
+	 */
+	mutex_enter(&state->sfd_lock);
+	set = state->sfd_set;
+	mutex_exit(&state->sfd_lock);
+
+	if (sigisempty(&set))
+		return (EINVAL);
+
+	do {
+		res = consume_signal(set, uio, block);
+		if (res == 0)
+			got_one = B_TRUE;
+
+		/*
+		 * After consuming one signal we won't block trying to consume
+		 * further signals.
+		 */
+		block = B_FALSE;
+	} while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));
+
+	/* A partial success is still a success; report what we copied. */
+	if (got_one)
+		res = 0;
+
+	return (res);
+}
+
+/*
+ * Report whether any signal in set is pending on either the process or the
+ * thread; the result is non-zero if so.  If k_sigset_t's were a single word,
+ * we would do:
+ *	return (((p->p_sig | t->t_sig) & set) & fillset);
+ * Only the last word needs masking with its fill set (FILLSET2).
+ */
+static int
+signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
+{
+	uint_t w0, w1, w2;
+
+	w0 = (p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) &
+	    set.__sigbits[0];
+	w1 = (p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) &
+	    set.__sigbits[1];
+	w2 = (p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) &
+	    set.__sigbits[2];
+
+	return (w0 | w1 | (w2 & FILLSET2));
+}
+
+/*
+ * chpoll(9E) entry point.  POLLIN/POLLRDNORM are reported when a signal in
+ * this descriptor's set is already pending on the calling process or
+ * thread.  When no requested event is ready and no earlier descriptor in
+ * the poll set has fired (!anyyet), the poll head is handed back and this
+ * state is queued on the process's wake list so that signalfd_pollwake_cb()
+ * can issue the pollwakeup when a matching signal is delivered.
+ *
+ * Installing p_sigfd, enabling the callback and queueing the state are all
+ * done under one continuous hold of p_lock.  signalfd_wake_list_rm() frees
+ * p_sigfd and sets it to NULL when the list empties, so dropping the lock
+ * between installing and dereferencing p_sigfd would allow a concurrent
+ * close to pull it out from under us.
+ *
+ * Always returns 0.
+ */
+_NOTE(ARGSUSED(4))
+static int
+signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp)
+{
+	signalfd_state_t *state;
+	minor_t minor = getminor(dev);
+	kthread_t *t = curthread;
+	proc_t *p = ttoproc(t);
+	sigfd_proc_state_t *pstate = NULL;
+	short revents = 0;
+
+	state = ddi_get_soft_state(signalfd_softstate, minor);
+
+	mutex_enter(&state->sfd_lock);
+
+	if (signalfd_sig_pending(p, t, state->sfd_set) != 0)
+		revents |= POLLRDNORM | POLLIN;
+
+	mutex_exit(&state->sfd_lock);
+
+	if (!(*reventsp = revents & events) && !anyyet) {
+		*phpp = &state->sfd_pollhd;
+
+		/*
+		 * Enable pollwakeup handling.
+		 */
+		mutex_enter(&p->p_lock);
+		while (p->p_sigfd == NULL) {
+			if (pstate == NULL) {
+				/*
+				 * Allocate the per-process state without
+				 * p_lock held, then re-check under the lock.
+				 */
+				mutex_exit(&p->p_lock);
+				pstate = kmem_zalloc(
+				    sizeof (sigfd_proc_state_t), KM_SLEEP);
+				list_create(&pstate->sigfd_list,
+				    sizeof (sigfd_wake_list_t),
+				    offsetof(sigfd_wake_list_t, sigfd_wl_lst));
+				mutex_enter(&p->p_lock);
+				continue;
+			}
+			p->p_sigfd = pstate;
+			pstate = NULL;
+		}
+
+		if (pstate != NULL) {
+			/* someone beat us to it */
+			list_destroy(&pstate->sigfd_list);
+			kmem_free(pstate, sizeof (sigfd_proc_state_t));
+		}
+
+		if (((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb ==
+		    NULL) {
+			((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb =
+			    signalfd_pollwake_cb;
+		}
+		signalfd_wake_list_add(state);
+		mutex_exit(&p->p_lock);
+	}
+
+	return (0);
+}
+
+/*
+ * ioctl entry point.  The only command handled is SIGNALFDIOC_MASK, which
+ * copies a sigset_t in from the caller and installs it, converted with
+ * sigutok(), as this descriptor's signal set under sfd_lock.
+ *
+ * Returns 0 on success, EFAULT if the mask cannot be copied in, and ENOTTY
+ * for any other command.
+ */
+_NOTE(ARGSUSED(4))
+static int
+signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
+{
+	signalfd_state_t *state;
+	minor_t minor = getminor(dev);
+	sigset_t mask;
+
+	state = ddi_get_soft_state(signalfd_softstate, minor);
+
+	switch (cmd) {
+	case SIGNALFDIOC_MASK:
+		if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t),
+		    md) != 0) {
+			/*
+			 * Hand the errno value straight back, as the ENOTTY
+			 * path below does.  A driver entry point reports
+			 * failure through its return value and leaves
+			 * set_errno() to the system call layer.
+			 */
+			return (EFAULT);
+		}
+
+		mutex_enter(&state->sfd_lock);
+		sigutok(&mask, &state->sfd_set);
+		mutex_exit(&state->sfd_lock);
+
+		return (0);
+
+	default:
+		break;
+	}
+
+	return (ENOTTY);
+}
+
+/*
+ * Close entry point: tears down the per-open state for this minor.  Any
+ * pollers are woken with POLLERR, the state is removed from the calling
+ * process's wake list and from the global signalfd_state list, and the soft
+ * state and minor number are released.  Always returns 0.
+ */
+_NOTE(ARGSUSED(1))
+static int
+signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
+{
+ signalfd_state_t *state, **sp;
+ minor_t minor = getminor(dev);
+ proc_t *p = curproc;
+
+ state = ddi_get_soft_state(signalfd_softstate, minor);
+
+ /* Wake and detach anything still polling on this descriptor. */
+ if (state->sfd_pollhd.ph_list != NULL) {
+ pollwakeup(&state->sfd_pollhd, POLLERR);
+ pollhead_clean(&state->sfd_pollhd);
+ }
+
+ /*
+ * Make sure our state is removed from our proc's pollwake list.
+ * NOTE(review): this uses curproc; presumably the closing process is
+ * the one whose wake list holds this state -- confirm.
+ */
+ mutex_enter(&p->p_lock);
+ signalfd_wake_list_rm(p, state);
+ mutex_exit(&p->p_lock);
+
+ mutex_enter(&signalfd_lock);
+
+ /*
+ * Remove our state from our global list.  The walk stops at the link
+ * that points to our state; reaching the end of the list without
+ * finding it trips the VERIFY.
+ */
+ for (sp = &signalfd_state; *sp != state; sp = &((*sp)->sfd_next))
+ VERIFY(*sp != NULL);
+
+ *sp = (*sp)->sfd_next;
+
+ /* Release the soft state and return the minor number to the id space. */
+ ddi_soft_state_free(signalfd_softstate, minor);
+ id_free(signalfd_minor, minor);
+
+ mutex_exit(&signalfd_lock);
+
+ return (0);
+}
+
+/*
+ * Attach entry point.  Only DDI_ATTACH of the first instance is accepted.
+ * Under signalfd_lock this creates the minor-number id space, the soft state
+ * anchor and the "signalfd" minor node, then records the dev_info pointer
+ * and publishes signalfd_exit_helper through sigfd_exit_helper.  Anything
+ * already set up is undone in reverse order if a later step fails.
+ *
+ * Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+{
+	int rv = DDI_FAILURE;
+
+	if (!(cmd == DDI_ATTACH && signalfd_devi == NULL))
+		return (DDI_FAILURE);
+
+	mutex_enter(&signalfd_lock);
+
+	signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1);
+	if (signalfd_minor == NULL) {
+		cmn_err(CE_WARN, "signalfd couldn't create id space");
+		goto out;
+	}
+
+	if (ddi_soft_state_init(&signalfd_softstate,
+	    sizeof (signalfd_state_t), 0) != 0) {
+		cmn_err(CE_WARN, "signalfd failed to create soft state");
+		goto fail_idspace;
+	}
+
+	if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
+	    SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
+		cmn_err(CE_NOTE, "/dev/signalfd couldn't create minor node");
+		goto fail_softstate;
+	}
+
+	ddi_report_dev(devi);
+	signalfd_devi = devi;
+
+	sigfd_exit_helper = signalfd_exit_helper;
+
+	rv = DDI_SUCCESS;
+	goto out;
+
+fail_softstate:
+	ddi_soft_state_fini(&signalfd_softstate);
+fail_idspace:
+	id_space_destroy(signalfd_minor);
+out:
+	mutex_exit(&signalfd_lock);
+
+	return (rv);
+}
+
+/*
+ * Detach entry point.  Only DDI_DETACH is supported; every other command
+ * fails.  The global state list must already be empty.  Under signalfd_lock
+ * the id space, minor node and soft state anchor are released and the
+ * dev_info pointer and exit helper hook are cleared.
+ */
+_NOTE(ARGSUSED(0))
+static int
+signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	/* no per-open state may remain on the global list */
+	VERIFY(signalfd_state == NULL);
+
+	mutex_enter(&signalfd_lock);
+
+	id_space_destroy(signalfd_minor);
+	ddi_remove_minor_node(signalfd_devi, NULL);
+
+	signalfd_devi = NULL;
+	sigfd_exit_helper = NULL;
+
+	ddi_soft_state_fini(&signalfd_softstate);
+
+	mutex_exit(&signalfd_lock);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * getinfo entry point.  DDI_INFO_DEVT2DEVINFO yields the saved dev_info
+ * pointer and DDI_INFO_DEVT2INSTANCE yields instance 0; both succeed.  Any
+ * other request returns DDI_FAILURE and leaves *result untouched.
+ */
+_NOTE(ARGSUSED(0))
+static int
+signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
+{
+	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
+		*result = (void *)signalfd_devi;
+		return (DDI_SUCCESS);
+	}
+
+	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
+		*result = (void *)0;
+		return (DDI_SUCCESS);
+	}
+
+	return (DDI_FAILURE);
+}
+
+/*
+ * Character device entry points.  Only open, close, read, ioctl and poll are
+ * implemented by this driver; the remaining slots are filled with nulldev or
+ * nodev, and property operations go to ddi_prop_op.
+ */
+static struct cb_ops signalfd_cb_ops = {
+ signalfd_open, /* open */
+ signalfd_close, /* close */
+ nulldev, /* strategy */
+ nulldev, /* print */
+ nodev, /* dump */
+ signalfd_read, /* read */
+ nodev, /* write */
+ signalfd_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ signalfd_poll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_NEW | D_MP /* Driver compatibility flag */
+};
+
+/*
+ * Device operations.  Autoconfiguration goes through signalfd_info,
+ * signalfd_attach and signalfd_detach; the character entry points are
+ * supplied by signalfd_cb_ops.  There are no bus operations.
+ */
+static struct dev_ops signalfd_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ signalfd_info, /* get_dev_info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ signalfd_attach, /* attach */
+ signalfd_detach, /* detach */
+ nodev, /* reset */
+ &signalfd_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+/*
+ * Driver linkage: names the module and ties it to signalfd_ops.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops, /* module type (this is a pseudo driver) */
+ "signalfd support", /* name of module */
+ &signalfd_ops, /* driver ops */
+};
+
+/*
+ * Module linkage passed to mod_install(), mod_info() and mod_remove() by
+ * _init(), _info() and _fini(); its only linkage element is modldrv.
+ */
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modldrv,
+ NULL
+};
+
+/*
+ * Module load hook: installs the module described by modlinkage and returns
+ * whatever mod_install() reports.
+ */
+int
+_init(void)
+{
+	int rv;
+
+	rv = mod_install(&modlinkage);
+	return (rv);
+}
+
+/*
+ * Module information hook: passes modinfop to mod_info() for this module's
+ * linkage and returns its result.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv;
+
+	rv = mod_info(&modlinkage, modinfop);
+	return (rv);
+}
+
+/*
+ * Module unload hook: removes the module described by modlinkage and returns
+ * whatever mod_remove() reports.
+ */
+int
+_fini(void)
+{
+	int rv;
+
+	rv = mod_remove(&modlinkage);
+	return (rv);
+}
diff --git a/usr/src/uts/common/io/signalfd.conf b/usr/src/uts/common/io/signalfd.conf
new file mode 100644
index 0000000000..de44738a14
--- /dev/null
+++ b/usr/src/uts/common/io/signalfd.conf
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+name="signalfd" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c
index c5d54b5978..f0c0983a3a 100644
--- a/usr/src/uts/common/os/exit.c
+++ b/usr/src/uts/common/os/exit.c
@@ -455,6 +455,14 @@ proc_exit(int why, int what)
(*dtrace_helpers_cleanup)();
}
+ /*
+ * Clean up any signalfd state for the process.
+ */
+ if (p->p_sigfd != NULL) {
+ VERIFY(sigfd_exit_helper != NULL);
+ (*sigfd_exit_helper)();
+ }
+
/* untimeout the realtime timers */
if (p->p_itimer != NULL)
timer_exit();
diff --git a/usr/src/uts/common/os/sig.c b/usr/src/uts/common/os/sig.c
index 0b79c3765a..453b1f22d4 100644
--- a/usr/src/uts/common/os/sig.c
+++ b/usr/src/uts/common/os/sig.c
@@ -60,6 +60,7 @@
#include <sys/cyclic.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
+#include <sys/signalfd.h>
const k_sigset_t nullsmask = {0, 0, 0};
@@ -94,6 +95,12 @@ const k_sigset_t holdvfork =
static int isjobstop(int);
static void post_sigcld(proc_t *, sigqueue_t *);
+
+/*
+ * signalfd helper function which is set when the signalfd driver loads.
+ */
+void (*sigfd_exit_helper)();
+
/*
* Internal variables for counting number of user thread stop requests posted.
* They may not be accurate at some special situation such as that a virtually
@@ -307,6 +314,11 @@ sigtoproc(proc_t *p, kthread_t *t, int sig)
(void) eat_signal(t, sig);
thread_unlock(t);
DTRACE_PROC2(signal__send, kthread_t *, t, int, sig);
+ if (p->p_sigfd != NULL && ((sigfd_proc_state_t *)
+ (p->p_sigfd))->sigfd_pollwake_cb != NULL)
+ (*((sigfd_proc_state_t *)(p->p_sigfd))->
+ sigfd_pollwake_cb)(p, sig);
+
} else if ((tt = p->p_tlist) != NULL) {
/*
* Make sure that some lwp that already exists
@@ -345,6 +357,10 @@ sigtoproc(proc_t *p, kthread_t *t, int sig)
}
DTRACE_PROC2(signal__send, kthread_t *, tt, int, sig);
+ if (p->p_sigfd != NULL && ((sigfd_proc_state_t *)
+ (p->p_sigfd))->sigfd_pollwake_cb != NULL)
+ (*((sigfd_proc_state_t *)(p->p_sigfd))->
+ sigfd_pollwake_cb)(p, sig);
}
}
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index ed02832500..94c09029b5 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -509,6 +509,7 @@ CHKHDRS= \
sid.h \
siginfo.h \
signal.h \
+ signalfd.h \
skein.h \
sleepq.h \
smbios.h \
diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h
index f1a2fc5485..5abf8fd3cd 100644
--- a/usr/src/uts/common/sys/proc.h
+++ b/usr/src/uts/common/sys/proc.h
@@ -47,6 +47,7 @@
#include <sys/list.h>
#include <sys/avl.h>
#include <sys/door_impl.h>
+#include <sys/signalfd.h>
#ifdef __cplusplus
extern "C" {
@@ -197,6 +198,7 @@ typedef struct proc {
k_sigset_t p_extsig; /* signals sent from another contract */
k_sigset_t p_ignore; /* ignore when generated */
k_sigset_t p_siginfo; /* gets signal info with signal */
+ void *p_sigfd; /* signalfd support state */
struct sigqueue *p_sigqueue; /* queued siginfo structures */
struct sigqhdr *p_sigqhdr; /* hdr to sigqueue structure pool */
struct sigqhdr *p_signhdr; /* hdr to signotify structure pool */
diff --git a/usr/src/uts/common/sys/signalfd.h b/usr/src/uts/common/sys/signalfd.h
new file mode 100644
index 0000000000..2661d5a05f
--- /dev/null
+++ b/usr/src/uts/common/sys/signalfd.h
@@ -0,0 +1,100 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Header file to support the signalfd facility. Note that this facility
+ * is designed to be binary compatible with the Linux signalfd facility, modulo
+ * the signals themselves; values for constants here should therefore exactly
+ * match those found in Linux, and this facility shouldn't be extended
+ * independently of Linux.
+ */
+
+#ifndef _SYS_SIGNALFD_H
+#define _SYS_SIGNALFD_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * To assure binary compatibility with Linux, these values are fixed at their
+ * Linux equivalents, not their native ones.
+ */
+#define SFD_CLOEXEC 02000000 /* LX_O_CLOEXEC */
+#define SFD_NONBLOCK 04000 /* LX_O_NONBLOCK */
+
+/*
+ * These ioctl values are specific to the native implementation; applications
+ * shouldn't be using them directly, and they should therefore be safe to
+ * change without breaking apps.
+ */
+#define SIGNALFDIOC (('s' << 24) | ('f' << 16) | ('d' << 8))
+#define SIGNALFDIOC_MASK (SIGNALFDIOC | 1) /* set mask */
+
+/*
+ * Per-signal record.  The driver's read loop continues only while at least
+ * sizeof (signalfd_siginfo_t) bytes of caller buffer remain.  The members
+ * add up to 128 bytes: twelve 32-bit fields (48 bytes), four 64-bit fields
+ * (32 bytes) and 48 bytes of padding.
+ */
+typedef struct signalfd_siginfo {
+ uint32_t ssi_signo; /* signal from signal.h */
+ int32_t ssi_errno; /* error from errno.h */
+ int32_t ssi_code; /* signal code */
+ uint32_t ssi_pid; /* PID of sender */
+ uint32_t ssi_uid; /* real UID of sender */
+ int32_t ssi_fd; /* File descriptor (SIGIO) */
+ uint32_t ssi_tid; /* unused */
+ uint32_t ssi_band; /* band event (SIGIO) */
+ uint32_t ssi_overrun; /* unused */
+ uint32_t ssi_trapno; /* trap number that caused signal */
+ int32_t ssi_status; /* exit status or signal (SIGCHLD) */
+ int32_t ssi_int; /* unused */
+ uint64_t ssi_ptr; /* unused */
+ uint64_t ssi_utime; /* user CPU time consumed (SIGCHLD) */
+ uint64_t ssi_stime; /* system CPU time consumed (SIGCHLD) */
+ uint64_t ssi_addr; /* address that generated signal */
+ uint8_t ssi_pad[48]; /* Pad size to 128 bytes to allow for */
+ /* additional fields in the future. */
+} signalfd_siginfo_t;
+
+#ifndef _KERNEL
+
+extern int signalfd(int, const sigset_t *, int);
+
+#else
+
+#define SIGNALFDMNRN_SIGNALFD 0
+#define SIGNALFDMNRN_CLONE 1
+
+/*
+ * Element of a process's sigfd_list.  The driver creates that list with this
+ * structure's size and the offset of sigfd_wl_lst as the link field.
+ */
+typedef struct sigfd_wake_list {
+ list_node_t sigfd_wl_lst; /* linkage on sigfd_list */
+ void *sigfd_wl_state; /* presumably the driver's per-open state; confirm */
+} sigfd_wake_list_t;
+
+/*
+ * This holds the proc_t state for a process which is using signalfd.  It is
+ * reached through the process's p_sigfd pointer, which the driver's poll
+ * entry point installs while holding p_lock.
+ */
+typedef struct sigfd_proc_state {
+ void (*sigfd_pollwake_cb)(void *, int); /* sigtoproc() calls it as (proc, sig) */
+ list_t sigfd_list; /* list of sigfd_wake_list_t entries */
+} sigfd_proc_state_t;
+
+
+extern void (*sigfd_exit_helper)();
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SIGNALFD_H */
diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h
index 188230d61e..fd6a60c65e 100644
--- a/usr/src/uts/common/sys/thread.h
+++ b/usr/src/uts/common/sys/thread.h
@@ -164,7 +164,7 @@ typedef struct _kthread {
k_sigset_t t_sig; /* signals pending to this process */
k_sigset_t t_extsig; /* signals sent from another contract */
k_sigset_t t_hold; /* hold signal bit mask */
- k_sigset_t t_sigwait; /* sigtimedwait() is accepting these */
+ k_sigset_t t_sigwait; /* sigtimedwait/sigfd accepting these */
struct _kthread *t_forw; /* process's forward thread link */
struct _kthread *t_back; /* process's backward thread link */
struct _kthread *t_thlink; /* tid (lwpid) lookup hash link */
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index d055d0a8d1..79aa34879f 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -324,6 +324,7 @@ DRV_KMODS += sd
DRV_KMODS += sdhost
DRV_KMODS += sgen
DRV_KMODS += si3124
+DRV_KMODS += signalfd
DRV_KMODS += smbios
DRV_KMODS += skd
DRV_KMODS += softmac
diff --git a/usr/src/uts/intel/signalfd/Makefile b/usr/src/uts/intel/signalfd/Makefile
new file mode 100644
index 0000000000..d1a461c2f1
--- /dev/null
+++ b/usr/src/uts/intel/signalfd/Makefile
@@ -0,0 +1,68 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = signalfd
+OBJECTS = $(SIGNALFD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(SIGNALFD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+# Turn off gcc's -Wparentheses for this module; presumably needed for code
+# such as the assignment used as a condition in signalfd_poll() -- confirm.
+CERRWARN += -_gcc=-Wno-parentheses
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/sparc/Makefile.sparc b/usr/src/uts/sparc/Makefile.sparc
index 41a57721e8..e677f5363f 100644
--- a/usr/src/uts/sparc/Makefile.sparc
+++ b/usr/src/uts/sparc/Makefile.sparc
@@ -239,6 +239,7 @@ DRV_KMODS += bridge trill
DRV_KMODS += bpf
DRV_KMODS += dca
DRV_KMODS += eventfd
+DRV_KMODS += signalfd
#
# Hardware Drivers in common space
diff --git a/usr/src/uts/sparc/signalfd/Makefile b/usr/src/uts/sparc/signalfd/Makefile
new file mode 100644
index 0000000000..a60bc617e1
--- /dev/null
+++ b/usr/src/uts/sparc/signalfd/Makefile
@@ -0,0 +1,68 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = signalfd
+OBJECTS = $(SIGNALFD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(SIGNALFD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+# Turn off gcc's -Wparentheses for this module; presumably needed for code
+# such as the assignment used as a condition in signalfd_poll() -- confirm.
+CERRWARN += -_gcc=-Wno-parentheses
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ