summary | refs | log | tree | commit | diff
path: root/usr/src/uts/common/sys
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/sys')
-rw-r--r-- usr/src/uts/common/sys/devpoll.h 5
-rw-r--r-- usr/src/uts/common/sys/poll.h 6
-rw-r--r-- usr/src/uts/common/sys/poll_impl.h 106
3 files changed, 92 insertions, 25 deletions
diff --git a/usr/src/uts/common/sys/devpoll.h b/usr/src/uts/common/sys/devpoll.h
index 4e4c76d9b0..3b6bd159c3 100644
--- a/usr/src/uts/common/sys/devpoll.h
+++ b/usr/src/uts/common/sys/devpoll.h
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _SYS_DEVPOLL_H
@@ -88,9 +88,6 @@ typedef struct dp_entry {
mutex_enter(&(dpep)->dpe_lock); \
ASSERT((dpep)->dpe_refcnt > 0); \
(dpep)->dpe_refcnt--; \
- if ((dpep)->dpe_refcnt == 0) { \
- cv_broadcast(&(dpep)->dpe_cv); \
- } \
mutex_exit(&(dpep)->dpe_lock); \
}
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/sys/poll.h b/usr/src/uts/common/sys/poll.h
index efc8457a6a..75a588533f 100644
--- a/usr/src/uts/common/sys/poll.h
+++ b/usr/src/uts/common/sys/poll.h
@@ -31,7 +31,7 @@
*/
/*
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _SYS_POLL_H
@@ -130,8 +130,8 @@ extern void pollwakeup(pollhead_t *, short);
/*
* Internal routines.
*/
-extern void polllock(pollhead_t *, kmutex_t *);
-extern int pollunlock(void);
+extern int polllock(pollhead_t *, kmutex_t *);
+extern int pollunlock(int *);
extern void pollrelock(int);
extern void pollcleanup(void);
extern void pollblockexit(struct fpollinfo *);
diff --git a/usr/src/uts/common/sys/poll_impl.h b/usr/src/uts/common/sys/poll_impl.h
index 2e866ec4d4..67b47f9a1e 100644
--- a/usr/src/uts/common/sys/poll_impl.h
+++ b/usr/src/uts/common/sys/poll_impl.h
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _SYS_POLL_IMPL_H
@@ -36,7 +36,7 @@
*
* Each kernel thread (1), if engaged in poll system call, has a reference to
* a pollstate_t (2), which contains relevant flags and locks. The pollstate_t
- * contains a pointer to a pcache_t (3), which caches the state of previous
+ * contains a pointer to a pollcache_t (3), which caches the state of previous
* calls to poll. A bitmap (4) is stored inside the poll cache, where each
* bit represents a file descriptor. The bits are set if the corresponding
* device has a polled event pending. Only fds with their bit set will be
@@ -45,7 +45,7 @@
* structures keep track of the pollfd_t arrays (6) passed in from userland.
* Each polled file descriptor has a corresponding polldat_t which can be
* chained onto a device's pollhead, and these are kept in a hash table (7)
- * inside the pcache_t. The hash table allows efficient conversion of a
+ * inside the pollcache_t. The hash table allows efficient conversion of a
* given fd to its corresponding polldat_t.
*
* (1) (2)
@@ -76,7 +76,7 @@
* Both poll system call and /dev/poll use the pollcache_t structure
* definition and the routines managing the structure. But poll(2) and
* /dev/poll have their own copy of the structures. The /dev/poll driver
- * table (1a) contains an array of pointers, each pointing at a pcache_t
+ * table (1a) contains an array of pointers, each pointing at a pollcache_t
* struct (3). A device minor number is used as an device table index.
*
*/
@@ -86,12 +86,26 @@
#include <sys/thread.h>
#include <sys/file.h>
+#include <sys/port_kernel.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
+ * Typedefs
+ */
+struct pollcache;
+struct pollstate;
+struct pcachelink;
+struct polldat;
+
+typedef struct pollcache pollcache_t;
+typedef struct pollstate pollstate_t;
+typedef struct pcachelink pcachelink_t;
+typedef struct polldat polldat_t;
+
+/*
* description of pollcacheset structure
*/
typedef struct pollcacheset {
@@ -104,18 +118,40 @@ typedef struct pollcacheset {
#define POLLFDSETS 2
/*
+ * Maximum depth for recursive poll operations.
+ */
+#define POLLMAXDEPTH 5
+
+/*
* State information kept by each polling thread
*/
-typedef struct pollstate {
+struct pollstate {
pollfd_t *ps_pollfd; /* hold the current poll list */
size_t ps_nfds; /* size of ps_pollfd */
kmutex_t ps_lock; /* mutex for sleep/wakeup */
- struct pollcache *ps_pcache; /* cached poll fd set */
+ pollcache_t *ps_pcache; /* cached poll fd set */
pollcacheset_t *ps_pcacheset; /* cached poll lists */
int ps_nsets; /* no. of cached poll sets */
pollfd_t *ps_dpbuf; /* return pollfd buf used by devpoll */
size_t ps_dpbufsize; /* size of ps_dpbuf */
-} pollstate_t;
+ int ps_depth; /* epoll recursion depth */
+ pollcache_t *ps_pc_stack[POLLMAXDEPTH]; /* epoll recursion state */
+ pollcache_t *ps_contend_pc; /* pollcache waited on */
+ pollstate_t *ps_contend_nextp; /* next in contender list */
+ pollstate_t **ps_contend_pnextp; /* pointer-to-previous-next */
+ int ps_flags; /* state flags */
+};
+
+/* pollstate flags */
+#define POLLSTATE_STALEMATE 0x1
+#define POLLSTATE_ULFAIL 0x2
+
+/* pollstate_enter results */
+#define PSE_SUCCESS 0
+#define PSE_FAIL_DEPTH 1
+#define PSE_FAIL_LOOP 2
+#define PSE_FAIL_DEADLOCK 3
+#define PSE_FAIL_POLLSTATE 4
/*
* poll cache size defines
@@ -143,27 +179,54 @@ typedef struct xref {
#define POLLPOSINVAL (-1L) /* xf_position is invalid */
#define POLLPOSTRANS (-2L) /* xf_position is transient state */
+
+typedef enum pclstate {
+ PCL_INIT = 0, /* just allocated/zeroed, prior to linking */
+ PCL_VALID, /* linked with both parent and child pollcaches */
+ PCL_STALE, /* still linked but marked stale, pending refresh */
+ PCL_INVALID, /* dissociated from one pollcache, awaiting cleanup */
+ PCL_FREE /* only meant to indicate use-after-free */
+} pclstate_t;
+
+/*
+ * The pcachelink struct creates an association between parent and child
+ * pollcaches in a recursive /dev/poll operation. Fields are protected by
+ * pcl_lock although manipulation of pcl_child_next or pcl_parent_next also
+ * requires holding pc_lock in the respective pcl_parent_pc or pcl_child_pc
+ * pollcache.
+ */
+struct pcachelink {
+ kmutex_t pcl_lock; /* protects contents */
+ pclstate_t pcl_state; /* status of link entry */
+ int pcl_refcnt; /* ref cnt of linked pcaches */
+ pollcache_t *pcl_child_pc; /* child pollcache */
+ pollcache_t *pcl_parent_pc; /* parent pollcache */
+ pcachelink_t *pcl_child_next; /* next in child list */
+ pcachelink_t *pcl_parent_next; /* next in parents list */
+};
+
+
/*
* polldat is an entry for a cached poll fd. A polldat struct can be in
* poll cache table as well as on pollhead ph_list, which is used by
* pollwakeup to wake up a sleeping poller. There should be one polldat
* per polled fd hanging off pollstate struct.
*/
-typedef struct polldat {
+struct polldat {
int pd_fd; /* cached poll fd */
int pd_events; /* union of all polled events */
file_t *pd_fp; /* used to detect fd reuse */
pollhead_t *pd_php; /* used to undo poll registration */
kthread_t *pd_thread; /* used for waking up a sleep thrd */
- struct pollcache *pd_pcache; /* a ptr to the pollcache of this fd */
- struct polldat *pd_next; /* next on pollhead's ph_list */
- struct polldat *pd_hashnext; /* next on pollhead's ph_list */
+ pollcache_t *pd_pcache; /* a ptr to the pollcache of this fd */
+ polldat_t *pd_next; /* next on pollhead's ph_list */
+ polldat_t *pd_hashnext; /* next in pollcache hash chain */
int pd_count; /* total count from all ref'ed sets */
int pd_nsets; /* num of xref sets, used by poll(2) */
xref_t *pd_ref; /* ptr to xref info, 1 for each set */
- struct port_kevent *pd_portev; /* associated port event struct */
+ port_kevent_t *pd_portev; /* associated port event struct */
uint64_t pd_epolldata; /* epoll data, if any */
-} polldat_t;
+};
/*
* One cache for each thread that polls. Points to a bitmap (used by pollwakeup)
@@ -172,7 +235,7 @@ typedef struct polldat {
* of port_fdcache_t, both structs implement pc_lock with offset 0 (see also
* pollrelock()).
*/
-typedef struct pollcache {
+struct pollcache {
kmutex_t pc_lock; /* lock to protect pollcache */
ulong_t *pc_bitmap; /* point to poll fd bitmap */
polldat_t **pc_hash; /* points to a hash table of ptrs */
@@ -187,11 +250,12 @@ typedef struct pollcache {
kcondvar_t pc_cv; /* cv to wait on if needed */
pid_t pc_pid; /* for check acc rights, devpoll only */
int pc_mapstart; /* where search start, devpoll only */
-} pollcache_t;
+ pcachelink_t *pc_parents; /* linked list of epoll parents */
+ pcachelink_t *pc_children; /* linked list of epoll children */
+};
/* pc_flag */
#define PC_POLLWAKE 0x02 /* pollwakeup() occurred */
-#define PC_WRITEWANTED 0x04 /* writer wishes to modify the pollcache_t */
#if defined(_KERNEL)
/*
@@ -218,11 +282,15 @@ extern void pollhead_delete(pollhead_t *, polldat_t *);
/*
* poll state interfaces:
*
- * pollstate_create creates per-thread pollstate
- * pollstate_destroy cleans up per-thread pollstate
+ * pollstate_create initializes per-thread pollstate
+ * pollstate_destroy cleans up per-thread pollstate
+ * pollstate_enter safely locks pollcache for pollstate
+ * pollstate_exit unlocks pollcache from pollstate
*/
extern pollstate_t *pollstate_create(void);
extern void pollstate_destroy(pollstate_t *);
+extern int pollstate_enter(pollcache_t *);
+extern void pollstate_exit(pollcache_t *);
/*
* public pcache interfaces:
@@ -254,6 +322,7 @@ extern void pcache_destroy(pollcache_t *);
* pcache_grow_map grows the pollcache bitmap
* pcache_update_xref update cross ref (from polldat back to cacheset) info
* pcache_clean_entry cleanup an entry in pcache and more...
+ * pcache_wake_parents wake linked parent pollcaches
*/
extern polldat_t *pcache_lookup_fd(pollcache_t *, int);
extern polldat_t *pcache_alloc_fd(int);
@@ -263,6 +332,7 @@ extern void pcache_grow_hashtbl(pollcache_t *, nfds_t);
extern void pcache_grow_map(pollcache_t *, int);
extern void pcache_update_xref(pollcache_t *, int, ssize_t, int);
extern void pcache_clean_entry(pollstate_t *, int);
+extern void pcache_wake_parents(pollcache_t *);
/*
* pcacheset interfaces: