summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan McDonald <danmcd@mnx.io>2022-10-19 11:35:41 -0400
committerDan McDonald <danmcd@mnx.io>2022-10-19 11:35:41 -0400
commit00cae703489661f6bf04c41dfbc11cec67ed2d46 (patch)
treef0126f9faef999491d89c8c56555b510a6f2d910
parent15d74ce20c96665020b37b5b2323f5bd1bf71afa (diff)
parentf23ed011dd1990f5b6b2d755feeaa7baf5a22caa (diff)
downloadillumos-joyent-release-20221020.tar.gz
[illumos-gate merge]release-20221020
commit f23ed011dd1990f5b6b2d755feeaa7baf5a22caa 15036 portfs wears inadequate pollcache disguise
-rw-r--r--usr/src/pkg/manifests/system-test-ostest.p5m1
-rw-r--r--usr/src/test/os-tests/runfiles/default.run5
-rw-r--r--usr/src/test/os-tests/tests/regression/Makefile3
-rw-r--r--usr/src/test/os-tests/tests/regression/illumos-15036.c123
-rw-r--r--usr/src/uts/common/fs/portfs/port.c2
-rw-r--r--usr/src/uts/common/sys/poll_impl.h15
-rw-r--r--usr/src/uts/common/sys/port_kernel.h13
-rw-r--r--usr/src/uts/common/syscall/poll.c8
8 files changed, 160 insertions, 10 deletions
diff --git a/usr/src/pkg/manifests/system-test-ostest.p5m b/usr/src/pkg/manifests/system-test-ostest.p5m
index 100f109732..e2e4c9450a 100644
--- a/usr/src/pkg/manifests/system-test-ostest.p5m
+++ b/usr/src/pkg/manifests/system-test-ostest.p5m
@@ -105,6 +105,7 @@ file path=opt/os-tests/tests/portfs/file_assoc.32 mode=0555
file path=opt/os-tests/tests/portfs/file_assoc.64 mode=0555
dir path=opt/os-tests/tests/regression
file path=opt/os-tests/tests/regression/illumos-15031 mode=0555
+file path=opt/os-tests/tests/regression/illumos-15036 mode=0555
dir path=opt/os-tests/tests/sdevfs
file path=opt/os-tests/tests/sdevfs/sdevfs_eisdir mode=0555
dir path=opt/os-tests/tests/secflags
diff --git a/usr/src/test/os-tests/runfiles/default.run b/usr/src/test/os-tests/runfiles/default.run
index 49156ea42b..612563ebcd 100644
--- a/usr/src/test/os-tests/runfiles/default.run
+++ b/usr/src/test/os-tests/runfiles/default.run
@@ -148,4 +148,7 @@ tests = ['coretests']
tests = ['file_assoc.32', 'file_assoc.64']
[/opt/os-tests/tests/regression]
-tests = ['illumos-15031']
+tests = [
+ 'illumos-15031',
+ 'illumos-15036'
+ ]
diff --git a/usr/src/test/os-tests/tests/regression/Makefile b/usr/src/test/os-tests/tests/regression/Makefile
index 1536e83ca6..93a365167b 100644
--- a/usr/src/test/os-tests/tests/regression/Makefile
+++ b/usr/src/test/os-tests/tests/regression/Makefile
@@ -14,7 +14,8 @@
#
PROGS = \
- illumos-15031
+ illumos-15031 \
+ illumos-15036
ROOTOPTDIR = $(ROOT)/opt/os-tests/tests
ROOTOPTREGRESSION = $(ROOTOPTDIR)/regression
diff --git a/usr/src/test/os-tests/tests/regression/illumos-15036.c b/usr/src/test/os-tests/tests/regression/illumos-15036.c
new file mode 100644
index 0000000000..7530738c2d
--- /dev/null
+++ b/usr/src/test/os-tests/tests/regression/illumos-15036.c
@@ -0,0 +1,123 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <port.h>
+#include <err.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <sys/poll.h>
+
+/*
+ * Attempt to trigger the #15036 regression. This depends on a subsequent
+ * allocation in the same slab setting a bit at a specific offset, so reliable
+ * detection of the issue is a challenge. This test uses brute force in its
+ * attempt, but cannot guarantee to trigger the behavior on a system affected by
+ * the issue.
+ */
+
+/*
+ * We need regular files on a regular filesystem for fs_poll to be called.
+ * Assume that we can find some in our own tests dir.
+ *
+ * As for repetitions, this is not especially consistent, so really hammer
+ * things out of brute force.
+ */
+#define FILE_SRC "/opt/os-tests/tests"
+#define FILE_COUNT 10
+#define TEST_REPEAT 10000
+
+static uint_t
+find_test_files(const char *dir, uint_t count, int *result_fds)
+{
+ assert(count > 0);
+
+ DIR *dirp;
+
+ dirp = opendir(dir);
+ if (dirp == NULL) {
+ return (0);
+ }
+
+ dirent_t *de;
+ uint_t nvalid = 0;
+ while ((de = readdir(dirp)) != NULL) {
+ char path[MAXPATHLEN];
+ struct stat st;
+
+ (void) snprintf(path, sizeof (path), "%s/%s", dir, de->d_name);
+ if (lstat(path, &st) != 0 || (st.st_mode & S_IFREG) == 0) {
+ continue;
+ }
+ result_fds[nvalid] = open(path, O_RDONLY, 0);
+ if (result_fds[nvalid] < 0) {
+ continue;
+ }
+
+ nvalid++;
+ if (nvalid == count) {
+ break;
+ }
+ }
+
+ (void) closedir(dirp);
+ return (nvalid);
+}
+
+int
+main(int argc, char *argv[])
+{
+ int poll_fds[FILE_COUNT];
+
+ if (find_test_files(FILE_SRC, FILE_COUNT, poll_fds) != FILE_COUNT) {
+ errx(EXIT_FAILURE, "FAIL - count not open test files to poll");
+ }
+
+ for (uint_t i = 0; i < TEST_REPEAT; i++) {
+ int port_fds[FILE_COUNT];
+
+ for (uint_t j = 0; j < FILE_COUNT; j++) {
+ port_fds[j] = port_create();
+ if (port_fds[j] < 0) {
+ err(EXIT_FAILURE, "FAIL - port_create()");
+ }
+
+ int res = port_associate(port_fds[j], PORT_SOURCE_FD,
+ (uintptr_t)poll_fds[j], POLLIN, NULL);
+ if (res != 0) {
+ err(EXIT_FAILURE, "FAIL - port_associate()");
+ }
+ }
+
+ for (uint_t j = 0; j < FILE_COUNT; j++) {
+ int res = port_dissociate(port_fds[j], PORT_SOURCE_FD,
+ (uintptr_t)poll_fds[j]);
+ if (res != 0) {
+ err(EXIT_FAILURE, "FAIL - port_dissociate()");
+ }
+ (void) close(port_fds[j]);
+ }
+ }
+
+ (void) printf("PASS\n");
+ return (EXIT_SUCCESS);
+}
diff --git a/usr/src/uts/common/fs/portfs/port.c b/usr/src/uts/common/fs/portfs/port.c
index dd32c82434..c7e069b108 100644
--- a/usr/src/uts/common/fs/portfs/port.c
+++ b/usr/src/uts/common/fs/portfs/port.c
@@ -26,6 +26,7 @@
/*
* Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ * Copyright 2022 Oxide Computer Company
*/
#include <sys/types.h>
@@ -824,6 +825,7 @@ port_init(port_t *pp)
/* Allocate cache skeleton for PORT_SOURCE_FD events */
portq->portq_pcp = kmem_zalloc(sizeof (port_fdcache_t), KM_SLEEP);
mutex_init(&portq->portq_pcp->pc_lock, NULL, MUTEX_DEFAULT, NULL);
+ portq->portq_pcp->pc_flag = PC_PORTFS;
/*
* Allocate cache skeleton for association of event sources.
diff --git a/usr/src/uts/common/sys/poll_impl.h b/usr/src/uts/common/sys/poll_impl.h
index cd0a571c47..388849a14f 100644
--- a/usr/src/uts/common/sys/poll_impl.h
+++ b/usr/src/uts/common/sys/poll_impl.h
@@ -234,9 +234,11 @@ struct polldat {
/*
* One cache for each thread that polls. Points to a bitmap (used by pollwakeup)
* and a hash table of polldats.
- * The offset of pc_lock field must be kept in sync with the pc_lock offset
- * of port_fdcache_t, both structs implement pc_lock with offset 0 (see also
- * pollrelock()).
+ *
+ * Because of the handling required in pollrelock(), portfs abuses the notion of
+ * an active pollcache (t_pollcache), providing its own struct port_fdcache_t.
+ * It has matching pc_lock and pc_flag members at the correct offsets, but none
+ * of its other fields can be accessed (through t_pollcache) safetly.
*/
struct pollcache {
kmutex_t pc_lock; /* lock to protect pollcache */
@@ -260,6 +262,13 @@ struct pollcache {
/* pc_flag */
#define PC_POLLWAKE 0x02 /* pollwakeup() occurred */
#define PC_EPOLL 0x04 /* pollcache is epoll-enabled */
+/*
+ * PC_PORTFS is not a flag for "real" pollcaches, but rather an indicator for
+ * when portfs sets t_pollcache to a port_fdcache_t pointer. If, while
+ * debugging a system, one sees PC_PORTFS in pc_flag, they will know to
+ * disregard the other fields, as it is not a pollcache.
+ */
+#define PC_PORTFS 0x08
#if defined(_KERNEL)
/*
diff --git a/usr/src/uts/common/sys/port_kernel.h b/usr/src/uts/common/sys/port_kernel.h
index 7456f63573..daac6f4927 100644
--- a/usr/src/uts/common/sys/port_kernel.h
+++ b/usr/src/uts/common/sys/port_kernel.h
@@ -22,13 +22,12 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2022 Oxide Computer Company
*/
#ifndef _SYS_PORT_KERNEL_H
#define _SYS_PORT_KERNEL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/vnode.h>
#include <sys/list.h>
@@ -52,7 +51,7 @@ extern "C" {
typedef struct port_kevent {
kmutex_t portkev_lock; /* used by PORT_SOURCE_FD source */
int portkev_source; /* event: source */
- int portkev_events; /* event: data */
+ int portkev_events; /* event: data */
int portkev_flags; /* internal flags */
pid_t portkev_pid; /* pid of process using this struct */
long portkev_object; /* event: object */
@@ -122,8 +121,10 @@ typedef struct portfop_cache {
/*
* PORT_SOURCE_FD cache per port.
* One cache for each port that uses PORT_SOURCE_FD.
- * pc_lock must be the first element of port_fdcache_t to keep it
- * synchronized with the offset of pc_lock in pollcache_t (see pollrelock()).
+ *
+ * The types and offsets of pc_lock and pc_flag must exactly match their sibling
+ * fields in pollcache_t, as they are accessed as if the port_fdcache_t _was_ a
+ * pollcache via t_pollcache. (See: pollrelock() and fs_reject_epoll())
*/
typedef struct port_fdcache {
kmutex_t pc_lock; /* lock to protect portcache */
@@ -131,6 +132,8 @@ typedef struct port_fdcache {
struct portfd **pc_hash; /* points to a hash table of ptrs */
int pc_hashsize; /* the size of current hash table */
int pc_fdcount; /* track how many fd's are hashed */
+ uintptr_t _pc_pad; /* pad to properly offset pc_flag */
+ int pc_flag; /* pollcache flags (compat) */
} port_fdcache_t;
/*
diff --git a/usr/src/uts/common/syscall/poll.c b/usr/src/uts/common/syscall/poll.c
index 373c86c474..a10b2623db 100644
--- a/usr/src/uts/common/syscall/poll.c
+++ b/usr/src/uts/common/syscall/poll.c
@@ -279,6 +279,14 @@ pollunlock(int *lockstate)
return (0);
}
+/*
+ * The pc_lock and pc_flag fields of port_fdcache_t must exactly match those of
+ * pollcache_t as they are accessed through t_pollcache as if they were part of
+ * a "real" pollcache.
+ */
+CTASSERT(offsetof(pollcache_t, pc_lock) == offsetof(port_fdcache_t, pc_lock));
+CTASSERT(offsetof(pollcache_t, pc_flag) == offsetof(port_fdcache_t, pc_flag));
+
void
pollrelock(int lockstate)
{