summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jerry Jelinek <jerry.jelinek@joyent.com> 2016-10-25 19:23:39 +0000
committer Jerry Jelinek <jerry.jelinek@joyent.com> 2016-10-25 19:24:15 +0000
commit 3125ad3bb6fb793491e3fc152ef05eb4cd221797 (patch)
tree dc78de15ad1f4b94e7e60f01ddbe0c2fbc7eebbd
parent 084bab208e0b7c16aa583ebb268d2886e066a4b8 (diff)
download illumos-joyent-3125ad3bb6fb793491e3fc152ef05eb4cd221797.tar.gz
OS-5717 lx sched* syscalls should work while running under FSS
OS-5718 lx sched_setscheduler should not return previous policy
OS-5524 sched_setscheduler missing support for SCHED_BATCH and SCHED_IDLE
OS-5733 missing sched_setattr and sched_getattr
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Patrick Mooney <patrick.mooney@joyent.com>
-rw-r--r--  usr/src/lib/brand/lx/lx_brand/Makefile.com  1
-rw-r--r--  usr/src/lib/brand/lx/lx_brand/common/lx_brand.c  28
-rw-r--r--  usr/src/lib/brand/lx/lx_brand/common/sched.c  476
-rw-r--r--  usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h  8
-rw-r--r--  usr/src/lib/brand/lx/testing/ltp_skiplist  14
-rw-r--r--  usr/src/uts/common/brand/lx/os/lx_misc.c  11
-rw-r--r--  usr/src/uts/common/brand/lx/os/lx_syscall.c  36
-rw-r--r--  usr/src/uts/common/brand/lx/sys/lx_brand.h  15
-rw-r--r--  usr/src/uts/common/brand/lx/sys/lx_sched.h  60
-rw-r--r--  usr/src/uts/common/brand/lx/sys/lx_syscalls.h  4
-rw-r--r--  usr/src/uts/common/brand/lx/syscall/lx_sched.c  582
11 files changed, 568 insertions, 667 deletions
diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile.com b/usr/src/lib/brand/lx/lx_brand/Makefile.com
index e653fecd58..53f5246834 100644
--- a/usr/src/lib/brand/lx/lx_brand/Makefile.com
+++ b/usr/src/lib/brand/lx/lx_brand/Makefile.com
@@ -45,7 +45,6 @@ COBJS = aio.o \
mount_nfs.o \
priority.o \
ptrace.o \
- sched.o \
sendfile.o \
signal.o \
stack.o \
diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
index d98b73941e..5724b6cbba 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
@@ -1153,13 +1153,13 @@ static lx_syscall_handler_t lx_handlers[] = {
lx_sysfs, /* 139: sysfs */
lx_getpriority, /* 140: getpriority */
lx_setpriority, /* 141: setpriority */
- lx_sched_setparam, /* 142: sched_setparam */
- lx_sched_getparam, /* 143: sched_getparam */
- lx_sched_setscheduler, /* 144: sched_setscheduler */
- lx_sched_getscheduler, /* 145: sched_getscheduler */
- lx_sched_get_priority_max, /* 146: sched_get_priority_max */
- lx_sched_get_priority_min, /* 147: sched_get_priority_min */
- lx_sched_rr_get_interval, /* 148: sched_rr_get_interval */
+ NULL, /* 142: sched_setparam */
+ NULL, /* 143: sched_getparam */
+ NULL, /* 144: sched_setscheduler */
+ NULL, /* 145: sched_getscheduler */
+ NULL, /* 146: sched_get_priority_max */
+ NULL, /* 147: sched_get_priority_min */
+ NULL, /* 148: sched_rr_get_interval */
lx_mlock, /* 149: mlock */
lx_munlock, /* 150: munlock */
lx_mlockall, /* 151: mlockall */
@@ -1496,14 +1496,14 @@ static lx_syscall_handler_t lx_handlers[] = {
lx_munlock, /* 151: munlock */
lx_mlockall, /* 152: mlockall */
lx_munlockall, /* 153: munlockall */
- lx_sched_setparam, /* 154: sched_setparam */
- lx_sched_getparam, /* 155: sched_getparam */
- lx_sched_setscheduler, /* 156: sched_setscheduler */
- lx_sched_getscheduler, /* 157: sched_getscheduler */
+ NULL, /* 154: sched_setparam */
+ NULL, /* 155: sched_getparam */
+ NULL, /* 156: sched_setscheduler */
+ NULL, /* 157: sched_getscheduler */
NULL, /* 158: sched_yield */
- lx_sched_get_priority_max, /* 159: sched_get_priority_max */
- lx_sched_get_priority_min, /* 160: sched_get_priority_min */
- lx_sched_rr_get_interval, /* 161: sched_rr_get_interval */
+ NULL, /* 159: sched_get_priority_max */
+ NULL, /* 160: sched_get_priority_min */
+ NULL, /* 161: sched_rr_get_interval */
NULL, /* 162: nanosleep */
lx_remap, /* 163: mremap */
NULL, /* 164: setresuid16 */
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sched.c b/usr/src/lib/brand/lx/lx_brand/common/sched.c
deleted file mode 100644
index 80b7660985..0000000000
--- a/usr/src/lib/brand/lx/lx_brand/common/sched.c
+++ /dev/null
@@ -1,476 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- * Copyright 2016 Joyent, Inc.
- */
-
-#include <sys/types.h>
-#include <sys/cred_impl.h>
-#include <sys/ucred.h>
-#include <ucred.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <errno.h>
-#include <sched.h>
-#include <strings.h>
-#include <pthread.h>
-#include <time.h>
-#include <thread.h>
-#include <alloca.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <sys/lx_syscall.h>
-#include <sys/lx_debug.h>
-#include <sys/lx_brand.h>
-#include <sys/lx_misc.h>
-#include <sys/lx_sched.h>
-
-/* Linux only has three valid policies, SCHED_FIFO, SCHED_RR and SCHED_OTHER */
-static int
-validate_policy(int policy)
-{
- switch (policy) {
- case LX_SCHED_FIFO:
- return (SCHED_FIFO);
-
- case LX_SCHED_RR:
- return (SCHED_RR);
-
- case LX_SCHED_OTHER:
- return (SCHED_OTHER);
-
- default:
- lx_debug("validate_policy: illegal policy: %d", policy);
- return (-EINVAL);
- }
-}
-
-/*
- * Check to see if we have the permissions to set scheduler parameters and
- * policy, based on Linux' demand that such commands fail with errno set to
- * EPERM if the current euid is not the euid or ruid of the process in
- * question.
- */
-static int
-check_schedperms(pid_t pid)
-{
- size_t sz;
- ucred_t *cr;
- uid_t euid;
-
- euid = geteuid();
-
- if (pid == getpid()) {
- /*
- * If we're the process to be checked, simply check the euid
- * against our ruid.
- */
- if (euid != getuid())
- return (-EPERM);
-
- return (0);
- }
-
- /*
- * We allocate a ucred_t ourselves rather than call ucred_get(3C)
- * because ucred_get() calls malloc(3C), which the brand library cannot
- * use. Because we allocate the space with SAFE_ALLOCA(), there's
- * no need to free it when we're done.
- */
- sz = ucred_size();
- cr = (ucred_t *)SAFE_ALLOCA(sz);
-
- if (cr == NULL)
- return (-ENOMEM);
-
- /*
- * If we can't access the process' credentials, fail with errno EPERM
- * as the call would not have succeeded anyway.
- */
- if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, pid, cr) != 0)
- return ((errno == EACCES) ? -EPERM : -errno);
-
- if ((euid != ucred_geteuid(cr)) && (euid != ucred_getruid(cr)))
- return (-EPERM);
-
- return (0);
-}
-
-static int
-ltos_sparam(int policy, struct lx_sched_param *lsp, struct sched_param *sp)
-{
- struct lx_sched_param ls;
- int smin = sched_get_priority_min(policy);
- int smax = sched_get_priority_max(policy);
-
- if (uucopy(lsp, &ls, sizeof (struct lx_sched_param)) != 0)
- return (-errno);
-
- bzero(sp, sizeof (struct sched_param));
-
- /*
- * Linux has a fixed priority range, 0 - 99, which we need to convert to
- * Solaris's dynamic range. Linux considers lower numbers to be
- * higher priority, so we'll invert the priority within Solaris's range.
- *
- * The formula to convert between ranges is:
- *
- * L * (smax - smin)
- * S = ----------------- + smin
- * (lmax - lmin)
- *
- * where S is the Solaris equivalent of the linux priority L.
- *
- * To invert the priority, we use:
- * S' = smax - S + smin
- *
- * Together, these two formulas become:
- *
- * L * (smax - smin)
- * S = smax - ----------------- + 2smin
- * 99
- */
- sp->sched_priority = smax -
- ((ls.lx_sched_prio * (smax - smin)) / LX_PRI_MAX) + 2*smin;
-
- lx_debug("ltos_sparam: linux prio %d = Solaris prio %d "
- "(Solaris range %d,%d)\n", ls.lx_sched_prio, sp->sched_priority,
- smin, smax);
-
- return (0);
-}
-
-static int
-stol_sparam(int policy, struct sched_param *sp, struct lx_sched_param *lsp)
-{
- struct lx_sched_param ls;
- int smin = sched_get_priority_min(policy);
- int smax = sched_get_priority_max(policy);
-
- if (policy == SCHED_OTHER) {
- /*
- * In Linux, the only valid SCHED_OTHER scheduler priority is 0
- */
- ls.lx_sched_prio = 0;
- } else {
- /*
- * Convert Solaris's dynamic, inverted priority range to the
- * fixed Linux range of 1 - 99.
- *
- * The formula is (see above):
- *
- * (smax - s + 2smin) * 99
- * l = -----------------------
- * smax - smin
- */
- ls.lx_sched_prio = ((smax - sp->sched_priority + 2*smin) *
- LX_PRI_MAX) / (smax - smin);
- }
-
- lx_debug("stol_sparam: policy %d: Solaris prio %d = linux prio %d "
- "(Solaris range %d,%d)\n", policy,
- sp->sched_priority, ls.lx_sched_prio, smin, smax);
-
- return ((uucopy(&ls, lsp, sizeof (struct lx_sched_param)) != 0)
- ? -errno : 0);
-}
-
-long
-lx_sched_getparam(uintptr_t pid, uintptr_t param)
-{
- int policy, ret;
- pid_t s_pid;
- lwpid_t s_tid;
-
- struct sched_param sp;
-
- if (((pid_t)pid < 0) || (param == NULL))
- return (-EINVAL);
-
- if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
- return (-ESRCH);
-
- /*
- * If we're attempting to get information on our own process, we can
- * get data on a per-thread basis; if not, punt and use the specified
- * pid.
- */
- if (s_pid == getpid()) {
- if ((ret = pthread_getschedparam(s_tid, &policy, &sp)) != 0)
- return (-ret);
- } else {
- if (sched_getparam(s_pid, &sp) == -1)
- return (-errno);
-
- if ((policy = sched_getscheduler(s_pid)) < 0)
- return (-errno);
- }
-
- /*
- * Make sure that any non-SCHED_FIFO non-SCHED_RR scheduler is mapped
- * onto SCHED_OTHER.
- */
- if (policy != SCHED_FIFO && policy != SCHED_RR)
- policy = SCHED_OTHER;
-
- return (stol_sparam(policy, &sp, (struct lx_sched_param *)param));
-}
-
-long
-lx_sched_setparam(uintptr_t pid, uintptr_t param)
-{
- int err, policy;
- pid_t s_pid;
- lwpid_t s_tid;
- struct lx_sched_param lp;
- struct sched_param sp;
-
- if (((pid_t)pid < 0) || (param == NULL))
- return (-EINVAL);
-
- if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
- return (-ESRCH);
-
- if (s_pid == getpid()) {
- struct sched_param dummy;
-
- if ((err = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
- return (-err);
- } else
- if ((policy = sched_getscheduler(s_pid)) < 0)
- return (-errno);
-
- lx_debug("sched_setparam(): current policy %d", policy);
-
- if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
- return (-errno);
-
- /*
- * In Linux, the only valid SCHED_OTHER scheduler priority is 0
- */
- if ((policy == SCHED_OTHER) && (lp.lx_sched_prio != 0))
- return (-EINVAL);
-
- if ((err = ltos_sparam(policy, (struct lx_sched_param *)&lp,
- &sp)) != 0)
- return (err);
-
- /*
- * Check if we're allowed to change the scheduler for the process.
- *
- * If we're operating on a thread, we can't just call
- * pthread_setschedparam() because as all threads reside within a
- * single Solaris process, Solaris will allow the modification
- *
- * If we're operating on a process, we can't just call sched_setparam()
- * because Solaris will allow the call to succeed if the scheduler
- * parameters do not differ from those being installed, but Linux wants
- * the call to fail.
- */
- if ((err = check_schedperms(s_pid)) != 0)
- return (err);
-
- if (s_pid == getpid())
- return (((err = pthread_setschedparam(s_tid, policy, &sp)) != 0)
- ? -err : 0);
-
- return ((sched_setparam(s_pid, &sp) == -1) ? -errno : 0);
-}
-
-long
-lx_sched_rr_get_interval(uintptr_t pid, uintptr_t ts)
-{
- pid_t s_pid;
-
- if ((pid_t)pid < 0)
- return (-EINVAL);
-
- if (lx_lpid_to_spid((pid_t)pid, &s_pid) < 0)
- return (-ESRCH);
-
- if (sched_rr_get_interval(s_pid, (struct timespec *)ts) == 0)
- return (0);
- else
- return (-errno);
-}
-
-long
-lx_sched_getscheduler(uintptr_t pid)
-{
- int policy, rv;
- pid_t s_pid;
- lwpid_t s_tid;
-
- if ((pid_t)pid < 0)
- return (-EINVAL);
-
- if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
- return (-ESRCH);
-
- if (s_pid == getpid()) {
- struct sched_param dummy;
-
- if ((rv = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
- return (-rv);
- } else
- if ((policy = sched_getscheduler(s_pid)) < 0)
- return (-errno);
-
- /*
- * Linux only supports certain policies; avoid confusing apps with
- * alien policies.
- */
- switch (policy) {
- case SCHED_FIFO:
- return (LX_SCHED_FIFO);
- case SCHED_OTHER:
- return (LX_SCHED_OTHER);
- case SCHED_RR:
- return (LX_SCHED_RR);
- default:
- break;
- }
-
- return (LX_SCHED_OTHER);
-}
-
-long
-lx_sched_setscheduler(uintptr_t pid, uintptr_t policy, uintptr_t param)
-{
- int rt_pol;
- int rv;
- pid_t s_pid;
- lwpid_t s_tid;
- struct lx_sched_param lp;
-
- struct sched_param sp;
-
- if (((pid_t)pid < 0) || (param == NULL))
- return (-EINVAL);
-
- if ((rt_pol = validate_policy((int)policy)) < 0)
- return (rt_pol);
-
- if ((rv = ltos_sparam(policy, (struct lx_sched_param *)param,
- &sp)) != 0)
- return (rv);
-
- if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
- return (-errno);
-
- if (rt_pol == LX_SCHED_OTHER) {
- /*
- * In Linux, the only valid SCHED_OTHER scheduler priority is 0
- */
- if (lp.lx_sched_prio != 0)
- return (-EINVAL);
-
- /*
- * If we're already SCHED_OTHER, there's nothing else to do.
- */
- if (lx_sched_getscheduler(pid) == LX_SCHED_OTHER)
- return (0);
- }
-
- if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
- return (-ESRCH);
-
- /*
- * Check if we're allowed to change the scheduler for the process.
- *
- * If we're operating on a thread, we can't just call
- * pthread_setschedparam() because as all threads reside within a
- * single Solaris process, Solaris will allow the modification.
- *
- * If we're operating on a process, we can't just call
- * sched_setscheduler() because Solaris will allow the call to succeed
- * if the scheduler and scheduler parameters do not differ from those
- * being installed, but Linux wants the call to fail.
- */
- if ((rv = check_schedperms(s_pid)) != 0)
- return (rv);
-
- if (s_pid == getpid()) {
- struct sched_param param;
- int pol;
-
- if ((pol = sched_getscheduler(s_pid)) == -1)
- return (-errno);
-
- /*
- * sched_setscheduler() returns the previous scheduling policy
- * on success, so call pthread_getschedparam() to get the
- * current thread's scheduling policy and return that if the
- * call to pthread_setschedparam() succeeds.
- */
- if ((rv = pthread_getschedparam(s_tid, &pol, &param)) != 0)
- return (-rv);
-
- return (((rv = pthread_setschedparam(s_tid, rt_pol, &sp)) != 0)
- ? -rv : pol);
- }
-
- return (((rv = sched_setscheduler(s_pid, rt_pol, &sp)) == -1)
- ? -errno : rv);
-}
-
-long
-lx_sched_get_priority_min(uintptr_t policy)
-{
- /*
- * In Linux, the only valid SCHED_OTHER scheduler priority is 0.
- * Linux scheduling priorities are not alterable, so there is no
- * Solaris translation necessary.
- */
- switch (policy) {
- case LX_SCHED_FIFO:
- case LX_SCHED_RR:
- return (LX_SCHED_PRIORITY_MIN_RRFIFO);
- case LX_SCHED_OTHER:
- return (LX_SCHED_PRIORITY_MIN_OTHER);
- default:
- break;
- }
- return (-EINVAL);
-}
-
-long
-lx_sched_get_priority_max(uintptr_t policy)
-{
- /*
- * In Linux, the only valid SCHED_OTHER scheduler priority is 0
- * Linux scheduling priorities are not alterable, so there is no
- * Solaris translation necessary.
- */
- switch (policy) {
- case LX_SCHED_FIFO:
- case LX_SCHED_RR:
- return (LX_SCHED_PRIORITY_MAX_RRFIFO);
- case LX_SCHED_OTHER:
- return (LX_SCHED_PRIORITY_MAX_OTHER);
- default:
- break;
- }
- return (-EINVAL);
-}
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h
index 1f255a8c58..64e1ca6ab8 100644
--- a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h
@@ -162,14 +162,6 @@ extern long lx_setpriority(uintptr_t, uintptr_t, uintptr_t);
extern long lx_ptrace(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
-extern long lx_sched_getparam(uintptr_t, uintptr_t);
-extern long lx_sched_setparam(uintptr_t, uintptr_t);
-extern long lx_sched_rr_get_interval(uintptr_t pid, uintptr_t);
-extern long lx_sched_getscheduler(uintptr_t);
-extern long lx_sched_setscheduler(uintptr_t, uintptr_t, uintptr_t);
-extern long lx_sched_get_priority_min(uintptr_t);
-extern long lx_sched_get_priority_max(uintptr_t);
-
extern long lx_xattr2(uintptr_t, uintptr_t);
extern long lx_xattr3(uintptr_t, uintptr_t, uintptr_t);
extern long lx_xattr4(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
diff --git a/usr/src/lib/brand/lx/testing/ltp_skiplist b/usr/src/lib/brand/lx/testing/ltp_skiplist
index e98654f4e9..bb8e4e08f8 100644
--- a/usr/src/lib/brand/lx/testing/ltp_skiplist
+++ b/usr/src/lib/brand/lx/testing/ltp_skiplist
@@ -71,12 +71,6 @@ quotactl01
quotactl02
remap_file_pages01
remap_file_pages02
-sched_getparam01
-sched_getparam02
-sched_setscheduler01
-sched_setscheduler03 # OS-5524
-sched_setparam02
-sched_setparam03
setfsuid04
setfsuid04_16
settimeofday01
@@ -212,14 +206,6 @@ removexattr01
removexattr02
request_key01
request_key02
-sched_rr_get_interval01
-sched_rr_get_interval02
-sched_rr_get_interval03
-sched_getscheduler01
-sched_getscheduler02
-sched_getattr01
-sched_getattr02
-sched_setattr01
semctl01
semop02
sendfile02 # OS-3296
diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c
index cfc5f95e78..e2043a9ab6 100644
--- a/usr/src/uts/common/brand/lx/os/lx_misc.c
+++ b/usr/src/uts/common/brand/lx/os/lx_misc.c
@@ -481,6 +481,17 @@ lx_initlwp(klwp_t *lwp, void *lwpbd)
if (plwpd != NULL) {
lwpd->br_cgroupid = plwpd->br_cgroupid;
}
+ /*
+ * The new LWP inherits the parent LWP emulated scheduling info.
+ */
+ if (plwpd != NULL) {
+ lwpd->br_schd_class = plwpd->br_schd_class;
+ lwpd->br_schd_pri = plwpd->br_schd_pri;
+ lwpd->br_schd_flags = plwpd->br_schd_flags;
+ lwpd->br_schd_runtime = plwpd->br_schd_runtime;
+ lwpd->br_schd_deadline = plwpd->br_schd_deadline;
+ lwpd->br_schd_period = plwpd->br_schd_period;
+ }
lxzdata = ztolxzd(p->p_zone);
mutex_enter(&lxzdata->lxzd_lock);
cgrp = lxzdata->lxzd_cgroup;
diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c
index bfd77c029f..7cb29f1004 100644
--- a/usr/src/uts/common/brand/lx/os/lx_syscall.c
+++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c
@@ -673,14 +673,14 @@ lx_sysent_t lx_sysent32[] = {
{"munlock", NULL, 0, 2}, /* 151 */
{"mlockall", NULL, 0, 1}, /* 152 */
{"munlockall", NULL, 0, 0}, /* 153 */
- {"sched_setparam", NULL, 0, 2}, /* 154 */
- {"sched_getparam", NULL, 0, 2}, /* 155 */
- {"sched_setscheduler", NULL, 0, 3}, /* 156 */
- {"sched_getscheduler", NULL, 0, 1}, /* 157 */
+ {"sched_setparam", lx_sched_setparam, 0, 2}, /* 154 */
+ {"sched_getparam", lx_sched_getparam, 0, 2}, /* 155 */
+ {"sched_setscheduler", lx_sched_setscheduler, 0, 3}, /* 156 */
+ {"sched_getscheduler", lx_sched_getscheduler, 0, 1}, /* 157 */
{"sched_yield", lx_sched_yield, 0, 0}, /* 158 */
- {"sched_get_priority_max", NULL, 0, 1}, /* 159 */
- {"sched_get_priority_min", NULL, 0, 1}, /* 160 */
- {"sched_rr_get_interval", NULL, 0, 2}, /* 161 */
+ {"sched_get_priority_max", lx_sched_get_priority_max, 0, 1}, /* 159 */
+ {"sched_get_priority_min", lx_sched_get_priority_min, 0, 1}, /* 160 */
+ {"sched_rr_get_interval", lx_sched_rr_get_interval, 0, 2}, /* 161 */
{"nanosleep", lx_nanosleep, 0, 2}, /* 162 */
{"mremap", NULL, 0, 5}, /* 163 */
{"setresuid16", lx_setresuid16, 0, 3}, /* 164 */
@@ -874,8 +874,8 @@ lx_sysent_t lx_sysent32[] = {
{"process_vm_writev", NULL, NOSYS_NULL, 0}, /* 348 */
{"kcmp", NULL, NOSYS_NULL, 0}, /* 349 */
{"finit_module", NULL, NOSYS_NULL, 0}, /* 350 */
- {"sched_setattr", NULL, NOSYS_NULL, 0}, /* 351 */
- {"sched_getattr", NULL, NOSYS_NULL, 0}, /* 352 */
+ {"sched_setattr", lx_sched_setattr, 0, 3}, /* 351 */
+ {"sched_getattr", lx_sched_getattr, 0, 4}, /* 352 */
{"renameat2", NULL, NOSYS_NULL, 0}, /* 353 */
{"seccomp", NULL, NOSYS_NULL, 0}, /* 354 */
{"getrandom", lx_getrandom, 0, 3}, /* 355 */
@@ -1032,13 +1032,13 @@ lx_sysent_t lx_sysent64[] = {
{"sysfs", NULL, 0, 3}, /* 139 */
{"getpriority", NULL, 0, 2}, /* 140 */
{"setpriority", NULL, 0, 3}, /* 141 */
- {"sched_setparam", NULL, 0, 2}, /* 142 */
- {"sched_getparam", NULL, 0, 2}, /* 143 */
- {"sched_setscheduler", NULL, 0, 3}, /* 144 */
- {"sched_getscheduler", NULL, 0, 1}, /* 145 */
- {"sched_get_priority_max", NULL, 0, 1}, /* 146 */
- {"sched_get_priority_min", NULL, 0, 1}, /* 147 */
- {"sched_rr_get_interval", NULL, 0, 2}, /* 148 */
+ {"sched_setparam", lx_sched_setparam, 0, 2}, /* 142 */
+ {"sched_getparam", lx_sched_getparam, 0, 2}, /* 143 */
+ {"sched_setscheduler", lx_sched_setscheduler, 0, 3}, /* 144 */
+ {"sched_getscheduler", lx_sched_getscheduler, 0, 1}, /* 145 */
+ {"sched_get_priority_max", lx_sched_get_priority_max, 0, 1}, /* 146 */
+ {"sched_get_priority_min", lx_sched_get_priority_min, 0, 1}, /* 147 */
+ {"sched_rr_get_interval", lx_sched_rr_get_interval, 0, 2}, /* 148 */
{"mlock", NULL, 0, 2}, /* 149 */
{"munlock", NULL, 0, 2}, /* 150 */
{"mlockall", NULL, 0, 1}, /* 151 */
@@ -1204,8 +1204,8 @@ lx_sysent_t lx_sysent64[] = {
{"process_vm_writev", NULL, NOSYS_NULL, 0}, /* 311 */
{"kcmp", NULL, NOSYS_NULL, 0}, /* 312 */
{"finit_module", NULL, NOSYS_NULL, 0}, /* 313 */
- {"sched_setattr", NULL, NOSYS_NULL, 0}, /* 314 */
- {"sched_getattr", NULL, NOSYS_NULL, 0}, /* 315 */
+ {"sched_setattr", lx_sched_setattr, 0, 3}, /* 314 */
+ {"sched_getattr", lx_sched_getattr, 0, 4}, /* 315 */
{"renameat2", NULL, NOSYS_NULL, 0}, /* 316 */
{"seccomp", NULL, NOSYS_NULL, 0}, /* 317 */
{"getrandom", lx_getrandom, 0, 3}, /* 318 */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h
index fe372a2124..7a9357c367 100644
--- a/usr/src/uts/common/brand/lx/sys/lx_brand.h
+++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h
@@ -81,6 +81,8 @@ extern "C" {
/* Highest capability we know about */
#define LX_CAP_MAX_VALID 36
+/* sched attr flag values */
+#define LX_SCHED_FLAG_RESET_ON_FORK 0x1
/*
* brand(2) subcommands
*
@@ -554,6 +556,19 @@ struct lx_lwp_data {
* ID of the cgroup this thread belongs to.
*/
uint_t br_cgroupid;
+
+ /*
+ * When the zone is running under FSS (which is the common case) then
+ * we cannot change scheduling class, so we emulate that. By default
+ * Linux uses LX_SCHED_OTHER (which is 0) and that only supports a
+ * priority of 0, so no special initialization is needed.
+ */
+ int br_schd_class; /* emulated scheduling class */
+ int br_schd_pri; /* emulated scheduling priority */
+ uint64_t br_schd_flags; /* emulated [sg]et_attr flags */
+ uint64_t br_schd_runtime; /* emulated DEADLINE */
+ uint64_t br_schd_deadline; /* emulated DEADLINE */
+ uint64_t br_schd_period; /* emulated DEADLINE */
};
/*
diff --git a/usr/src/uts/common/brand/lx/sys/lx_sched.h b/usr/src/uts/common/brand/lx/sys/lx_sched.h
deleted file mode 100644
index b0ae748f3c..0000000000
--- a/usr/src/uts/common/brand/lx/sys/lx_sched.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_LINUX_SCHED_H
-#define _SYS_LINUX_SCHED_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/procset.h>
-#include <sys/priocntl.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Linux scheduler policies.
- */
-#define LX_SCHED_OTHER 0
-#define LX_SCHED_FIFO 1
-#define LX_SCHED_RR 2
-
-#define LX_PRI_MAX 99
-
-typedef int l_pid_t;
-
-struct lx_sched_param {
- int lx_sched_prio;
-};
-
-extern int sched_setprocset(procset_t *, l_pid_t);
-extern long do_priocntlsys(int, procset_t *, void *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_LINUX_SCHED_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
index 34b2c6153c..f8fb1c145d 100644
--- a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
+++ b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
@@ -178,8 +178,12 @@ extern long lx_renameat();
extern long lx_sched_getaffinity();
extern long lx_sched_getparam();
extern long lx_sched_getscheduler();
+extern long lx_sched_getattr();
+extern long lx_sched_get_priority_max();
+extern long lx_sched_get_priority_min();
extern long lx_sched_rr_get_interval();
extern long lx_sched_setaffinity();
+extern long lx_sched_setattr();
extern long lx_sched_setparam();
extern long lx_sched_setscheduler();
extern long lx_sched_yield();
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sched.c b/usr/src/uts/common/brand/lx/syscall/lx_sched.c
index dd4a039905..0fc8046517 100644
--- a/usr/src/uts/common/brand/lx/syscall/lx_sched.c
+++ b/usr/src/uts/common/brand/lx/syscall/lx_sched.c
@@ -26,6 +26,19 @@
* Copyright 2016 Joyent, Inc.
*/
+/*
+ * Emulation for scheduling related syscalls.
+ *
+ * Under a typical zone configuration the zones will always be running under
+ * FSS so that no single zone can monopolize the system. Zones do not have the
+ * privilege to leave FSS (for the obvious reason that this would violate the
+ * global zone resource management policies). Thus, for the sched_* syscalls
+ * we typically will never be able to emulate those using our other native
+ * scheduling classes. Under this common case we simply track the scheduler
+ * settings on the lwp's lx brand structure and we also try to adjust the
+ * lwp priority within the valid range to approximate the intended effect.
+ */
+
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/errno.h>
@@ -35,16 +48,87 @@
#include <sys/tspriocntl.h>
#include <sys/processor.h>
#include <sys/brand.h>
-#include <sys/lx_sched.h>
#include <sys/lx_brand.h>
#include <sys/sysmacros.h>
#include <sys/policy.h>
+#include <sys/procset.h>
+#include <sys/priocntl.h>
+
+typedef int l_pid_t;
extern int yield();
extern long priocntl_common(int, procset_t *, int, caddr_t, caddr_t, uio_seg_t);
+static int lx_sched_setprocset(procset_t *, l_pid_t);
+static long lx_do_priocntlsys(int, procset_t *, void *);
+
#define BITS_PER_BYTE 8
+/*
+ * Linux scheduler policies.
+ */
+#define LX_SCHED_OTHER 0
+#define LX_SCHED_FIFO 1
+#define LX_SCHED_RR 2
+#define LX_SCHED_BATCH 3
+#define LX_SCHED_IDLE 5
+#define LX_SCHED_DEADLINE 6
+
+/*
+ * Linux scheduler priority ranges.
+ */
+#define LX_SCHED_PRIORITY_MIN_OTHER 0
+#define LX_SCHED_PRIORITY_MAX_OTHER 0
+#define LX_SCHED_PRIORITY_MIN_RRFIFO 1
+#define LX_SCHED_PRIORITY_MAX_RRFIFO 99
+
+#define MAXPRI 60 /* See FSS_MAXUPRI */
+
+/*
+ * When emulating scheduling priorities (e.g. under FSS) we'll do the best we
+ * can by adjusting the thread's priority within our range.
+ */
+static int lx_emul_pri_map[] = {
+ 0, /* LX_SCHED_OTHER */
+ MAXPRI, /* LX_SCHED_FIFO */
+ MAXPRI - 1, /* LX_SCHED_RR */
+ -MAXPRI + 1, /* LX_SCHED_BATCH */
+ 0, /* UNUSED */
+ -MAXPRI, /* LX_SCHED_IDLE */
+ MAXPRI /* LX_SCHED_DEADLINE */
+};
+
+/*
+ * Determine if we should emulate the sched_* syscalls. A zone is almost always
+ * going to be running under FSS in any kind of production configuration, and
+ * FSS is currently the only class which zone processes won't have the privilege
+ * to leave. Instead of checking for FSS explicitly, we generalize our check
+ * using CL_CANEXIT.
+ */
+#define EMUL_SCHED() (CL_CANEXIT(curthread, CRED()) != 0)
+
+struct lx_sched_param {
+ int lx_sched_prio;
+};
+
+typedef struct lx_sched_attr {
+ uint32_t lx_size;
+
+ uint32_t lx_sched_policy;
+ uint64_t lx_sched_flags;
+
+ /* For LX_SCHED_OTHER or LX_SCHED_BATCH */
+ int lx_sched_nice;
+
+ /* For LX_SCHED_FIFO or LX_SCHED_RR */
+ uint32_t lx_sched_priority;
+
+ /* For LX_SCHED_DEADLINE */
+ uint64_t lx_sched_runtime;
+ uint64_t lx_sched_deadline;
+ uint64_t lx_sched_period;
+} lx_sched_attr_t;
+
long
lx_sched_yield(void)
{
@@ -81,7 +165,8 @@ stol_cpuset(cpuset_t *smask, lx_affmask_t *lmask)
/*
* Find and lock a process for lx_sched_* operations.
- * Sets 'pp' and 'tp' on success, with P_PR_LOCK set (but p_lock not held).
+ * Sets 'pp' and 'tp' on success, with P_PR_LOCK set and p_lock held.
+ * The target process must be branded.
*/
static int
lx_sched_pidlock(l_pid_t pid, proc_t **pp, kthread_t **tp, boolean_t is_write)
@@ -95,9 +180,9 @@ lx_sched_pidlock(l_pid_t pid, proc_t **pp, kthread_t **tp, boolean_t is_write)
}
if (pid == 0) {
p = curproc;
+ ASSERT(PROC_IS_BRANDED(p));
mutex_enter(&p->p_lock);
sprlock_proc(p);
- mutex_exit(&p->p_lock);
*tp = curthread;
*pp = p;
@@ -107,13 +192,18 @@ lx_sched_pidlock(l_pid_t pid, proc_t **pp, kthread_t **tp, boolean_t is_write)
if (lx_lpid_lock((pid_t)pid, curzone, PRLOCK, &p, &t) != 0) {
return (ESRCH);
}
- mutex_exit(&p->p_lock);
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ if (!(PROC_IS_BRANDED(p))) {
+ sprunlock(p);
+ return (EPERM);
+ }
if (is_write) {
cred_t *cr = CRED();
/*
- * To perform a sched_setaffinity on a thread outside of the
+ * To perform a sched_* operation on a thread outside of the
* current process, either the euid/egid of the target must
* match, or the calling process must hold CAP_SYS_NICE.
* (PRIV_PROC_PRIOUP maps to CAP_SYS_NICE)
@@ -121,14 +211,15 @@ lx_sched_pidlock(l_pid_t pid, proc_t **pp, kthread_t **tp, boolean_t is_write)
err = 0;
if (secpolicy_raisepriority(cr) != 0) {
err = 0;
+ mutex_exit(&p->p_lock);
mutex_enter(&p->p_crlock);
if (crgetuid(cr) != crgetuid(p->p_cred) ||
crgetgid(cr) != crgetgid(p->p_cred)) {
err = EPERM;
}
mutex_exit(&p->p_crlock);
+ mutex_enter(&p->p_lock);
if (err != 0) {
- mutex_enter(&p->p_lock);
sprunlock(p);
return (err);
}
@@ -136,6 +227,7 @@ lx_sched_pidlock(l_pid_t pid, proc_t **pp, kthread_t **tp, boolean_t is_write)
}
*pp = p;
*tp = t;
+ ASSERT(MUTEX_HELD(&p->p_lock));
return (0);
}
@@ -163,6 +255,7 @@ lx_sched_getaffinity(l_pid_t pid, unsigned int len, void *maskp)
return (set_errno(err));
}
+ mutex_exit(&p->p_lock);
mutex_enter(&cpu_lock);
mutex_enter(&p->p_lock);
/*
@@ -228,6 +321,7 @@ lx_sched_setaffinity(l_pid_t pid, unsigned int len, void *maskp)
/*
* Constrain the mask to currently active CPUs.
*/
+ mutex_exit(&p->p_lock);
mutex_enter(&cpu_lock);
mutex_enter(&p->p_lock);
lwpd = ttolxlwp(tp);
@@ -310,35 +404,81 @@ lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param)
int prio, maxupri;
int rv;
- if (pid < 0)
- return (set_errno(ESRCH));
-
- if ((rv = sched_setprocset(&procset, pid)))
- return (rv);
+ if (pid < 0 || param == NULL)
+ return (set_errno(EINVAL));
if (copyin(param, &sched_param, sizeof (sched_param)))
return (set_errno(EFAULT));
prio = sched_param.lx_sched_prio;
- if (policy < 0) {
- /*
- * get the class id
- */
- pcparm.pc_cid = PC_CLNULL;
- (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
- if (lwp->lwp_errno)
- return (lwp->lwp_errno);
+ if (EMUL_SCHED()) {
+ proc_t *p;
+ kthread_t *tp = NULL;
+ int incr;
+ lx_lwp_data_t *lwpd;
+
+ switch (policy) {
+ case LX_SCHED_OTHER:
+ case LX_SCHED_BATCH:
+ case LX_SCHED_IDLE:
+ case LX_SCHED_DEADLINE:
+ if (prio != LX_SCHED_PRIORITY_MIN_OTHER)
+ return (set_errno(EINVAL));
+ break;
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ if (crgetuid(CRED()) != 0)
+ return (set_errno(EPERM));
+ if (prio < LX_SCHED_PRIORITY_MIN_RRFIFO ||
+ prio > LX_SCHED_PRIORITY_MAX_RRFIFO)
+ return (set_errno(EINVAL));
+ break;
+ default:
+ return (set_errno(EINVAL));
+ }
- /*
- * get the current policy
- */
- bzero(&pcinfo, sizeof (pcinfo));
- pcinfo.pc_cid = pcparm.pc_cid;
- (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
- if (lwp->lwp_errno)
- return (lwp->lwp_errno);
+ /* Find and operate on the target lwp. */
+ if ((rv = lx_sched_pidlock(pid, &p, &tp, B_TRUE)) != 0)
+ return (set_errno(rv));
+
+ lwpd = lwptolxlwp(ttolwp(tp));
+ if (lwpd->br_schd_class == LX_SCHED_IDLE &&
+ policy != LX_SCHED_IDLE && crgetuid(CRED()) != 0) {
+
+ sprunlock(p);
+ return (set_errno(EPERM));
+ }
+ lwpd->br_schd_class = policy;
+ lwpd->br_schd_pri = prio;
+
+ ASSERT(policy <= LX_SCHED_DEADLINE);
+ incr = lx_emul_pri_map[policy];
+
+ CL_DOPRIO(tp, CRED(), incr, &rv);
+
+ sprunlock(p);
+ return (0);
+ }
+
+ if ((rv = lx_sched_setprocset(&procset, pid)))
+ return (rv);
+
+ /* get the class id */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) lx_do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /* get the current policy */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) lx_do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (policy < 0) {
if (strcmp(pcinfo.pc_clname, "TS") == 0) {
policy = LX_SCHED_OTHER;
} else if (strcmp(pcinfo.pc_clname, "RT") == 0) {
@@ -356,7 +496,7 @@ lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param)
case LX_SCHED_FIFO:
case LX_SCHED_RR:
(void) strcpy(pcinfo.pc_clname, "RT");
- (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ (void) lx_do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
if (lwp->lwp_errno)
return (lwp->lwp_errno);
@@ -371,7 +511,7 @@ lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param)
case LX_SCHED_OTHER:
(void) strcpy(pcinfo.pc_clname, "TS");
- (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ (void) lx_do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
if (lwp->lwp_errno)
return (lwp->lwp_errno);
@@ -392,7 +532,7 @@ lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param)
/*
* finally set scheduling policy and parameters
*/
- (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
+ (void) lx_do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
return (0);
}
@@ -408,16 +548,30 @@ lx_sched_getscheduler(l_pid_t pid)
int rv;
if (pid < 0)
- return (set_errno(ESRCH));
+ return (set_errno(EINVAL));
+
+ if (EMUL_SCHED()) {
+ proc_t *p;
+ kthread_t *tp = NULL;
- if ((rv = sched_setprocset(&procset, pid)))
+ /* Find and operate on the target lwp. */
+ if ((rv = lx_sched_pidlock(pid, &p, &tp, B_FALSE)) != 0)
+ return (set_errno(rv));
+
+ policy = lwptolxlwp(ttolwp(tp))->br_schd_class;
+ sprunlock(p);
+
+ return (policy);
+ }
+
+ if ((rv = lx_sched_setprocset(&procset, pid)))
return (rv);
/*
* get the class id
*/
pcparm.pc_cid = PC_CLNULL;
- (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ (void) lx_do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
if (lwp->lwp_errno)
return (lwp->lwp_errno);
@@ -426,17 +580,18 @@ lx_sched_getscheduler(l_pid_t pid)
*/
bzero(&pcinfo, sizeof (pcinfo));
pcinfo.pc_cid = pcparm.pc_cid;
- (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ (void) lx_do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
if (lwp->lwp_errno)
return (lwp->lwp_errno);
- if (strcmp(pcinfo.pc_clname, "TS") == 0)
+ if (strcmp(pcinfo.pc_clname, "TS") == 0) {
policy = LX_SCHED_OTHER;
- else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+ } else if (strcmp(pcinfo.pc_clname, "RT") == 0) {
policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
- else
+ } else {
policy = set_errno(EINVAL);
+ }
return (policy);
}
@@ -455,22 +610,71 @@ lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param)
int prio, maxupri;
int rv;
- if (pid < 0)
- return (set_errno(ESRCH));
-
- if ((rv = sched_setprocset(&procset, pid)))
- return (rv);
+ if (pid < 0 || param == NULL)
+ return (set_errno(EINVAL));
if (copyin(param, &sched_param, sizeof (sched_param)))
return (set_errno(EFAULT));
prio = sched_param.lx_sched_prio;
+ if (EMUL_SCHED()) {
+ proc_t *p;
+ kthread_t *tp = NULL;
+ int incr;
+
+ /* Find and operate on the target lwp. */
+ if ((rv = lx_sched_pidlock(pid, &p, &tp, B_TRUE)) != 0)
+ return (set_errno(rv));
+
+ policy = lwptolxlwp(ttolwp(tp))->br_schd_class;
+ switch (policy) {
+ case LX_SCHED_OTHER:
+ case LX_SCHED_BATCH:
+ case LX_SCHED_IDLE:
+ case LX_SCHED_DEADLINE:
+ if (prio != LX_SCHED_PRIORITY_MIN_OTHER) {
+ sprunlock(p);
+ return (set_errno(EINVAL));
+ }
+ break;
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ if (crgetuid(CRED()) != 0) {
+ sprunlock(p);
+ return (set_errno(EPERM));
+ }
+ if (prio < LX_SCHED_PRIORITY_MIN_RRFIFO ||
+ prio > LX_SCHED_PRIORITY_MAX_RRFIFO) {
+ sprunlock(p);
+ return (set_errno(EINVAL));
+ }
+ break;
+ default:
+ /* this shouldn't happen */
+ ASSERT(0);
+ sprunlock(p);
+ return (set_errno(EINVAL));
+ }
+
+ lwptolxlwp(ttolwp(tp))->br_schd_pri = prio;
+
+ ASSERT(policy <= LX_SCHED_DEADLINE);
+ incr = lx_emul_pri_map[policy];
+
+ CL_DOPRIO(tp, CRED(), incr, &rv);
+ sprunlock(p);
+ return (0);
+ }
+
+ if ((rv = lx_sched_setprocset(&procset, pid)))
+ return (rv);
+
/*
* get the class id
*/
pcparm.pc_cid = PC_CLNULL;
- (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ (void) lx_do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
if (lwp->lwp_errno)
return (lwp->lwp_errno);
@@ -479,7 +683,7 @@ lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param)
*/
bzero(&pcinfo, sizeof (pcinfo));
pcinfo.pc_cid = pcparm.pc_cid;
- (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ (void) lx_do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
if (lwp->lwp_errno)
return (lwp->lwp_errno);
@@ -498,7 +702,7 @@ lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param)
case LX_SCHED_FIFO:
case LX_SCHED_RR:
(void) strcpy(pcinfo.pc_clname, "RT");
- (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ (void) lx_do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
if (lwp->lwp_errno)
return (lwp->lwp_errno);
@@ -513,7 +717,7 @@ lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param)
case LX_SCHED_OTHER:
(void) strcpy(pcinfo.pc_clname, "TS");
- (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ (void) lx_do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
if (lwp->lwp_errno)
return (lwp->lwp_errno);
@@ -534,7 +738,7 @@ lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param)
/*
* finally set scheduling policy and parameters
*/
- (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
+ (void) lx_do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
return (0);
}
@@ -551,17 +755,33 @@ lx_sched_getparam(l_pid_t pid, struct lx_sched_param *param)
int prio, scale;
int rv;
- if (pid < 0)
- return (set_errno(ESRCH));
+ if (pid < 0 || param == NULL)
+ return (set_errno(EINVAL));
+
+ if (EMUL_SCHED()) {
+ proc_t *p;
+ kthread_t *tp = NULL;
+
+ /* Find and operate on the target lwp. */
+ if ((rv = lx_sched_pidlock(pid, &p, &tp, B_FALSE)) != 0)
+ return (set_errno(rv));
+
+ local_param.lx_sched_prio = lwptolxlwp(ttolwp(tp))->br_schd_pri;
+ sprunlock(p);
+ if (copyout(&local_param, param, sizeof (local_param)))
+ return (set_errno(EFAULT));
+
+ return (0);
+ }
- if ((rv = sched_setprocset(&procset, pid)))
+ if ((rv = lx_sched_setprocset(&procset, pid)))
return (rv);
/*
* get the class id
*/
pcparm.pc_cid = PC_CLNULL;
- (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ (void) lx_do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
if (lwp->lwp_errno)
return (lwp->lwp_errno);
@@ -570,7 +790,7 @@ lx_sched_getparam(l_pid_t pid, struct lx_sched_param *param)
*/
bzero(&pcinfo, sizeof (pcinfo));
pcinfo.pc_cid = pcparm.pc_cid;
- (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ (void) lx_do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
if (lwp->lwp_errno)
return (lwp->lwp_errno);
@@ -612,16 +832,51 @@ lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival)
int rv;
if (pid < 0)
- return (set_errno(ESRCH));
+ return (set_errno(EINVAL));
+
+ if (EMUL_SCHED()) {
+ int policy;
+ proc_t *p;
+ kthread_t *tp = NULL;
+
+ /* Find and operate on the target lwp. */
+ if ((rv = lx_sched_pidlock(pid, &p, &tp, B_FALSE)) != 0)
+ return (set_errno(rv));
+
+ policy = lwptolxlwp(ttolwp(tp))->br_schd_class;
+ sprunlock(p);
+
+ interval.tv_sec = 0;
+ if (policy == LX_SCHED_RR) {
+ /* Use a made-up value similar to Linux */
+ interval.tv_nsec = 100000000;
+ } else {
+ interval.tv_nsec = 0;
+ }
+
+ if (copyout(&interval, ival, sizeof (interval)))
+ return (set_errno(EFAULT));
+
+ return (0);
+ }
- if ((rv = sched_setprocset(&procset, pid)))
+ if ((rv = lx_sched_setprocset(&procset, pid)))
return (rv);
/*
* get the class id
*/
pcparm.pc_cid = PC_CLNULL;
- (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ (void) lx_do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the class info and identify the equivalent linux policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) lx_do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
if (lwp->lwp_errno)
return (lwp->lwp_errno);
@@ -631,12 +886,15 @@ lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival)
setprocset(&procset, POP_AND, P_PID, 0, P_ALL, 0);
bzero(&pcinfo, sizeof (pcinfo));
(void) strcpy(pcinfo.pc_clname, "RT");
- (void) do_priocntlsys(PC_GETCID, &procset, &pcinfo);
+ (void) lx_do_priocntlsys(PC_GETCID, &procset, &pcinfo);
if (lwp->lwp_errno)
return (lwp->lwp_errno);
- if (pcparm.pc_cid == pcinfo.pc_cid &&
- ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs != RT_TQINF) {
+ /*
+ * Contrary to what the man page says, you don't have to be in RR to
+ * get this interval.
+ */
+ if (((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs != RT_TQINF) {
interval.tv_sec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqsecs;
interval.tv_nsec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs;
@@ -649,8 +907,185 @@ lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival)
return (set_errno(EINVAL));
}
-int
-sched_setprocset(procset_t *procset, l_pid_t pid)
+/*
+ * Report the lowest priority value accepted for a Linux scheduling policy.
+ * Linux scheduling priorities are not alterable, so the answer is a fixed
+ * constant per policy and nothing has to be translated from illumos.  An
+ * unrecognized policy fails with EINVAL.
+ */
+long
+lx_sched_get_priority_min(uintptr_t policy)
+{
+	long lowest;
+
+	if (policy == LX_SCHED_FIFO || policy == LX_SCHED_RR) {
+		lowest = LX_SCHED_PRIORITY_MIN_RRFIFO;
+	} else if (policy == LX_SCHED_OTHER || policy == LX_SCHED_BATCH ||
+	    policy == LX_SCHED_IDLE || policy == LX_SCHED_DEADLINE) {
+		lowest = LX_SCHED_PRIORITY_MIN_OTHER;
+	} else {
+		/* Not one of the policies handled here. */
+		lowest = set_errno(EINVAL);
+	}
+
+	return (lowest);
+}
+
+/*
+ * Report the highest priority value accepted for a Linux scheduling policy.
+ * Linux scheduling priorities are not alterable, so the answer is a fixed
+ * constant per policy and nothing has to be translated from illumos.  An
+ * unrecognized policy fails with EINVAL.
+ */
+long
+lx_sched_get_priority_max(uintptr_t policy)
+{
+	long highest;
+
+	if (policy == LX_SCHED_FIFO || policy == LX_SCHED_RR) {
+		highest = LX_SCHED_PRIORITY_MAX_RRFIFO;
+	} else if (policy == LX_SCHED_OTHER || policy == LX_SCHED_BATCH ||
+	    policy == LX_SCHED_IDLE || policy == LX_SCHED_DEADLINE) {
+		highest = LX_SCHED_PRIORITY_MAX_OTHER;
+	} else {
+		/* Not one of the policies handled here. */
+		highest = set_errno(EINVAL);
+	}
+
+	return (highest);
+}
+
+/*
+ * Emulate sched_setattr(2) for the lwp named by 'pid'.
+ *
+ * The first 32 bits at 'attr' give the size of the caller's structure.
+ * A size larger than lx_sched_attr_t is rejected with E2BIG; a smaller one
+ * is accepted and the fields it does not cover are treated as zero.  A
+ * non-zero 'flags' argument, or any attribute flag other than
+ * LX_SCHED_FLAG_RESET_ON_FORK, yields EINVAL.
+ *
+ * The request is only honored when EMUL_SCHED() is true, in which case the
+ * policy and its parameters are recorded in the lwp's brand data and the
+ * increment from lx_emul_pri_map[] is handed to CL_DOPRIO() (whose result
+ * is not examined).  Otherwise the call fails with ENOSYS.
+ *
+ * Priority checking follows lx_sched_setscheduler(): FIFO and RR require
+ * uid 0 and a priority within the RRFIFO range; DEADLINE requires uid 0;
+ * and every policy other than FIFO and RR must be given a priority of
+ * LX_SCHED_PRIORITY_MIN_OTHER.
+ */
+long
+lx_sched_setattr(l_pid_t pid, lx_sched_attr_t *attr, uint32_t flags)
+{
+	int rv;
+	uint32_t lx_size;
+	lx_sched_attr_t local_attr;
+	uint64_t flg;
+
+	if (pid < 0 || attr == NULL || flags != 0)
+		return (set_errno(EINVAL));
+
+	/* The caller states how much of the structure it is supplying. */
+	if (copyin(attr, &lx_size, sizeof (lx_size)))
+		return (set_errno(EFAULT));
+
+	if (lx_size > sizeof (local_attr))
+		return (set_errno(E2BIG));
+
+	/* Zero first so that fields beyond lx_size read as 0. */
+	bzero(&local_attr, sizeof (local_attr));
+	if (copyin(attr, &local_attr, lx_size))
+		return (set_errno(EFAULT));
+
+	flg = local_attr.lx_sched_flags;
+	if ((flg & ~LX_SCHED_FLAG_RESET_ON_FORK) != 0)
+		return (set_errno(EINVAL));
+
+	if (EMUL_SCHED()) {
+		int policy = local_attr.lx_sched_policy;
+		int incr;
+		int err = 0;
+		proc_t *p;
+		kthread_t *tp = NULL;
+		lx_lwp_data_t *lwpd;
+
+		/* Find and operate on the target lwp. */
+		if ((rv = lx_sched_pidlock(pid, &p, &tp, B_TRUE)) != 0)
+			return (set_errno(rv));
+
+		switch (policy) {
+		case LX_SCHED_FIFO:
+		case LX_SCHED_RR:
+			if (crgetuid(CRED()) != 0) {
+				err = EPERM;
+			} else if (local_attr.lx_sched_priority <
+			    LX_SCHED_PRIORITY_MIN_RRFIFO ||
+			    local_attr.lx_sched_priority >
+			    LX_SCHED_PRIORITY_MAX_RRFIFO) {
+				err = EINVAL;
+			}
+			break;
+
+		case LX_SCHED_DEADLINE:
+			if (crgetuid(CRED()) != 0) {
+				err = EPERM;
+				break;
+			}
+			/* FALLTHROUGH */
+		case LX_SCHED_OTHER:
+		case LX_SCHED_BATCH:
+		case LX_SCHED_IDLE:
+			/*
+			 * These policies have a single valid priority, the
+			 * same one lx_sched_setscheduler() insists on.
+			 */
+			if (local_attr.lx_sched_priority !=
+			    LX_SCHED_PRIORITY_MIN_OTHER)
+				err = EINVAL;
+			break;
+
+		default:
+			err = EINVAL;
+			break;
+		}
+
+		if (err != 0) {
+			/* Single exit for rejections so the lock is dropped. */
+			sprunlock(p);
+			return (set_errno(err));
+		}
+
+		lwpd = lwptolxlwp(ttolwp(tp));
+		lwpd->br_schd_class = policy;
+		lwpd->br_schd_flags = flg;
+		lwpd->br_schd_pri = local_attr.lx_sched_priority;
+
+		lwpd->br_schd_runtime = local_attr.lx_sched_runtime;
+		lwpd->br_schd_deadline = local_attr.lx_sched_deadline;
+		lwpd->br_schd_period = local_attr.lx_sched_period;
+
+		ASSERT(policy <= LX_SCHED_DEADLINE);
+		incr = lx_emul_pri_map[policy];
+
+		CL_DOPRIO(tp, CRED(), incr, &rv);
+		sprunlock(p);
+		return (0);
+	}
+
+	/* Currently not supported under other classes */
+	return (set_errno(ENOSYS));
+}
+
+/*
+ * Emulate sched_getattr(2): copy out the scheduling attributes recorded in
+ * the brand data of the lwp named by 'pid'.  'size' must be at least
+ * sizeof (lx_sched_attr_t) and 'flags' must be zero, else EINVAL.  When
+ * EMUL_SCHED() is false the call fails with ENOSYS.
+ */
+long
+lx_sched_getattr(l_pid_t pid, lx_sched_attr_t *attr, uint32_t size,
+    uint32_t flags)
+{
+	lx_sched_attr_t out;
+	lx_lwp_data_t *lxd;
+	proc_t *p;
+	kthread_t *tp = NULL;
+	int err;
+
+	if (pid < 0 || attr == NULL)
+		return (set_errno(EINVAL));
+	if (flags != 0 || size < sizeof (out))
+		return (set_errno(EINVAL));
+
+	/* Currently not supported under other classes */
+	if (!(EMUL_SCHED()))
+		return (set_errno(ENOSYS));
+
+	bzero(&out, sizeof (out));
+	out.lx_size = sizeof (lx_sched_attr_t);
+
+	/* Find and operate on the target lwp. */
+	err = lx_sched_pidlock(pid, &p, &tp, B_FALSE);
+	if (err != 0)
+		return (set_errno(err));
+
+	/* Gather the saved values, then unlock before touching user memory. */
+	lxd = lwptolxlwp(ttolwp(tp));
+	out.lx_sched_policy = lxd->br_schd_class;
+	out.lx_sched_priority = lxd->br_schd_pri;
+	out.lx_sched_flags = lxd->br_schd_flags;
+	out.lx_sched_runtime = lxd->br_schd_runtime;
+	out.lx_sched_deadline = lxd->br_schd_deadline;
+	out.lx_sched_period = lxd->br_schd_period;
+	sprunlock(p);
+
+	if (copyout(&out, attr, sizeof (out)))
+		return (set_errno(EFAULT));
+
+	return (0);
+}
+
+static int
+lx_sched_setprocset(procset_t *procset, l_pid_t pid)
{
id_t lid, rid;
idtype_t lidtype, ridtype;
@@ -658,27 +1093,22 @@ sched_setprocset(procset_t *procset, l_pid_t pid)
/*
* define the target lwp
*/
- if (pid == 0) {
- ridtype = P_ALL;
- lidtype = P_PID;
- rid = 0;
- lid = P_MYID;
- } else {
- if (lx_lpid_to_spair(pid, &pid, &lid) < 0)
- return (set_errno(ESRCH));
- if (pid != curproc->p_pid)
- return (set_errno(ESRCH));
- rid = 0;
- ridtype = P_ALL;
- lidtype = P_LWPID;
- }
+ if (pid == 0)
+ pid = curproc->p_pid;
+
+ if (lx_lpid_to_spair(pid, &pid, &lid) < 0)
+ return (set_errno(ESRCH));
+ rid = 0;
+ ridtype = P_ALL;
+ lidtype = P_LWPID;
+
setprocset(procset, POP_AND, lidtype, lid, ridtype, rid);
return (0);
}
-long
-do_priocntlsys(int cmd, procset_t *procset, void *arg)
+static long
+lx_do_priocntlsys(int cmd, procset_t *procset, void *arg)
{
return (priocntl_common(PC_VERSION, procset, cmd, (caddr_t)arg, 0,
UIO_SYSSPACE));