diff options
| author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2016-10-25 19:23:39 +0000 |
|---|---|---|
| committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2016-10-25 19:24:15 +0000 |
| commit | 3125ad3bb6fb793491e3fc152ef05eb4cd221797 (patch) | |
| tree | dc78de15ad1f4b94e7e60f01ddbe0c2fbc7eebbd | |
| parent | 084bab208e0b7c16aa583ebb268d2886e066a4b8 (diff) | |
| download | illumos-joyent-3125ad3bb6fb793491e3fc152ef05eb4cd221797.tar.gz | |
OS-5717 lx sched* syscalls should work while running under FSS
OS-5718 lx sched_setscheduler should not return previous policy
OS-5524 sched_setscheduler missing support for SCHED_BATCH and SCHED_IDLE
OS-5733 missing sched_setattr and sched_getattr
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Patrick Mooney <patrick.mooney@joyent.com>
| -rw-r--r-- | usr/src/lib/brand/lx/lx_brand/Makefile.com | 1 | ||||
| -rw-r--r-- | usr/src/lib/brand/lx/lx_brand/common/lx_brand.c | 28 | ||||
| -rw-r--r-- | usr/src/lib/brand/lx/lx_brand/common/sched.c | 476 | ||||
| -rw-r--r-- | usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h | 8 | ||||
| -rw-r--r-- | usr/src/lib/brand/lx/testing/ltp_skiplist | 14 | ||||
| -rw-r--r-- | usr/src/uts/common/brand/lx/os/lx_misc.c | 11 | ||||
| -rw-r--r-- | usr/src/uts/common/brand/lx/os/lx_syscall.c | 36 | ||||
| -rw-r--r-- | usr/src/uts/common/brand/lx/sys/lx_brand.h | 15 | ||||
| -rw-r--r-- | usr/src/uts/common/brand/lx/sys/lx_sched.h | 60 | ||||
| -rw-r--r-- | usr/src/uts/common/brand/lx/sys/lx_syscalls.h | 4 | ||||
| -rw-r--r-- | usr/src/uts/common/brand/lx/syscall/lx_sched.c | 582 |
11 files changed, 568 insertions, 667 deletions
diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile.com b/usr/src/lib/brand/lx/lx_brand/Makefile.com index e653fecd58..53f5246834 100644 --- a/usr/src/lib/brand/lx/lx_brand/Makefile.com +++ b/usr/src/lib/brand/lx/lx_brand/Makefile.com @@ -45,7 +45,6 @@ COBJS = aio.o \ mount_nfs.o \ priority.o \ ptrace.o \ - sched.o \ sendfile.o \ signal.o \ stack.o \ diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c index d98b73941e..5724b6cbba 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c +++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c @@ -1153,13 +1153,13 @@ static lx_syscall_handler_t lx_handlers[] = { lx_sysfs, /* 139: sysfs */ lx_getpriority, /* 140: getpriority */ lx_setpriority, /* 141: setpriority */ - lx_sched_setparam, /* 142: sched_setparam */ - lx_sched_getparam, /* 143: sched_getparam */ - lx_sched_setscheduler, /* 144: sched_setscheduler */ - lx_sched_getscheduler, /* 145: sched_getscheduler */ - lx_sched_get_priority_max, /* 146: sched_get_priority_max */ - lx_sched_get_priority_min, /* 147: sched_get_priority_min */ - lx_sched_rr_get_interval, /* 148: sched_rr_get_interval */ + NULL, /* 142: sched_setparam */ + NULL, /* 143: sched_getparam */ + NULL, /* 144: sched_setscheduler */ + NULL, /* 145: sched_getscheduler */ + NULL, /* 146: sched_get_priority_max */ + NULL, /* 147: sched_get_priority_min */ + NULL, /* 148: sched_rr_get_interval */ lx_mlock, /* 149: mlock */ lx_munlock, /* 150: munlock */ lx_mlockall, /* 151: mlockall */ @@ -1496,14 +1496,14 @@ static lx_syscall_handler_t lx_handlers[] = { lx_munlock, /* 151: munlock */ lx_mlockall, /* 152: mlockall */ lx_munlockall, /* 153: munlockall */ - lx_sched_setparam, /* 154: sched_setparam */ - lx_sched_getparam, /* 155: sched_getparam */ - lx_sched_setscheduler, /* 156: sched_setscheduler */ - lx_sched_getscheduler, /* 157: sched_getscheduler */ + NULL, /* 154: sched_setparam */ + NULL, /* 155: sched_getparam */ + NULL, /* 156: 
sched_setscheduler */ + NULL, /* 157: sched_getscheduler */ NULL, /* 158: sched_yield */ - lx_sched_get_priority_max, /* 159: sched_get_priority_max */ - lx_sched_get_priority_min, /* 160: sched_get_priority_min */ - lx_sched_rr_get_interval, /* 161: sched_rr_get_interval */ + NULL, /* 159: sched_get_priority_max */ + NULL, /* 160: sched_get_priority_min */ + NULL, /* 161: sched_rr_get_interval */ NULL, /* 162: nanosleep */ lx_remap, /* 163: mremap */ NULL, /* 164: setresuid16 */ diff --git a/usr/src/lib/brand/lx/lx_brand/common/sched.c b/usr/src/lib/brand/lx/lx_brand/common/sched.c deleted file mode 100644 index 80b7660985..0000000000 --- a/usr/src/lib/brand/lx/lx_brand/common/sched.c +++ /dev/null @@ -1,476 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright 2016 Joyent, Inc. 
- */ - -#include <sys/types.h> -#include <sys/cred_impl.h> -#include <sys/ucred.h> -#include <ucred.h> -#include <stdlib.h> -#include <signal.h> -#include <errno.h> -#include <sched.h> -#include <strings.h> -#include <pthread.h> -#include <time.h> -#include <thread.h> -#include <alloca.h> -#include <unistd.h> -#include <sys/syscall.h> -#include <sys/lx_syscall.h> -#include <sys/lx_debug.h> -#include <sys/lx_brand.h> -#include <sys/lx_misc.h> -#include <sys/lx_sched.h> - -/* Linux only has three valid policies, SCHED_FIFO, SCHED_RR and SCHED_OTHER */ -static int -validate_policy(int policy) -{ - switch (policy) { - case LX_SCHED_FIFO: - return (SCHED_FIFO); - - case LX_SCHED_RR: - return (SCHED_RR); - - case LX_SCHED_OTHER: - return (SCHED_OTHER); - - default: - lx_debug("validate_policy: illegal policy: %d", policy); - return (-EINVAL); - } -} - -/* - * Check to see if we have the permissions to set scheduler parameters and - * policy, based on Linux' demand that such commands fail with errno set to - * EPERM if the current euid is not the euid or ruid of the process in - * question. - */ -static int -check_schedperms(pid_t pid) -{ - size_t sz; - ucred_t *cr; - uid_t euid; - - euid = geteuid(); - - if (pid == getpid()) { - /* - * If we're the process to be checked, simply check the euid - * against our ruid. - */ - if (euid != getuid()) - return (-EPERM); - - return (0); - } - - /* - * We allocate a ucred_t ourselves rather than call ucred_get(3C) - * because ucred_get() calls malloc(3C), which the brand library cannot - * use. Because we allocate the space with SAFE_ALLOCA(), there's - * no need to free it when we're done. - */ - sz = ucred_size(); - cr = (ucred_t *)SAFE_ALLOCA(sz); - - if (cr == NULL) - return (-ENOMEM); - - /* - * If we can't access the process' credentials, fail with errno EPERM - * as the call would not have succeeded anyway. - */ - if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, pid, cr) != 0) - return ((errno == EACCES) ? 
-EPERM : -errno); - - if ((euid != ucred_geteuid(cr)) && (euid != ucred_getruid(cr))) - return (-EPERM); - - return (0); -} - -static int -ltos_sparam(int policy, struct lx_sched_param *lsp, struct sched_param *sp) -{ - struct lx_sched_param ls; - int smin = sched_get_priority_min(policy); - int smax = sched_get_priority_max(policy); - - if (uucopy(lsp, &ls, sizeof (struct lx_sched_param)) != 0) - return (-errno); - - bzero(sp, sizeof (struct sched_param)); - - /* - * Linux has a fixed priority range, 0 - 99, which we need to convert to - * Solaris's dynamic range. Linux considers lower numbers to be - * higher priority, so we'll invert the priority within Solaris's range. - * - * The formula to convert between ranges is: - * - * L * (smax - smin) - * S = ----------------- + smin - * (lmax - lmin) - * - * where S is the Solaris equivalent of the linux priority L. - * - * To invert the priority, we use: - * S' = smax - S + smin - * - * Together, these two formulas become: - * - * L * (smax - smin) - * S = smax - ----------------- + 2smin - * 99 - */ - sp->sched_priority = smax - - ((ls.lx_sched_prio * (smax - smin)) / LX_PRI_MAX) + 2*smin; - - lx_debug("ltos_sparam: linux prio %d = Solaris prio %d " - "(Solaris range %d,%d)\n", ls.lx_sched_prio, sp->sched_priority, - smin, smax); - - return (0); -} - -static int -stol_sparam(int policy, struct sched_param *sp, struct lx_sched_param *lsp) -{ - struct lx_sched_param ls; - int smin = sched_get_priority_min(policy); - int smax = sched_get_priority_max(policy); - - if (policy == SCHED_OTHER) { - /* - * In Linux, the only valid SCHED_OTHER scheduler priority is 0 - */ - ls.lx_sched_prio = 0; - } else { - /* - * Convert Solaris's dynamic, inverted priority range to the - * fixed Linux range of 1 - 99. 
- * - * The formula is (see above): - * - * (smax - s + 2smin) * 99 - * l = ----------------------- - * smax - smin - */ - ls.lx_sched_prio = ((smax - sp->sched_priority + 2*smin) * - LX_PRI_MAX) / (smax - smin); - } - - lx_debug("stol_sparam: policy %d: Solaris prio %d = linux prio %d " - "(Solaris range %d,%d)\n", policy, - sp->sched_priority, ls.lx_sched_prio, smin, smax); - - return ((uucopy(&ls, lsp, sizeof (struct lx_sched_param)) != 0) - ? -errno : 0); -} - -long -lx_sched_getparam(uintptr_t pid, uintptr_t param) -{ - int policy, ret; - pid_t s_pid; - lwpid_t s_tid; - - struct sched_param sp; - - if (((pid_t)pid < 0) || (param == NULL)) - return (-EINVAL); - - if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) - return (-ESRCH); - - /* - * If we're attempting to get information on our own process, we can - * get data on a per-thread basis; if not, punt and use the specified - * pid. - */ - if (s_pid == getpid()) { - if ((ret = pthread_getschedparam(s_tid, &policy, &sp)) != 0) - return (-ret); - } else { - if (sched_getparam(s_pid, &sp) == -1) - return (-errno); - - if ((policy = sched_getscheduler(s_pid)) < 0) - return (-errno); - } - - /* - * Make sure that any non-SCHED_FIFO non-SCHED_RR scheduler is mapped - * onto SCHED_OTHER. 
- */ - if (policy != SCHED_FIFO && policy != SCHED_RR) - policy = SCHED_OTHER; - - return (stol_sparam(policy, &sp, (struct lx_sched_param *)param)); -} - -long -lx_sched_setparam(uintptr_t pid, uintptr_t param) -{ - int err, policy; - pid_t s_pid; - lwpid_t s_tid; - struct lx_sched_param lp; - struct sched_param sp; - - if (((pid_t)pid < 0) || (param == NULL)) - return (-EINVAL); - - if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) - return (-ESRCH); - - if (s_pid == getpid()) { - struct sched_param dummy; - - if ((err = pthread_getschedparam(s_tid, &policy, &dummy)) != 0) - return (-err); - } else - if ((policy = sched_getscheduler(s_pid)) < 0) - return (-errno); - - lx_debug("sched_setparam(): current policy %d", policy); - - if (uucopy((void *)param, &lp, sizeof (lp)) != 0) - return (-errno); - - /* - * In Linux, the only valid SCHED_OTHER scheduler priority is 0 - */ - if ((policy == SCHED_OTHER) && (lp.lx_sched_prio != 0)) - return (-EINVAL); - - if ((err = ltos_sparam(policy, (struct lx_sched_param *)&lp, - &sp)) != 0) - return (err); - - /* - * Check if we're allowed to change the scheduler for the process. - * - * If we're operating on a thread, we can't just call - * pthread_setschedparam() because as all threads reside within a - * single Solaris process, Solaris will allow the modification - * - * If we're operating on a process, we can't just call sched_setparam() - * because Solaris will allow the call to succeed if the scheduler - * parameters do not differ from those being installed, but Linux wants - * the call to fail. - */ - if ((err = check_schedperms(s_pid)) != 0) - return (err); - - if (s_pid == getpid()) - return (((err = pthread_setschedparam(s_tid, policy, &sp)) != 0) - ? -err : 0); - - return ((sched_setparam(s_pid, &sp) == -1) ? 
-errno : 0); -} - -long -lx_sched_rr_get_interval(uintptr_t pid, uintptr_t ts) -{ - pid_t s_pid; - - if ((pid_t)pid < 0) - return (-EINVAL); - - if (lx_lpid_to_spid((pid_t)pid, &s_pid) < 0) - return (-ESRCH); - - if (sched_rr_get_interval(s_pid, (struct timespec *)ts) == 0) - return (0); - else - return (-errno); -} - -long -lx_sched_getscheduler(uintptr_t pid) -{ - int policy, rv; - pid_t s_pid; - lwpid_t s_tid; - - if ((pid_t)pid < 0) - return (-EINVAL); - - if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) - return (-ESRCH); - - if (s_pid == getpid()) { - struct sched_param dummy; - - if ((rv = pthread_getschedparam(s_tid, &policy, &dummy)) != 0) - return (-rv); - } else - if ((policy = sched_getscheduler(s_pid)) < 0) - return (-errno); - - /* - * Linux only supports certain policies; avoid confusing apps with - * alien policies. - */ - switch (policy) { - case SCHED_FIFO: - return (LX_SCHED_FIFO); - case SCHED_OTHER: - return (LX_SCHED_OTHER); - case SCHED_RR: - return (LX_SCHED_RR); - default: - break; - } - - return (LX_SCHED_OTHER); -} - -long -lx_sched_setscheduler(uintptr_t pid, uintptr_t policy, uintptr_t param) -{ - int rt_pol; - int rv; - pid_t s_pid; - lwpid_t s_tid; - struct lx_sched_param lp; - - struct sched_param sp; - - if (((pid_t)pid < 0) || (param == NULL)) - return (-EINVAL); - - if ((rt_pol = validate_policy((int)policy)) < 0) - return (rt_pol); - - if ((rv = ltos_sparam(policy, (struct lx_sched_param *)param, - &sp)) != 0) - return (rv); - - if (uucopy((void *)param, &lp, sizeof (lp)) != 0) - return (-errno); - - if (rt_pol == LX_SCHED_OTHER) { - /* - * In Linux, the only valid SCHED_OTHER scheduler priority is 0 - */ - if (lp.lx_sched_prio != 0) - return (-EINVAL); - - /* - * If we're already SCHED_OTHER, there's nothing else to do. 
- */ - if (lx_sched_getscheduler(pid) == LX_SCHED_OTHER) - return (0); - } - - if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) - return (-ESRCH); - - /* - * Check if we're allowed to change the scheduler for the process. - * - * If we're operating on a thread, we can't just call - * pthread_setschedparam() because as all threads reside within a - * single Solaris process, Solaris will allow the modification. - * - * If we're operating on a process, we can't just call - * sched_setscheduler() because Solaris will allow the call to succeed - * if the scheduler and scheduler parameters do not differ from those - * being installed, but Linux wants the call to fail. - */ - if ((rv = check_schedperms(s_pid)) != 0) - return (rv); - - if (s_pid == getpid()) { - struct sched_param param; - int pol; - - if ((pol = sched_getscheduler(s_pid)) == -1) - return (-errno); - - /* - * sched_setscheduler() returns the previous scheduling policy - * on success, so call pthread_getschedparam() to get the - * current thread's scheduling policy and return that if the - * call to pthread_setschedparam() succeeds. - */ - if ((rv = pthread_getschedparam(s_tid, &pol, &param)) != 0) - return (-rv); - - return (((rv = pthread_setschedparam(s_tid, rt_pol, &sp)) != 0) - ? -rv : pol); - } - - return (((rv = sched_setscheduler(s_pid, rt_pol, &sp)) == -1) - ? -errno : rv); -} - -long -lx_sched_get_priority_min(uintptr_t policy) -{ - /* - * In Linux, the only valid SCHED_OTHER scheduler priority is 0. - * Linux scheduling priorities are not alterable, so there is no - * Solaris translation necessary. 
- */ - switch (policy) { - case LX_SCHED_FIFO: - case LX_SCHED_RR: - return (LX_SCHED_PRIORITY_MIN_RRFIFO); - case LX_SCHED_OTHER: - return (LX_SCHED_PRIORITY_MIN_OTHER); - default: - break; - } - return (-EINVAL); -} - -long -lx_sched_get_priority_max(uintptr_t policy) -{ - /* - * In Linux, the only valid SCHED_OTHER scheduler priority is 0 - * Linux scheduling priorities are not alterable, so there is no - * Solaris translation necessary. - */ - switch (policy) { - case LX_SCHED_FIFO: - case LX_SCHED_RR: - return (LX_SCHED_PRIORITY_MAX_RRFIFO); - case LX_SCHED_OTHER: - return (LX_SCHED_PRIORITY_MAX_OTHER); - default: - break; - } - return (-EINVAL); -} diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h index 1f255a8c58..64e1ca6ab8 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h @@ -162,14 +162,6 @@ extern long lx_setpriority(uintptr_t, uintptr_t, uintptr_t); extern long lx_ptrace(uintptr_t, uintptr_t, uintptr_t, uintptr_t); -extern long lx_sched_getparam(uintptr_t, uintptr_t); -extern long lx_sched_setparam(uintptr_t, uintptr_t); -extern long lx_sched_rr_get_interval(uintptr_t pid, uintptr_t); -extern long lx_sched_getscheduler(uintptr_t); -extern long lx_sched_setscheduler(uintptr_t, uintptr_t, uintptr_t); -extern long lx_sched_get_priority_min(uintptr_t); -extern long lx_sched_get_priority_max(uintptr_t); - extern long lx_xattr2(uintptr_t, uintptr_t); extern long lx_xattr3(uintptr_t, uintptr_t, uintptr_t); extern long lx_xattr4(uintptr_t, uintptr_t, uintptr_t, uintptr_t); diff --git a/usr/src/lib/brand/lx/testing/ltp_skiplist b/usr/src/lib/brand/lx/testing/ltp_skiplist index e98654f4e9..bb8e4e08f8 100644 --- a/usr/src/lib/brand/lx/testing/ltp_skiplist +++ b/usr/src/lib/brand/lx/testing/ltp_skiplist @@ -71,12 +71,6 @@ quotactl01 quotactl02 remap_file_pages01 remap_file_pages02 -sched_getparam01 -sched_getparam02 -sched_setscheduler01 
-sched_setscheduler03 # OS-5524 -sched_setparam02 -sched_setparam03 setfsuid04 setfsuid04_16 settimeofday01 @@ -212,14 +206,6 @@ removexattr01 removexattr02 request_key01 request_key02 -sched_rr_get_interval01 -sched_rr_get_interval02 -sched_rr_get_interval03 -sched_getscheduler01 -sched_getscheduler02 -sched_getattr01 -sched_getattr02 -sched_setattr01 semctl01 semop02 sendfile02 # OS-3296 diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c index cfc5f95e78..e2043a9ab6 100644 --- a/usr/src/uts/common/brand/lx/os/lx_misc.c +++ b/usr/src/uts/common/brand/lx/os/lx_misc.c @@ -481,6 +481,17 @@ lx_initlwp(klwp_t *lwp, void *lwpbd) if (plwpd != NULL) { lwpd->br_cgroupid = plwpd->br_cgroupid; } + /* + * The new LWP inherits the parent LWP emulated scheduling info. + */ + if (plwpd != NULL) { + lwpd->br_schd_class = plwpd->br_schd_class; + lwpd->br_schd_pri = plwpd->br_schd_pri; + lwpd->br_schd_flags = plwpd->br_schd_flags; + lwpd->br_schd_runtime = plwpd->br_schd_runtime; + lwpd->br_schd_deadline = plwpd->br_schd_deadline; + lwpd->br_schd_period = plwpd->br_schd_period; + } lxzdata = ztolxzd(p->p_zone); mutex_enter(&lxzdata->lxzd_lock); cgrp = lxzdata->lxzd_cgroup; diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c index bfd77c029f..7cb29f1004 100644 --- a/usr/src/uts/common/brand/lx/os/lx_syscall.c +++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c @@ -673,14 +673,14 @@ lx_sysent_t lx_sysent32[] = { {"munlock", NULL, 0, 2}, /* 151 */ {"mlockall", NULL, 0, 1}, /* 152 */ {"munlockall", NULL, 0, 0}, /* 153 */ - {"sched_setparam", NULL, 0, 2}, /* 154 */ - {"sched_getparam", NULL, 0, 2}, /* 155 */ - {"sched_setscheduler", NULL, 0, 3}, /* 156 */ - {"sched_getscheduler", NULL, 0, 1}, /* 157 */ + {"sched_setparam", lx_sched_setparam, 0, 2}, /* 154 */ + {"sched_getparam", lx_sched_getparam, 0, 2}, /* 155 */ + {"sched_setscheduler", lx_sched_setscheduler, 0, 3}, /* 156 */ + 
{"sched_getscheduler", lx_sched_getscheduler, 0, 1}, /* 157 */ {"sched_yield", lx_sched_yield, 0, 0}, /* 158 */ - {"sched_get_priority_max", NULL, 0, 1}, /* 159 */ - {"sched_get_priority_min", NULL, 0, 1}, /* 160 */ - {"sched_rr_get_interval", NULL, 0, 2}, /* 161 */ + {"sched_get_priority_max", lx_sched_get_priority_max, 0, 1}, /* 159 */ + {"sched_get_priority_min", lx_sched_get_priority_min, 0, 1}, /* 160 */ + {"sched_rr_get_interval", lx_sched_rr_get_interval, 0, 2}, /* 161 */ {"nanosleep", lx_nanosleep, 0, 2}, /* 162 */ {"mremap", NULL, 0, 5}, /* 163 */ {"setresuid16", lx_setresuid16, 0, 3}, /* 164 */ @@ -874,8 +874,8 @@ lx_sysent_t lx_sysent32[] = { {"process_vm_writev", NULL, NOSYS_NULL, 0}, /* 348 */ {"kcmp", NULL, NOSYS_NULL, 0}, /* 349 */ {"finit_module", NULL, NOSYS_NULL, 0}, /* 350 */ - {"sched_setattr", NULL, NOSYS_NULL, 0}, /* 351 */ - {"sched_getattr", NULL, NOSYS_NULL, 0}, /* 352 */ + {"sched_setattr", lx_sched_setattr, 0, 3}, /* 351 */ + {"sched_getattr", lx_sched_getattr, 0, 4}, /* 352 */ {"renameat2", NULL, NOSYS_NULL, 0}, /* 353 */ {"seccomp", NULL, NOSYS_NULL, 0}, /* 354 */ {"getrandom", lx_getrandom, 0, 3}, /* 355 */ @@ -1032,13 +1032,13 @@ lx_sysent_t lx_sysent64[] = { {"sysfs", NULL, 0, 3}, /* 139 */ {"getpriority", NULL, 0, 2}, /* 140 */ {"setpriority", NULL, 0, 3}, /* 141 */ - {"sched_setparam", NULL, 0, 2}, /* 142 */ - {"sched_getparam", NULL, 0, 2}, /* 143 */ - {"sched_setscheduler", NULL, 0, 3}, /* 144 */ - {"sched_getscheduler", NULL, 0, 1}, /* 145 */ - {"sched_get_priority_max", NULL, 0, 1}, /* 146 */ - {"sched_get_priority_min", NULL, 0, 1}, /* 147 */ - {"sched_rr_get_interval", NULL, 0, 2}, /* 148 */ + {"sched_setparam", lx_sched_setparam, 0, 2}, /* 142 */ + {"sched_getparam", lx_sched_getparam, 0, 2}, /* 143 */ + {"sched_setscheduler", lx_sched_setscheduler, 0, 3}, /* 144 */ + {"sched_getscheduler", lx_sched_getscheduler, 0, 1}, /* 145 */ + {"sched_get_priority_max", lx_sched_get_priority_max, 0, 1}, /* 146 */ + 
{"sched_get_priority_min", lx_sched_get_priority_min, 0, 1}, /* 147 */ + {"sched_rr_get_interval", lx_sched_rr_get_interval, 0, 2}, /* 148 */ {"mlock", NULL, 0, 2}, /* 149 */ {"munlock", NULL, 0, 2}, /* 150 */ {"mlockall", NULL, 0, 1}, /* 151 */ @@ -1204,8 +1204,8 @@ lx_sysent_t lx_sysent64[] = { {"process_vm_writev", NULL, NOSYS_NULL, 0}, /* 311 */ {"kcmp", NULL, NOSYS_NULL, 0}, /* 312 */ {"finit_module", NULL, NOSYS_NULL, 0}, /* 313 */ - {"sched_setattr", NULL, NOSYS_NULL, 0}, /* 314 */ - {"sched_getattr", NULL, NOSYS_NULL, 0}, /* 315 */ + {"sched_setattr", lx_sched_setattr, 0, 3}, /* 314 */ + {"sched_getattr", lx_sched_getattr, 0, 4}, /* 315 */ {"renameat2", NULL, NOSYS_NULL, 0}, /* 316 */ {"seccomp", NULL, NOSYS_NULL, 0}, /* 317 */ {"getrandom", lx_getrandom, 0, 3}, /* 318 */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h index fe372a2124..7a9357c367 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_brand.h +++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h @@ -81,6 +81,8 @@ extern "C" { /* Highest capability we know about */ #define LX_CAP_MAX_VALID 36 +/* sched attr flag values */ +#define LX_SCHED_FLAG_RESET_ON_FORK 0x1 /* * brand(2) subcommands * @@ -554,6 +556,19 @@ struct lx_lwp_data { * ID of the cgroup this thread belongs to. */ uint_t br_cgroupid; + + /* + * When the zone is running under FSS (which is the common case) then + * we cannot change scheduling class, so we emulate that. By default + * Linux uses LX_SCHED_OTHER (which is 0) and that only supports a + * priority of 0, so no special initialization is needed. 
+ */ + int br_schd_class; /* emulated scheduling class */ + int br_schd_pri; /* emulated scheduling priority */ + uint64_t br_schd_flags; /* emulated [sg]et_attr flags */ + uint64_t br_schd_runtime; /* emulated DEADLINE */ + uint64_t br_schd_deadline; /* emulated DEADLINE */ + uint64_t br_schd_period; /* emulated DEADLINE */ }; /* diff --git a/usr/src/uts/common/brand/lx/sys/lx_sched.h b/usr/src/uts/common/brand/lx/sys/lx_sched.h deleted file mode 100644 index b0ae748f3c..0000000000 --- a/usr/src/uts/common/brand/lx/sys/lx_sched.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_LINUX_SCHED_H -#define _SYS_LINUX_SCHED_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/procset.h> -#include <sys/priocntl.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Linux scheduler policies. 
- */ -#define LX_SCHED_OTHER 0 -#define LX_SCHED_FIFO 1 -#define LX_SCHED_RR 2 - -#define LX_PRI_MAX 99 - -typedef int l_pid_t; - -struct lx_sched_param { - int lx_sched_prio; -}; - -extern int sched_setprocset(procset_t *, l_pid_t); -extern long do_priocntlsys(int, procset_t *, void *); - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_LINUX_SCHED_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h index 34b2c6153c..f8fb1c145d 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h +++ b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h @@ -178,8 +178,12 @@ extern long lx_renameat(); extern long lx_sched_getaffinity(); extern long lx_sched_getparam(); extern long lx_sched_getscheduler(); +extern long lx_sched_getattr(); +extern long lx_sched_get_priority_max(); +extern long lx_sched_get_priority_min(); extern long lx_sched_rr_get_interval(); extern long lx_sched_setaffinity(); +extern long lx_sched_setattr(); extern long lx_sched_setparam(); extern long lx_sched_setscheduler(); extern long lx_sched_yield(); diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sched.c b/usr/src/uts/common/brand/lx/syscall/lx_sched.c index dd4a039905..0fc8046517 100644 --- a/usr/src/uts/common/brand/lx/syscall/lx_sched.c +++ b/usr/src/uts/common/brand/lx/syscall/lx_sched.c @@ -26,6 +26,19 @@ * Copyright 2016 Joyent, Inc. */ +/* + * Emulation for scheduling related syscalls. + * + * Under a typical zone configuration the zones will always be running under + * FSS so that no single zone can monopolize the system. Zones do not have the + * privilege to leave FSS (for the obvious reason that this would violate the + * global zone resource management policies). Thus, for the sched_* syscalls + * we typically will never be able to emulate those using our other native + * scheduling classes. 
Under this common case we simply track the scheduler + * settings on the lwp's lx brand structure and we also try to adjust the + * lwp priority within the valid range to approximate the intended effect. + */ + #include <sys/types.h> #include <sys/systm.h> #include <sys/errno.h> @@ -35,16 +48,87 @@ #include <sys/tspriocntl.h> #include <sys/processor.h> #include <sys/brand.h> -#include <sys/lx_sched.h> #include <sys/lx_brand.h> #include <sys/sysmacros.h> #include <sys/policy.h> +#include <sys/procset.h> +#include <sys/priocntl.h> + +typedef int l_pid_t; extern int yield(); extern long priocntl_common(int, procset_t *, int, caddr_t, caddr_t, uio_seg_t); +static int lx_sched_setprocset(procset_t *, l_pid_t); +static long lx_do_priocntlsys(int, procset_t *, void *); + #define BITS_PER_BYTE 8 +/* + * Linux scheduler policies. + */ +#define LX_SCHED_OTHER 0 +#define LX_SCHED_FIFO 1 +#define LX_SCHED_RR 2 +#define LX_SCHED_BATCH 3 +#define LX_SCHED_IDLE 5 +#define LX_SCHED_DEADLINE 6 + +/* + * Linux scheduler priority ranges. + */ +#define LX_SCHED_PRIORITY_MIN_OTHER 0 +#define LX_SCHED_PRIORITY_MAX_OTHER 0 +#define LX_SCHED_PRIORITY_MIN_RRFIFO 1 +#define LX_SCHED_PRIORITY_MAX_RRFIFO 99 + +#define MAXPRI 60 /* See FSS_MAXUPRI */ + +/* + * When emulating scheduling priorities (e.g. under FSS) we'll do the best we + * can by adjusting the thread's priority within our range. + */ +static int lx_emul_pri_map[] = { + 0, /* LX_SCHED_OTHER */ + MAXPRI, /* LX_SCHED_FIFO */ + MAXPRI - 1, /* LX_SCHED_RR */ + -MAXPRI + 1, /* LX_SCHED_BATCH */ + 0, /* UNUSED */ + -MAXPRI, /* LX_SCHED_IDLE */ + MAXPRI /* LX_SCHED_DEADLINE */ +}; + +/* + * Determine if we should emulate the sched_* syscalls. A zone is almost always + * going to be running under FSS in any kind of production configuration, and + * FSS is currently the only class which zone processes won't have the privilege + * to leave. Instead of checking for FSS explicitly, we generalize our check + * using CL_CANEXIT. 
+ */ +#define EMUL_SCHED() (CL_CANEXIT(curthread, CRED()) != 0) + +struct lx_sched_param { + int lx_sched_prio; +}; + +typedef struct lx_sched_attr { + uint32_t lx_size; + + uint32_t lx_sched_policy; + uint64_t lx_sched_flags; + + /* For LX_SCHED_OTHER or LX_SCHED_BATCH */ + int lx_sched_nice; + + /* For LX_SCHED_FIFO or LX_SCHED_RR */ + uint32_t lx_sched_priority; + + /* For LX_SCHED_DEADLINE */ + uint64_t lx_sched_runtime; + uint64_t lx_sched_deadline; + uint64_t lx_sched_period; +} lx_sched_attr_t; + long lx_sched_yield(void) { @@ -81,7 +165,8 @@ stol_cpuset(cpuset_t *smask, lx_affmask_t *lmask) /* * Find and lock a process for lx_sched_* operations. - * Sets 'pp' and 'tp' on success, with P_PR_LOCK set (but p_lock not held). + * Sets 'pp' and 'tp' on success, with P_PR_LOCK set and p_lock held. + * The target process must be branded. */ static int lx_sched_pidlock(l_pid_t pid, proc_t **pp, kthread_t **tp, boolean_t is_write) @@ -95,9 +180,9 @@ lx_sched_pidlock(l_pid_t pid, proc_t **pp, kthread_t **tp, boolean_t is_write) } if (pid == 0) { p = curproc; + ASSERT(PROC_IS_BRANDED(p)); mutex_enter(&p->p_lock); sprlock_proc(p); - mutex_exit(&p->p_lock); *tp = curthread; *pp = p; @@ -107,13 +192,18 @@ lx_sched_pidlock(l_pid_t pid, proc_t **pp, kthread_t **tp, boolean_t is_write) if (lx_lpid_lock((pid_t)pid, curzone, PRLOCK, &p, &t) != 0) { return (ESRCH); } - mutex_exit(&p->p_lock); + + ASSERT(MUTEX_HELD(&p->p_lock)); + if (!(PROC_IS_BRANDED(p))) { + sprunlock(p); + return (EPERM); + } if (is_write) { cred_t *cr = CRED(); /* - * To perform a sched_setaffinity on a thread outside of the + * To perform a sched_* operation on a thread outside of the * current process, either the euid/egid of the target must * match, or the calling process must hold CAP_SYS_NICE. 
* (PRIV_PROC_PRIOUP maps to CAP_SYS_NICE) @@ -121,14 +211,15 @@ lx_sched_pidlock(l_pid_t pid, proc_t **pp, kthread_t **tp, boolean_t is_write) err = 0; if (secpolicy_raisepriority(cr) != 0) { err = 0; + mutex_exit(&p->p_lock); mutex_enter(&p->p_crlock); if (crgetuid(cr) != crgetuid(p->p_cred) || crgetgid(cr) != crgetgid(p->p_cred)) { err = EPERM; } mutex_exit(&p->p_crlock); + mutex_enter(&p->p_lock); if (err != 0) { - mutex_enter(&p->p_lock); sprunlock(p); return (err); } @@ -136,6 +227,7 @@ lx_sched_pidlock(l_pid_t pid, proc_t **pp, kthread_t **tp, boolean_t is_write) } *pp = p; *tp = t; + ASSERT(MUTEX_HELD(&p->p_lock)); return (0); } @@ -163,6 +255,7 @@ lx_sched_getaffinity(l_pid_t pid, unsigned int len, void *maskp) return (set_errno(err)); } + mutex_exit(&p->p_lock); mutex_enter(&cpu_lock); mutex_enter(&p->p_lock); /* @@ -228,6 +321,7 @@ lx_sched_setaffinity(l_pid_t pid, unsigned int len, void *maskp) /* * Constrain the mask to currently active CPUs. */ + mutex_exit(&p->p_lock); mutex_enter(&cpu_lock); mutex_enter(&p->p_lock); lwpd = ttolxlwp(tp); @@ -310,35 +404,81 @@ lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param) int prio, maxupri; int rv; - if (pid < 0) - return (set_errno(ESRCH)); - - if ((rv = sched_setprocset(&procset, pid))) - return (rv); + if (pid < 0 || param == NULL) + return (set_errno(EINVAL)); if (copyin(param, &sched_param, sizeof (sched_param))) return (set_errno(EFAULT)); prio = sched_param.lx_sched_prio; - if (policy < 0) { - /* - * get the class id - */ - pcparm.pc_cid = PC_CLNULL; - (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); - if (lwp->lwp_errno) - return (lwp->lwp_errno); + if (EMUL_SCHED()) { + proc_t *p; + kthread_t *tp = NULL; + int incr; + lx_lwp_data_t *lwpd; + + switch (policy) { + case LX_SCHED_OTHER: + case LX_SCHED_BATCH: + case LX_SCHED_IDLE: + case LX_SCHED_DEADLINE: + if (prio != LX_SCHED_PRIORITY_MIN_OTHER) + return (set_errno(EINVAL)); + break; + case LX_SCHED_FIFO: + case LX_SCHED_RR: 
+ if (crgetuid(CRED()) != 0) + return (set_errno(EPERM)); + if (prio < LX_SCHED_PRIORITY_MIN_RRFIFO || + prio > LX_SCHED_PRIORITY_MAX_RRFIFO) + return (set_errno(EINVAL)); + break; + default: + return (set_errno(EINVAL)); + } - /* - * get the current policy - */ - bzero(&pcinfo, sizeof (pcinfo)); - pcinfo.pc_cid = pcparm.pc_cid; - (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); - if (lwp->lwp_errno) - return (lwp->lwp_errno); + /* Find and operate on the target lwp. */ + if ((rv = lx_sched_pidlock(pid, &p, &tp, B_TRUE)) != 0) + return (set_errno(rv)); + + lwpd = lwptolxlwp(ttolwp(tp)); + if (lwpd->br_schd_class == LX_SCHED_IDLE && + policy != LX_SCHED_IDLE && crgetuid(CRED()) != 0) { + + sprunlock(p); + return (set_errno(EPERM)); + } + lwpd->br_schd_class = policy; + lwpd->br_schd_pri = prio; + + ASSERT(policy <= LX_SCHED_DEADLINE); + incr = lx_emul_pri_map[policy]; + + CL_DOPRIO(tp, CRED(), incr, &rv); + + sprunlock(p); + return (0); + } + + if ((rv = lx_sched_setprocset(&procset, pid))) + return (rv); + + /* get the class id */ + pcparm.pc_cid = PC_CLNULL; + (void) lx_do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + /* get the current policy */ + bzero(&pcinfo, sizeof (pcinfo)); + pcinfo.pc_cid = pcparm.pc_cid; + (void) lx_do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if (policy < 0) { if (strcmp(pcinfo.pc_clname, "TS") == 0) { policy = LX_SCHED_OTHER; } else if (strcmp(pcinfo.pc_clname, "RT") == 0) { @@ -356,7 +496,7 @@ lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param) case LX_SCHED_FIFO: case LX_SCHED_RR: (void) strcpy(pcinfo.pc_clname, "RT"); - (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); + (void) lx_do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); if (lwp->lwp_errno) return (lwp->lwp_errno); @@ -371,7 +511,7 @@ lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param) case 
LX_SCHED_OTHER: (void) strcpy(pcinfo.pc_clname, "TS"); - (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); + (void) lx_do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); if (lwp->lwp_errno) return (lwp->lwp_errno); @@ -392,7 +532,7 @@ lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param) /* * finally set scheduling policy and parameters */ - (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm); + (void) lx_do_priocntlsys(PC_SETPARMS, &procset, &pcparm); return (0); } @@ -408,16 +548,30 @@ lx_sched_getscheduler(l_pid_t pid) int rv; if (pid < 0) - return (set_errno(ESRCH)); + return (set_errno(EINVAL)); + + if (EMUL_SCHED()) { + proc_t *p; + kthread_t *tp = NULL; - if ((rv = sched_setprocset(&procset, pid))) + /* Find and operate on the target lwp. */ + if ((rv = lx_sched_pidlock(pid, &p, &tp, B_FALSE)) != 0) + return (set_errno(rv)); + + policy = lwptolxlwp(ttolwp(tp))->br_schd_class; + sprunlock(p); + + return (policy); + } + + if ((rv = lx_sched_setprocset(&procset, pid))) return (rv); /* * get the class id */ pcparm.pc_cid = PC_CLNULL; - (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + (void) lx_do_priocntlsys(PC_GETPARMS, &procset, &pcparm); if (lwp->lwp_errno) return (lwp->lwp_errno); @@ -426,17 +580,18 @@ lx_sched_getscheduler(l_pid_t pid) */ bzero(&pcinfo, sizeof (pcinfo)); pcinfo.pc_cid = pcparm.pc_cid; - (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); + (void) lx_do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); if (lwp->lwp_errno) return (lwp->lwp_errno); - if (strcmp(pcinfo.pc_clname, "TS") == 0) + if (strcmp(pcinfo.pc_clname, "TS") == 0) { policy = LX_SCHED_OTHER; - else if (strcmp(pcinfo.pc_clname, "RT") == 0) + } else if (strcmp(pcinfo.pc_clname, "RT") == 0) { policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs == RT_TQINF ? 
LX_SCHED_FIFO : LX_SCHED_RR; - else + } else { policy = set_errno(EINVAL); + } return (policy); } @@ -455,22 +610,71 @@ lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param) int prio, maxupri; int rv; - if (pid < 0) - return (set_errno(ESRCH)); - - if ((rv = sched_setprocset(&procset, pid))) - return (rv); + if (pid < 0 || param == NULL) + return (set_errno(EINVAL)); if (copyin(param, &sched_param, sizeof (sched_param))) return (set_errno(EFAULT)); prio = sched_param.lx_sched_prio; + if (EMUL_SCHED()) { + proc_t *p; + kthread_t *tp = NULL; + int incr; + + /* Find and operate on the target lwp. */ + if ((rv = lx_sched_pidlock(pid, &p, &tp, B_TRUE)) != 0) + return (set_errno(rv)); + + policy = lwptolxlwp(ttolwp(tp))->br_schd_class; + switch (policy) { + case LX_SCHED_OTHER: + case LX_SCHED_BATCH: + case LX_SCHED_IDLE: + case LX_SCHED_DEADLINE: + if (prio != LX_SCHED_PRIORITY_MIN_OTHER) { + sprunlock(p); + return (set_errno(EINVAL)); + } + break; + case LX_SCHED_FIFO: + case LX_SCHED_RR: + if (crgetuid(CRED()) != 0) { + sprunlock(p); + return (set_errno(EPERM)); + } + if (prio < LX_SCHED_PRIORITY_MIN_RRFIFO || + prio > LX_SCHED_PRIORITY_MAX_RRFIFO) { + sprunlock(p); + return (set_errno(EINVAL)); + } + break; + default: + /* this shouldn't happen */ + ASSERT(0); + sprunlock(p); + return (set_errno(EINVAL)); + } + + lwptolxlwp(ttolwp(tp))->br_schd_pri = prio; + + ASSERT(policy <= LX_SCHED_DEADLINE); + incr = lx_emul_pri_map[policy]; + + CL_DOPRIO(tp, CRED(), incr, &rv); + sprunlock(p); + return (0); + } + + if ((rv = lx_sched_setprocset(&procset, pid))) + return (rv); + /* * get the class id */ pcparm.pc_cid = PC_CLNULL; - (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + (void) lx_do_priocntlsys(PC_GETPARMS, &procset, &pcparm); if (lwp->lwp_errno) return (lwp->lwp_errno); @@ -479,7 +683,7 @@ lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param) */ bzero(&pcinfo, sizeof (pcinfo)); pcinfo.pc_cid = pcparm.pc_cid; - (void) do_priocntlsys(PC_GETCLINFO, 
&procset, &pcinfo); + (void) lx_do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); if (lwp->lwp_errno) return (lwp->lwp_errno); @@ -498,7 +702,7 @@ lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param) case LX_SCHED_FIFO: case LX_SCHED_RR: (void) strcpy(pcinfo.pc_clname, "RT"); - (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); + (void) lx_do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); if (lwp->lwp_errno) return (lwp->lwp_errno); @@ -513,7 +717,7 @@ lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param) case LX_SCHED_OTHER: (void) strcpy(pcinfo.pc_clname, "TS"); - (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); + (void) lx_do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); if (lwp->lwp_errno) return (lwp->lwp_errno); @@ -534,7 +738,7 @@ lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param) /* * finally set scheduling policy and parameters */ - (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm); + (void) lx_do_priocntlsys(PC_SETPARMS, &procset, &pcparm); return (0); } @@ -551,17 +755,33 @@ lx_sched_getparam(l_pid_t pid, struct lx_sched_param *param) int prio, scale; int rv; - if (pid < 0) - return (set_errno(ESRCH)); + if (pid < 0 || param == NULL) + return (set_errno(EINVAL)); + + if (EMUL_SCHED()) { + proc_t *p; + kthread_t *tp = NULL; + + /* Find and operate on the target lwp. 
*/ + if ((rv = lx_sched_pidlock(pid, &p, &tp, B_FALSE)) != 0) + return (set_errno(rv)); + + local_param.lx_sched_prio = lwptolxlwp(ttolwp(tp))->br_schd_pri; + sprunlock(p); + if (copyout(&local_param, param, sizeof (local_param))) + return (set_errno(EFAULT)); + + return (0); + } - if ((rv = sched_setprocset(&procset, pid))) + if ((rv = lx_sched_setprocset(&procset, pid))) return (rv); /* * get the class id */ pcparm.pc_cid = PC_CLNULL; - (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + (void) lx_do_priocntlsys(PC_GETPARMS, &procset, &pcparm); if (lwp->lwp_errno) return (lwp->lwp_errno); @@ -570,7 +790,7 @@ lx_sched_getparam(l_pid_t pid, struct lx_sched_param *param) */ bzero(&pcinfo, sizeof (pcinfo)); pcinfo.pc_cid = pcparm.pc_cid; - (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); + (void) lx_do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); if (lwp->lwp_errno) return (lwp->lwp_errno); @@ -612,16 +832,51 @@ lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival) int rv; if (pid < 0) - return (set_errno(ESRCH)); + return (set_errno(EINVAL)); + + if (EMUL_SCHED()) { + int policy; + proc_t *p; + kthread_t *tp = NULL; + + /* Find and operate on the target lwp. 
*/ + if ((rv = lx_sched_pidlock(pid, &p, &tp, B_FALSE)) != 0) + return (set_errno(rv)); + + policy = lwptolxlwp(ttolwp(tp))->br_schd_class; + sprunlock(p); + + interval.tv_sec = 0; + if (policy == LX_SCHED_RR) { + /* Use a made-up value similar to Linux */ + interval.tv_nsec = 100000000; + } else { + interval.tv_nsec = 0; + } + + if (copyout(&interval, ival, sizeof (interval))) + return (set_errno(EFAULT)); + + return (0); + } - if ((rv = sched_setprocset(&procset, pid))) + if ((rv = lx_sched_setprocset(&procset, pid))) return (rv); /* * get the class id */ pcparm.pc_cid = PC_CLNULL; - (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + (void) lx_do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + /* + * get the class info and identify the equivalent linux policy + */ + bzero(&pcinfo, sizeof (pcinfo)); + pcinfo.pc_cid = pcparm.pc_cid; + (void) lx_do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); if (lwp->lwp_errno) return (lwp->lwp_errno); @@ -631,12 +886,15 @@ lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival) setprocset(&procset, POP_AND, P_PID, 0, P_ALL, 0); bzero(&pcinfo, sizeof (pcinfo)); (void) strcpy(pcinfo.pc_clname, "RT"); - (void) do_priocntlsys(PC_GETCID, &procset, &pcinfo); + (void) lx_do_priocntlsys(PC_GETCID, &procset, &pcinfo); if (lwp->lwp_errno) return (lwp->lwp_errno); - if (pcparm.pc_cid == pcinfo.pc_cid && - ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs != RT_TQINF) { + /* + * Contrary to what the man page says, you don't have to be in RR to + * get this interval. 
+ */ + if (((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs != RT_TQINF) { interval.tv_sec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqsecs; interval.tv_nsec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs; @@ -649,8 +907,185 @@ lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival) return (set_errno(EINVAL)); } -int -sched_setprocset(procset_t *procset, l_pid_t pid) +long +lx_sched_get_priority_min(uintptr_t policy) +{ + /* + * Linux scheduling priorities are not alterable, so there is no + * illumos translation necessary. + */ + switch (policy) { + case LX_SCHED_FIFO: + case LX_SCHED_RR: + return (LX_SCHED_PRIORITY_MIN_RRFIFO); + case LX_SCHED_OTHER: + case LX_SCHED_BATCH: + case LX_SCHED_IDLE: + case LX_SCHED_DEADLINE: + return (LX_SCHED_PRIORITY_MIN_OTHER); + default: + break; + } + return (set_errno(EINVAL)); +} + +long +lx_sched_get_priority_max(uintptr_t policy) +{ + /* + * Linux scheduling priorities are not alterable, so there is no + * illumos translation necessary. + */ + switch (policy) { + case LX_SCHED_FIFO: + case LX_SCHED_RR: + return (LX_SCHED_PRIORITY_MAX_RRFIFO); + case LX_SCHED_OTHER: + case LX_SCHED_BATCH: + case LX_SCHED_IDLE: + case LX_SCHED_DEADLINE: + return (LX_SCHED_PRIORITY_MAX_OTHER); + default: + break; + } + return (set_errno(EINVAL)); +} + +long +lx_sched_setattr(l_pid_t pid, lx_sched_attr_t *attr, uint32_t flags) +{ + int rv; + uint32_t lx_size; + lx_sched_attr_t local_attr; + uint64_t flg; + + if (pid < 0 || attr == NULL || flags != 0) + return (set_errno(EINVAL)); + + if (copyin(attr, &lx_size, sizeof (lx_size))) + return (set_errno(EFAULT)); + + if (lx_size > sizeof (local_attr)) + return (set_errno(E2BIG)); + + bzero(&local_attr, sizeof (local_attr)); + if (copyin(attr, &local_attr, lx_size)) + return (set_errno(EFAULT)); + + flg = local_attr.lx_sched_flags; + if ((flg & ~LX_SCHED_FLAG_RESET_ON_FORK) != 0) + return (set_errno(EINVAL)); + + if (EMUL_SCHED()) { + int policy; + proc_t *p; + kthread_t *tp = NULL; + int incr; + 
lx_lwp_data_t *lwpd; + + /* Find and operate on the target lwp. */ + if ((rv = lx_sched_pidlock(pid, &p, &tp, B_TRUE)) != 0) + return (set_errno(rv)); + + policy = local_attr.lx_sched_policy; + + switch (policy) { + case LX_SCHED_OTHER: + case LX_SCHED_BATCH: + case LX_SCHED_IDLE: + break; + case LX_SCHED_FIFO: + case LX_SCHED_RR: + if (crgetuid(CRED()) != 0) { + sprunlock(p); + return (set_errno(EPERM)); + } + if (local_attr.lx_sched_priority < + LX_SCHED_PRIORITY_MIN_RRFIFO || + local_attr.lx_sched_priority > + LX_SCHED_PRIORITY_MAX_RRFIFO) { + sprunlock(p); + return (set_errno(EINVAL)); + } + break; + + case LX_SCHED_DEADLINE: + if (crgetuid(CRED()) != 0) { + sprunlock(p); + return (set_errno(EPERM)); + } + break; + default: + sprunlock(p); + return (set_errno(EINVAL)); + } + + lwpd = lwptolxlwp(ttolwp(tp)); + lwpd->br_schd_class = policy; + lwpd->br_schd_flags = flg; + lwpd->br_schd_pri = local_attr.lx_sched_priority; + + lwpd->br_schd_runtime = local_attr.lx_sched_runtime; + lwpd->br_schd_deadline = local_attr.lx_sched_deadline; + lwpd->br_schd_period = local_attr.lx_sched_period; + + ASSERT(policy <= LX_SCHED_DEADLINE); + incr = lx_emul_pri_map[policy]; + + CL_DOPRIO(tp, CRED(), incr, &rv); + sprunlock(p); + return (0); + } + + /* Currently not supported under other classes */ + return (set_errno(ENOSYS)); +} + +long +lx_sched_getattr(l_pid_t pid, lx_sched_attr_t *attr, uint32_t size, + uint32_t flags) +{ + lx_sched_attr_t local_attr; + int rv; + + if (pid < 0 || attr == NULL || flags != 0 || size < sizeof (local_attr)) + return (set_errno(EINVAL)); + + bzero(&local_attr, sizeof (local_attr)); + if (EMUL_SCHED()) { + proc_t *p; + kthread_t *tp = NULL; + lx_lwp_data_t *lwpd; + + /* Find and operate on the target lwp. 
*/ + if ((rv = lx_sched_pidlock(pid, &p, &tp, B_FALSE)) != 0) + return (set_errno(rv)); + + lwpd = lwptolxlwp(ttolwp(tp)); + local_attr.lx_sched_policy = lwpd->br_schd_class; + local_attr.lx_sched_priority = lwpd->br_schd_pri; + local_attr.lx_sched_flags = lwpd->br_schd_flags; + + local_attr.lx_sched_runtime = lwpd->br_schd_runtime; + local_attr.lx_sched_deadline = lwpd->br_schd_deadline; + local_attr.lx_sched_period = lwpd->br_schd_period; + + sprunlock(p); + + local_attr.lx_size = sizeof (lx_sched_attr_t); + + if (copyout(&local_attr, attr, sizeof (local_attr))) + return (set_errno(EFAULT)); + + return (0); + } + + /* Currently not supported under other classes */ + return (set_errno(ENOSYS)); +} + +static int +lx_sched_setprocset(procset_t *procset, l_pid_t pid) { id_t lid, rid; idtype_t lidtype, ridtype; @@ -658,27 +1093,22 @@ sched_setprocset(procset_t *procset, l_pid_t pid) /* * define the target lwp */ - if (pid == 0) { - ridtype = P_ALL; - lidtype = P_PID; - rid = 0; - lid = P_MYID; - } else { - if (lx_lpid_to_spair(pid, &pid, &lid) < 0) - return (set_errno(ESRCH)); - if (pid != curproc->p_pid) - return (set_errno(ESRCH)); - rid = 0; - ridtype = P_ALL; - lidtype = P_LWPID; - } + if (pid == 0) + pid = curproc->p_pid; + + if (lx_lpid_to_spair(pid, &pid, &lid) < 0) + return (set_errno(ESRCH)); + rid = 0; + ridtype = P_ALL; + lidtype = P_LWPID; + setprocset(procset, POP_AND, lidtype, lid, ridtype, rid); return (0); } -long -do_priocntlsys(int cmd, procset_t *procset, void *arg) +static long +lx_do_priocntlsys(int cmd, procset_t *procset, void *arg) { return (priocntl_common(PC_VERSION, procset, cmd, (caddr_t)arg, 0, UIO_SYSSPACE)); |
