diff options
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/common/brand/lx/lx_syscall.h | 47 | ||||
-rw-r--r-- | usr/src/lib/brand/lx/lx_brand/common/clone.c | 28 | ||||
-rw-r--r-- | usr/src/lib/brand/lx/lx_brand/common/wait.c | 19 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/os/lx_brand.c | 54 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/os/lx_misc.c | 153 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/sys/lx_brand.h | 22 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/syscall/lx_clone.c | 109 | ||||
-rw-r--r-- | usr/src/uts/common/brand/sn1/sn1_brand.c | 5 | ||||
-rw-r--r-- | usr/src/uts/common/brand/sngl/sngl_brand.c | 8 | ||||
-rw-r--r-- | usr/src/uts/common/brand/solaris10/s10_brand.c | 5 | ||||
-rw-r--r-- | usr/src/uts/common/os/brand.c | 3 | ||||
-rw-r--r-- | usr/src/uts/common/os/exit.c | 79 | ||||
-rw-r--r-- | usr/src/uts/common/sys/brand.h | 4 | ||||
-rw-r--r-- | usr/src/uts/common/sys/proc.h | 11 | ||||
-rw-r--r-- | usr/src/uts/i86pc/ml/offsets.in | 2 | ||||
-rw-r--r-- | usr/src/uts/intel/brand/common/brand_asm.h | 2 | ||||
-rw-r--r-- | usr/src/uts/sun4/brand/common/brand_solaris.s | 2 | ||||
-rw-r--r-- | usr/src/uts/sun4/ml/offsets.in | 2 |
18 files changed, 473 insertions, 82 deletions
diff --git a/usr/src/common/brand/lx/lx_syscall.h b/usr/src/common/brand/lx/lx_syscall.h new file mode 100644 index 0000000000..7bfc0537c9 --- /dev/null +++ b/usr/src/common/brand/lx/lx_syscall.h @@ -0,0 +1,47 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2014 Joyent, Inc. All rights reserved. + */ + +#ifndef _LX_SYSCALL_H +#define _LX_SYSCALL_H + +#include <sys/lx_brand.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The br_scall_args field of lx_lwp_data is going to be populated with + * pointers to structs. The types of these structs should be defined in this + * header file. These are Linux specific arguments to system calls that don't + * exist in illumos. Each section should be labelled with which system call it + * belongs to. + */ + +/* arguments for waitpid(2) */ +/* see comments in usr/src/lib/brand/lx/lx_brand/common/wait.c */ +#define LX_WNOTHREAD 0x20000000 /* Do not wait on siblings' children */ +#define LX_WALL 0x40000000 /* Wait on all children */ +#define LX_WCLONE 0x80000000 /* Wait only on clone children */ +typedef struct lx_waitid_args { + int waitid_flags; +} lx_waitid_args_t; + + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_SYSCALL_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/common/clone.c b/usr/src/lib/brand/lx/lx_brand/common/clone.c index 8752d0b02f..abd5fdea04 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/clone.c +++ b/usr/src/lib/brand/lx/lx_brand/common/clone.c @@ -48,6 +48,7 @@ #include <sys/lx_brand.h> #include <sys/lx_debug.h> #include <sys/lx_thread.h> +#include <sys/fork.h> #define LX_CSIGNAL 0x000000ff #define LX_CLONE_VM 0x00000100 @@ -67,7 +68,8 @@ #define LX_CLONE_CHILD_SETTID 0x01000000 #define SHARED_AS \ - (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND) + (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND \ + | LX_CLONE_THREAD) #define CLONE_VFORK (LX_CLONE_VM | LX_CLONE_VFORK) #define CLONE_TD (LX_CLONE_THREAD|LX_CLONE_DETACH) @@ -353,6 +355,7 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, int pid; lx_regs_t *rp; sigset_t sigmask; + int fork_flags = 0; if (flags & LX_CLONE_SETTLS) { lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p ldt=0x%p " @@ -413,13 +416,16 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, if (flags & LX_CLONE_PTRACE) lx_ptrace_fork(); + if ((flags & LX_CSIGNAL) == 0) + fork_flags |= FORK_NOSIGCHLD; + if (flags & LX_CLONE_VFORK) { is_vforked++; - rval = vfork(); + rval = vforkx(fork_flags); if (rval != 0) is_vforked--; } else { - rval = fork1(); + rval = forkx(fork_flags); if (rval == 0 && lx_is_rpm) (void) sleep(lx_rpm_delay); } @@ -449,12 +455,24 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, if (rval != 0) return ((rval < 0) ? -errno : rval); + + /* + * Set up additional data in the lx_proc_data structure as + * necessary. + */ + rval = syscall(SYS_brand, B_EMULATE_SYSCALL + LX_SYS_clone, + flags, cldstk, ptidp, ldtinfo, ctidp, NULL); + if (rval < 0) { + return (rval); + } + /* * If provided, the child needs its new stack set up. */ if (cldstk) lx_setup_clone(rp->lxr_gs, (void *)rp->lxr_eip, cldstk); + /* lx_setup_clone() doesn't return */ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACECLONE); return (0); } @@ -465,8 +483,8 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, */ if (((flags & SHARED_AS) != SHARED_AS)) { lx_unsupported(gettext( - "clone(2) requires that all or none of CLONE_VM " - "CLONE_FS, CLONE_FILES, and CLONE_SIGHAND be set.\n")); + "clone(2) requires that all or none of CLONE_VM, CLONE_FS," + "CLONE_FILES, CLONE_THREAD and CLONE_SIGHAND be set.\n")); return (-ENOTSUP); } diff --git a/usr/src/lib/brand/lx/lx_brand/common/wait.c b/usr/src/lib/brand/lx/lx_brand/common/wait.c index 7512838bf3..7485808a2b 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/wait.c +++ b/usr/src/lib/brand/lx/lx_brand/common/wait.c @@ -72,10 +72,12 @@ #include <sys/lx_signal.h> #include <sys/lx_misc.h> #include <sys/lx_syscall.h> +#include <sys/syscall.h> #include <sys/times.h> #include <strings.h> #include <unistd.h> #include <assert.h> +#include <lx_syscall.h> /* * Convert between Linux options and Solaris options, returning -1 if any @@ -99,13 +101,28 @@ static int ltos_options(uintptr_t options) { int newoptions = 0; + int rval; + lx_waitid_args_t extra; if (((options) & ~(LX_WNOHANG | LX_WUNTRACED | LX_WEXITED | LX_WCONTINUED | LX_WNOWAIT | LX_WNOTHREAD | LX_WALL | LX_WCLONE)) != 0) { return (-1); } - /* XXX implement LX_WNOTHREAD, LX_WALL, LX_WCLONE */ + /* + * We use the B_STORE_ARGS command to store any of LX_WNOTHREAD, + * LX_WALL, and LX_WCLONE that have been set as options on this waitid + * call. These flags are stored as part of the lwp_brand_data, so that + * when there is a later syscall to waitid, the brand code there can + * detect that we added extra flags here and use them as appropriate. + * We pass them in here rather than the normal channel for flags to + * prevent polluting the namespace. + */ + extra.waitid_flags = options & (LX_WNOTHREAD | LX_WALL | LX_WCLONE); + rval = syscall(SYS_brand, B_STORE_ARGS, &extra, + sizeof (lx_waitid_args_t), NULL, NULL, NULL, NULL); + if (rval < 0) + return (rval); if (options & LX_WNOHANG) newoptions |= WNOHANG; diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c index 1ddc0367c8..972f76431e 100644 --- a/usr/src/uts/common/brand/lx/os/lx_brand.c +++ b/usr/src/uts/common/brand/lx/os/lx_brand.c @@ -58,6 +58,7 @@ #include <sys/zone.h> #include <sys/brand.h> #include <sys/sdt.h> +#include <lx_signum.h> int lx_debug = 0; @@ -78,6 +79,8 @@ extern int lx_initlwp(klwp_t *); extern void lx_forklwp(klwp_t *, klwp_t *); extern void lx_exitlwp(klwp_t *); extern void lx_freelwp(klwp_t *); +extern void lx_exit_with_sig(proc_t *, sigqueue_t *, void *); +extern boolean_t lx_wait_filter(proc_t *, proc_t *); extern greg_t lx_fixsegreg(greg_t, model_t); extern int lx_sched_affinity(int, uintptr_t, int, uintptr_t, int64_t *); @@ -112,6 +115,8 @@ struct brand_ops lx_brops = { NULL, NULL, NSIG, + lx_exit_with_sig, + lx_wait_filter, }; struct brand_mach_ops lx_mops = { @@ -127,7 +132,8 @@ struct brand lx_brand = { BRAND_VER_1, "lx", &lx_brops, - &lx_mops + &lx_mops, + sizeof (struct lx_proc_data) }; static struct modlbrand modlbrand = { @@ -142,6 +148,7 @@ void lx_proc_exit(proc_t *p, klwp_t *lwp) { zone_t *z = p->p_zone; + int sig = ptolxproc(p)->l_signal; ASSERT(p->p_brand != NULL); ASSERT(p->p_brand_data != NULL); @@ -167,8 +174,7 @@ lx_proc_exit(proc_t *p, klwp_t *lwp) */ if (lwp != NULL) lx_exitlwp(lwp); - kmem_free(p->p_brand_data, sizeof (struct lx_proc_data)); - p->p_brand_data = NULL; + p->p_exit_data = sig; } void @@ -181,6 +187,7 @@ lx_setbrand(proc_t *p) ASSERT(ttolxlwp(curthread) == NULL); p->p_brand_data = kmem_zalloc(sizeof (struct lx_proc_data), KM_SLEEP); + ptolxproc(p)->l_signal = stol_signo[SIGCHLD]; /* * This routine can only be called for single-threaded processes. @@ -760,6 +767,47 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, return (0); + case B_STORE_ARGS: + /* + * B_STORE_ARGS subcommand + * arg1 = address of struct to be copied in + * arg2 = size of the struct being copied in + * arg3-arg6 ignored + * rval = the amount of data copied. + */ + { + int err; + lx_lwp_data_t *lwpd = ttolxlwp(curthread); + void *buf; + + /* only have upper limit because arg2 is unsigned */ + if (arg2 > LX_BR_ARGS_SIZE_MAX) { + return (EINVAL); + } + + buf = kmem_alloc(arg2, KM_SLEEP); + if ((err = copyin((void *)arg1, buf, arg2)) != 0) { + lx_print("Failed to copyin scall arg at 0x%p\n", + (void *) arg1); + kmem_free(buf, arg2); + /* + * Purposely not setting br_scall_args to NULL + * to preserve data for debugging. + */ + return (EFAULT); + } + + if (lwpd->br_scall_args != NULL) { + ASSERT(lwpd->br_args_size > 0); + kmem_free(lwpd->br_scall_args, + lwpd->br_args_size); + } + + lwpd->br_scall_args = buf; + lwpd->br_args_size = arg2; + *rval = arg2; + return (0); + } default: linux_call = cmd - B_EMULATE_SYSCALL; /* diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c index 797ac399d0..2d565a4902 100644 --- a/usr/src/uts/common/brand/lx/os/lx_misc.c +++ b/usr/src/uts/common/brand/lx/os/lx_misc.c @@ -38,6 +38,12 @@ #include <sys/lx_brand.h> #include <sys/lx_pid.h> #include <sys/lx_futex.h> +#include <sys/cmn_err.h> +#include <sys/siginfo.h> +#include <sys/contract/process_impl.h> +#include <lx_signum.h> +#include <lx_syscall.h> +#include <sys/proc.h> /* Linux specific functions and definitions */ void lx_setrval(klwp_t *, int, int); @@ -184,6 +190,10 @@ lx_exitlwp(klwp_t *lwp) } free: + if (lwpd->br_scall_args != NULL) { + ASSERT(lwpd->br_args_size > 0); + kmem_free(lwpd->br_scall_args, lwpd->br_args_size); + } if (sqp) kmem_free(sqp, sizeof (sigqueue_t)); @@ -221,7 +231,8 @@ lx_initlwp(klwp_t *lwp) lwpd->br_set_ctidp = NULL; lwpd->br_signal = 0; /* - * lwpd->br_affinitymask was zeroed by kmem_zalloc(). + * lwpd->br_affinitymask was zeroed by kmem_zalloc() + * as was lwpd->br_scall_args and lwpd->br_args_size. */ /* @@ -280,7 +291,7 @@ lx_forklwp(klwp_t *srclwp, klwp_t *dstlwp) * copy only these flags */ dst->br_lwp_flags = src->br_lwp_flags & BR_CPU_BOUND; - dst->br_clone_args = NULL; + dst->br_scall_args = NULL; } /* @@ -372,3 +383,141 @@ lx_fixsegreg(greg_t sr, model_t datamodel) return (sr | SEL_TI_LDT | SEL_UPL); #endif /* __amd64 */ } + +/* + * These two functions simulate winfo and post_sigcld for the lx brand. The + * difference is delivering a designated signal as opposed to always SIGCLD. + */ +void +lx_winfo(proc_t *pp, k_siginfo_t *ip, struct lx_proc_data *dat) +{ + ASSERT(MUTEX_HELD(&pidlock)); + bzero(ip, sizeof (k_siginfo_t)); + ip->si_signo = ltos_signo[dat->l_signal]; + ip->si_code = pp->p_wcode; + ip->si_pid = pp->p_pid; + ip->si_ctid = PRCTID(pp); + ip->si_zoneid = pp->p_zone->zone_id; + ip->si_status = pp->p_wdata; + ip->si_stime = pp->p_stime; + ip->si_utime = pp->p_utime; +} + +void +lx_post_exit_sig(proc_t *cp, sigqueue_t *sqp, struct lx_proc_data *dat) +{ + proc_t *pp = cp->p_parent; + + ASSERT(MUTEX_HELD(&pidlock)); + mutex_enter(&pp->p_lock); + /* + * Since Linux doesn't queue SIGCHLD, or any other non RT + * signals, we just blindly deliver whatever signal we can. + */ + ASSERT(sqp != NULL); + lx_winfo(cp, &sqp->sq_info, dat); + sigaddqa(pp, NULL, sqp); + sqp = NULL; + mutex_exit(&pp->p_lock); +} + + +/* + * Brand specific code for exiting and sending a signal to the parent, as + * opposed to sigcld(). + */ +void +lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp, void *brand_data) +{ + proc_t *pp = cp->p_parent; + struct lx_proc_data *lx_brand_data = brand_data; + ASSERT(MUTEX_HELD(&pidlock)); + + switch (cp->p_wcode) { + case CLD_EXITED: + case CLD_DUMPED: + case CLD_KILLED: + ASSERT(cp->p_stat == SZOMB); + /* + * The broadcast on p_srwchan_cv is a kludge to + * wakeup a possible thread in uadmin(A_SHUTDOWN). + */ + cv_broadcast(&cp->p_srwchan_cv); + + /* + * Add to newstate list of the parent + */ + add_ns(pp, cp); + + cv_broadcast(&pp->p_cv); + if ((pp->p_flag & SNOWAIT) || + PTOU(pp)->u_signal[SIGCLD - 1] == SIG_IGN) { + if (!(cp->p_pidflag & CLDWAITPID)) + freeproc(cp); + } else if (!(cp->p_pidflag & CLDNOSIGCHLD) && + lx_brand_data->l_signal != 0) { + lx_post_exit_sig(cp, sqp, lx_brand_data); + sqp = NULL; + } + break; + + case CLD_STOPPED: + case CLD_CONTINUED: + case CLD_TRAPPED: + panic("Should not be called in this case"); + } + + if (sqp) + siginfofree(sqp); +} + +/* + * Filters based on arguments that have been passed in by a separate syscall + * using the B_STORE_ARGS mechanism. if the __WALL flag is set, no filter is + * applied, otherwise we look at the difference between a clone and non-clone + * process. + * The definition of a clone process in Linux is a thread that does not deliver + * SIGCHLD to its parent. The option __WCLONE indicates to wait only on clone + * processes. Without that option, a process should only wait on normal + * children. The following table shows the cases. + * + * default __WCLONE + * no SIGCHLD - X + * SIGCHLD X - + * + * This is an XOR of __WCLONE being set, and SIGCHLD being the signal sent on + * process exit. Since (flags & __WCLONE) is not guaranteed to have the + * least-significant bit set when the flags is enabled, !! is used to place + * that bit into the least significant bit. Then, the bitwise XOR can be + * used, because there is no logical XOR in the C language. + * + * More information on wait in lx brands can be found at + * usr/src/lib/brand/lx/lx_brand/common/wait.c. + */ +boolean_t +lx_wait_filter(proc_t *pp, proc_t *cp) +{ + int flags; + boolean_t ret; + + if (LX_ARGS(waitid) != NULL) { + flags = LX_ARGS(waitid)->waitid_flags; + mutex_enter(&cp->p_lock); + if (flags & LX_WALL) { + ret = B_TRUE; + } else if (cp->p_stat == SZOMB || + cp->p_brand == &native_brand) { + ret = (((!!(flags & LX_WCLONE)) ^ + (stol_signo[SIGCHLD] == cp->p_exit_data)) + ? B_TRUE : B_FALSE); + } else { + ret = (((!!(flags & LX_WCLONE)) ^ + (stol_signo[SIGCHLD] == ptolxproc(cp)->l_signal)) + ? B_TRUE : B_FALSE); + } + mutex_exit(&cp->p_lock); + return (ret); + } else { + return (B_TRUE); + } +} diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h index dcbd37c21b..49d3d0a446 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_brand.h +++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h @@ -78,6 +78,7 @@ extern "C" { #define B_PTRACE_EXT_OPTS 134 #define B_PTRACE_STOP_FOR_OPT 135 #define B_UNSUPPORTED 136 +#define B_STORE_ARGS 137 #define B_EMULATE_SYSCALL 192 @@ -186,12 +187,13 @@ typedef struct lx_proc_data { uintptr_t l_handler; /* address of user-space handler */ uintptr_t l_tracehandler; /* address of user-space traced handler */ uintptr_t l_traceflag; /* address of 32-bit tracing flag */ - void (*l_sigrestorer[MAXSIG])(void); /* array of sigrestorer fns */ pid_t l_ppid; /* pid of originating parent proc */ uint64_t l_ptrace; /* process being observed with ptrace */ uint_t l_ptrace_opts; /* process's extended ptrace options */ uint_t l_ptrace_event; /* extended ptrace option trap event */ lx_elf_data_t l_elf_data; /* ELF data for linux executable */ + int l_signal; /* signal to deliver to parent when this */ + /* thread group dies */ } lx_proc_data_t; #endif /* _KERNEL */ @@ -231,14 +233,22 @@ typedef struct lx_lwp_data { void *br_set_ctidp; /* clone thread id ptr */ /* - * The following struct is used by lx_clone() - * to pass info into fork() + * The following struct is used by some system calls to pass extra + * flags into the kernel without impinging on the namespace for + * illumos. */ - void *br_clone_args; + void *br_scall_args; + int br_args_size; /* size in bytes of br_scall_args */ uint_t br_ptrace; /* ptrace is active for this LWP */ } lx_lwp_data_t; +/* + * Upper limit on br_args_size, low because this value can persist until + * overridden with another value, and the size is given from userland. + */ +#define LX_BR_ARGS_SIZE_MAX (1024) + /* brand specific data */ typedef struct lx_zone_data { char lxzd_kernel_version[LX_VERS_MAX]; @@ -250,6 +260,10 @@ typedef struct lx_zone_data { #define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t)) #define lwptolxlwp(l) ((struct lx_lwp_data *)lwptolwpbrand(l)) #define ttolxproc(t) ((struct lx_proc_data *)(t)->t_procp->p_brand_data) +#define ptolxproc(p) ((struct lx_proc_data *)(p)->p_brand_data) +/* Macro for converting to system call arguments. */ +#define LX_ARGS(scall) ((struct lx_##scall##_args *)\ + (ttolxlwp(curthread)->br_scall_args)) void lx_brand_int80_callback(void); int64_t lx_emulate_syscall(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t, diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c index 3c5c7464ab..4006e1187f 100644 --- a/usr/src/uts/common/brand/lx/syscall/lx_clone.c +++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c @@ -30,6 +30,7 @@ #include <sys/brand.h> #include <sys/lx_brand.h> #include <sys/lx_ldt.h> +#include <lx_signum.h> #define LX_CSIGNAL 0x000000ff #define LX_CLONE_VM 0x00000100 @@ -57,6 +58,7 @@ long lx_clone(int flags, void *stkp, void *ptidp, void *ldtinfo, void *ctidp) { struct lx_lwp_data *lwpd = ttolxlwp(curthread); + struct lx_proc_data *lproc = ttolxproc(curthread); struct ldt_info info; struct user_desc descr; int tls_index; @@ -64,62 +66,65 @@ lx_clone(int flags, void *stkp, void *ptidp, void *ldtinfo, void *ctidp) int signo; signo = flags & LX_CSIGNAL; - if (signo < 0 || signo > MAXSIG) + if (signo < 0 || signo > LX_NSIG) return (set_errno(EINVAL)); - if (flags & LX_CLONE_SETTLS) { - if (copyin((caddr_t)ldtinfo, &info, sizeof (info))) - return (set_errno(EFAULT)); - - if (LDT_INFO_EMPTY(&info)) - return (set_errno(EINVAL)); - - entry = info.entry_number; - if (entry < GDT_TLSMIN || entry > GDT_TLSMAX) - return (set_errno(EINVAL)); - - tls_index = entry - GDT_TLSMIN; - - /* - * Convert the user-space structure into a real x86 - * descriptor and copy it into this LWP's TLS array. We - * also load it into the GDT. - */ - LDT_INFO_TO_DESC(&info, &descr); - bcopy(&descr, &lwpd->br_tls[tls_index], sizeof (descr)); - lx_set_gdt(entry, &lwpd->br_tls[tls_index]); + if (!(flags & LX_CLONE_THREAD)) { + lproc->l_signal = signo; } else { - tls_index = -1; - bzero(&descr, sizeof (descr)); - } - - lwpd->br_clear_ctidp = - (flags & LX_CLONE_CHILD_CLEARTID) ? ctidp : NULL; - - if (signo && ! (flags & LX_CLONE_DETACH)) - lwpd->br_signal = signo; - else - lwpd->br_signal = 0; - - if (flags & LX_CLONE_THREAD) - lwpd->br_tgid = curthread->t_procp->p_pid; - - if (flags & LX_CLONE_PARENT) - lwpd->br_ppid = 0; - - if ((flags & LX_CLONE_CHILD_SETTID) && (ctidp != NULL) && - (suword32(ctidp, lwpd->br_pid) != 0)) { - if (entry >= 0) - lx_clear_gdt(entry); - return (set_errno(EFAULT)); - } - if ((flags & LX_CLONE_PARENT_SETTID) && (ptidp != NULL) && - (suword32(ptidp, lwpd->br_pid) != 0)) { - if (entry >= 0) - lx_clear_gdt(entry); - return (set_errno(EFAULT)); + if (flags & LX_CLONE_SETTLS) { + if (copyin((caddr_t)ldtinfo, &info, sizeof (info))) + return (set_errno(EFAULT)); + + if (LDT_INFO_EMPTY(&info)) + return (set_errno(EINVAL)); + + entry = info.entry_number; + if (entry < GDT_TLSMIN || entry > GDT_TLSMAX) + return (set_errno(EINVAL)); + + tls_index = entry - GDT_TLSMIN; + + /* + * Convert the user-space structure into a real x86 + * descriptor and copy it into this LWP's TLS array. We + * also load it into the GDT. + */ + LDT_INFO_TO_DESC(&info, &descr); + bcopy(&descr, &lwpd->br_tls[tls_index], sizeof (descr)); + lx_set_gdt(entry, &lwpd->br_tls[tls_index]); + } else { + tls_index = -1; + bzero(&descr, sizeof (descr)); + } + + lwpd->br_clear_ctidp = + (flags & LX_CLONE_CHILD_CLEARTID) ? ctidp : NULL; + + if (signo && ! (flags & LX_CLONE_DETACH)) + lwpd->br_signal = signo; + else + lwpd->br_signal = 0; + + if (flags & LX_CLONE_THREAD) + lwpd->br_tgid = curthread->t_procp->p_pid; + + if (flags & LX_CLONE_PARENT) + lwpd->br_ppid = 0; + + if ((flags & LX_CLONE_CHILD_SETTID) && (ctidp != NULL) && + (suword32(ctidp, lwpd->br_pid) != 0)) { + if (entry >= 0) + lx_clear_gdt(entry); + return (set_errno(EFAULT)); + } + if ((flags & LX_CLONE_PARENT_SETTID) && (ptidp != NULL) && + (suword32(ptidp, lwpd->br_pid) != 0)) { + if (entry >= 0) + lx_clear_gdt(entry); + return (set_errno(EFAULT)); + } } - return (lwpd->br_pid); } diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.c b/usr/src/uts/common/brand/sn1/sn1_brand.c index ab733a07cc..65a7760f63 100644 --- a/usr/src/uts/common/brand/sn1/sn1_brand.c +++ b/usr/src/uts/common/brand/sn1/sn1_brand.c @@ -79,6 +79,8 @@ struct brand_ops sn1_brops = { NULL, NULL, NSIG, + NULL, + NULL, }; #ifdef sparc @@ -119,7 +121,8 @@ struct brand sn1_brand = { BRAND_VER_1, "sn1", &sn1_brops, - &sn1_mops + &sn1_mops, + sizeof (brand_proc_data_t), }; static struct modlbrand modlbrand = { diff --git a/usr/src/uts/common/brand/sngl/sngl_brand.c b/usr/src/uts/common/brand/sngl/sngl_brand.c index 23e23286ea..b0d658a1d0 100644 --- a/usr/src/uts/common/brand/sngl/sngl_brand.c +++ b/usr/src/uts/common/brand/sngl/sngl_brand.c @@ -81,6 +81,8 @@ struct brand_ops sngl_brops = { NULL, NULL, NSIG, + NULL, + NULL, }; #ifdef __amd64 @@ -106,7 +108,8 @@ struct brand sngl_brand = { BRAND_VER_1, "sngl", &sngl_brops, - &sngl_mops + &sngl_mops, + sizeof (brand_proc_data_t), }; static struct modlbrand modlbrand = { @@ -147,7 +150,8 @@ sngl_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, int res; *rval = 0; - res = brand_solaris_cmd(cmd, arg1, arg2, arg3, &sngl_brand, SNGL_VERSION); + res = brand_solaris_cmd(cmd, arg1, arg2, arg3, &sngl_brand, + SNGL_VERSION); if (res >= 0) return (res); diff --git a/usr/src/uts/common/brand/solaris10/s10_brand.c b/usr/src/uts/common/brand/solaris10/s10_brand.c index 2e2309a33e..23ba010af7 100644 --- a/usr/src/uts/common/brand/solaris10/s10_brand.c +++ b/usr/src/uts/common/brand/solaris10/s10_brand.c @@ -84,6 +84,8 @@ struct brand_ops s10_brops = { s10_sigset_native_to_s10, s10_sigset_s10_to_native, S10_NSIG, + NULL, + NULL, }; #ifdef sparc @@ -124,7 +126,8 @@ struct brand s10_brand = { BRAND_VER_1, "solaris10", &s10_brops, - &s10_mops + &s10_mops, + sizeof (brand_proc_data_t), }; static struct modlbrand modlbrand = { diff --git a/usr/src/uts/common/os/brand.c b/usr/src/uts/common/os/brand.c index 2e7c604277..688c152eca 100644 --- a/usr/src/uts/common/os/brand.c +++ b/usr/src/uts/common/os/brand.c @@ -54,7 +54,8 @@ brand_t native_brand = { BRAND_VER_1, "native", NULL, - &native_mach_ops + &native_mach_ops, + 0, }; /* diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c index 7c5b8323e3..612e936540 100644 --- a/usr/src/uts/common/os/exit.c +++ b/usr/src/uts/common/os/exit.c @@ -21,7 +21,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, Joyent, Inc. All rights reserved. + * Copyright (c) 2011, 2014 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -345,6 +345,8 @@ proc_exit(int why, int what) refstr_t *cwd; hrtime_t hrutime, hrstime; int evaporate; + brand_t *orig_brand = NULL; + void *brand_data = NULL; /* * Stop and discard the process's lwps except for the current one, @@ -374,10 +376,16 @@ proc_exit(int why, int what) * is always the last lwp, will also perform lwp_exit and free brand * data */ + mutex_enter(&p->p_lock); if (PROC_IS_BRANDED(p)) { + orig_brand = p->p_brand; + if (p->p_brand_data != NULL && orig_brand->b_data_size > 0) { + brand_data = p->p_brand_data; + } lwp_detach_brand_hdlrs(lwp); brand_clearbrand(p, B_FALSE); } + mutex_exit(&p->p_lock); /* * Don't let init exit unless zone_start_init() failed its exec, or @@ -839,8 +847,60 @@ proc_exit(int why, int what) mutex_exit(&p->p_lock); if (!evaporate) { - p->p_pidflag &= ~CLDPEND; - sigcld(p, sqp); + /* + * The brand specific code only happens when the brand has a + * function to call in place of sigcld, the data itself still + * existed, and the parent of the exiting process is not the + * global zone init. If the parent is the global zone init, + * then the process was reparented, and we don't want brand + * code delivering possibly strange signals to init. Also, init + * is not branded, so any brand specific exit data will not be + * picked up by init anyway. + * It is assumed by this code that any brand where + * b_exit_with_sig == NULL, will free its own brand_data rather + * than letting this piece of code free it. + */ + if (orig_brand != NULL && + orig_brand->b_ops->b_exit_with_sig != NULL && + brand_data != NULL && p->p_ppid != 1) { + /* + * The code for _fini that could unload the brand_t + * blocks until the count of zones using the module + * reaches zero. Zones decrement the refcount on their + * brands only after all user tasks in that zone have + * exited and been waited on. The decrement on the + * brand's refcount happen in zone_destroy(). That + * depends on zone_shutdown() having been completed. + * zone_shutdown() includes a call to zone_empty(), + * where the zone waits for itself to reach the state + * ZONE_IS_EMPTY. This state is only set in either + * zone_shutdown(), when there are no user processes as + * the zone enters this function, or in + * zone_task_rele(). zone_task_rele() is called from + * code triggered by waiting on processes, not by the + * processes exiting through proc_exit(). This means + * all the branded processes that could exist for a + * specific brand_t must exit and get reaped before the + * refcount on the brand_t can reach 0. _fini will + * never unload the corresponding brand module before + * proc_exit finishes execution for all processes + * branded with a particular brand_t, which makes the + * operation below safe to do. Brands that wish to use + * this mechanism must wait in _fini as described + * above. + */ + orig_brand->b_ops->b_exit_with_sig(p, + sqp, brand_data); + } else { + p->p_pidflag &= ~CLDPEND; + sigcld(p, sqp); + } + if (brand_data != NULL) { + kmem_free(brand_data, orig_brand->b_data_size); + brand_data = NULL; + orig_brand = NULL; + } + } else { /* * Do what sigcld() would do if the disposition @@ -950,7 +1010,8 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) pp = ttoproc(curthread); /* - * lock parent mutex so that sibling chain can be searched. + * Anytime you are looking for a process, you take pidlock to prevent + * things from changing as you look. */ mutex_enter(&pidlock); @@ -981,6 +1042,11 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) continue; if (idtype == P_PGID && id != cp->p_pgrp) continue; + if (PROC_IS_BRANDED(pp)) { + if (BROP(pp)->b_wait_filter != NULL && + BROP(pp)->b_wait_filter(pp, cp) == B_FALSE) + continue; + } switch (cp->p_wcode) { @@ -1031,6 +1097,11 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options) continue; if (idtype == P_PGID && id != cp->p_pgrp) continue; + if (PROC_IS_BRANDED(pp)) { + if (BROP(pp)->b_wait_filter != NULL && + BROP(pp)->b_wait_filter(pp, cp) == B_FALSE) + continue; + } switch (cp->p_wcode) { case CLD_TRAPPED: diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h index badc3faff8..dfbbd870db 100644 --- a/usr/src/uts/common/sys/brand.h +++ b/usr/src/uts/common/sys/brand.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2014 Joyent, Inc. All rights reserved. */ #ifndef _SYS_BRAND_H @@ -125,6 +126,8 @@ struct brand_ops { void (*b_sigset_native_to_brand)(sigset_t *); void (*b_sigset_brand_to_native)(sigset_t *); int b_nsig; + void (*b_exit_with_sig)(proc_t *, sigqueue_t *, void *); + boolean_t (*b_wait_filter)(proc_t *, proc_t *); }; /* @@ -135,6 +138,7 @@ typedef struct brand { char *b_name; struct brand_ops *b_ops; struct brand_mach_ops *b_machops; + size_t b_data_size; } brand_t; extern brand_t native_brand; diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h index f1a2fc5485..ee5892066b 100644 --- a/usr/src/uts/common/sys/proc.h +++ b/usr/src/uts/common/sys/proc.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -346,7 +347,12 @@ typedef struct proc { struct zone *p_zone; /* zone in which process lives */ struct vnode *p_execdir; /* directory that p_exec came from */ struct brand *p_brand; /* process's brand */ - void *p_brand_data; /* per-process brand state */ + + /* per-process brand state */ + union { + void *__brand_data; + int __exit_data; + } __p_brand_data; /* additional lock to protect p_sessp (but not its contents) */ kmutex_t p_splock; @@ -361,7 +367,8 @@ typedef struct proc { */ struct user p_user; /* (see sys/user.h) */ } proc_t; - +#define p_brand_data __p_brand_data.__brand_data +#define p_exit_data __p_brand_data.__exit_data #define PROC_T /* headers relying on proc_t are OK */ #ifdef _KERNEL diff --git a/usr/src/uts/i86pc/ml/offsets.in b/usr/src/uts/i86pc/ml/offsets.in index 50a27b3d30..9922891e56 100644 --- a/usr/src/uts/i86pc/ml/offsets.in +++ b/usr/src/uts/i86pc/ml/offsets.in @@ -80,7 +80,7 @@ proc PROCSIZE p_agenttp p_zone p_brand - p_brand_data + __p_brand_data _kthread THREAD_SIZE t_pcb T_LABEL diff --git a/usr/src/uts/intel/brand/common/brand_asm.h b/usr/src/uts/intel/brand/common/brand_asm.h index 1d540db2a9..c820d8e187 100644 --- a/usr/src/uts/intel/brand/common/brand_asm.h +++ b/usr/src/uts/intel/brand/common/brand_asm.h @@ -161,7 +161,7 @@ extern "C" { #define GET_P_BRAND_DATA(sp, pcnt, reg) \ GET_PROCP(sp, pcnt, reg); \ - mov P_BRAND_DATA(reg), reg /* get p_brand_data */ + mov __P_BRAND_DATA(reg), reg /* get p_brand_data */ /* * Each of the following macros returns to the standard syscall codepath if diff --git a/usr/src/uts/sun4/brand/common/brand_solaris.s b/usr/src/uts/sun4/brand/common/brand_solaris.s index 889218bc5f..9097273036 100644 --- a/usr/src/uts/sun4/brand/common/brand_solaris.s +++ b/usr/src/uts/sun4/brand/common/brand_solaris.s @@ -236,7 +236,7 @@ _emulation_check: #endif /* sun4v */ ldn [%g2 + CPU_THREAD], %g3; /* get thread ptr */ ldn [%g3 + T_PROCP], %g4; /* get proc ptr */ - ldn [%g4 + P_BRAND_DATA], %g5; /* get brand data ptr */ + ldn [%g4 + __P_BRAND_DATA], %g5; /* get brand data ptr */ ldn [%g5 + SPD_HANDLER], %g5; /* get userland brnd hdlr ptr */ brz %g5, _exit; /* has it been set? */ nop; diff --git a/usr/src/uts/sun4/ml/offsets.in b/usr/src/uts/sun4/ml/offsets.in index 4f6d19ba01..de214274ee 100644 --- a/usr/src/uts/sun4/ml/offsets.in +++ b/usr/src/uts/sun4/ml/offsets.in @@ -109,7 +109,7 @@ proc PROCSIZE p_utraps p_agenttp p_brand - p_brand_data + __p_brand_data \#define P_UTRAP4 (UT_ILLTRAP_INSTRUCTION * CPTRSIZE) \#define P_UTRAP7 (UT_FP_DISABLED * CPTRSIZE) |