diff options
Diffstat (limited to 'usr/src/uts/common/os')
-rw-r--r-- | usr/src/uts/common/os/brand.c | 323 | ||||
-rw-r--r-- | usr/src/uts/common/os/ddi.c | 20 | ||||
-rw-r--r-- | usr/src/uts/common/os/exec.c | 116 | ||||
-rw-r--r-- | usr/src/uts/common/os/exit.c | 36 | ||||
-rw-r--r-- | usr/src/uts/common/os/fork.c | 16 | ||||
-rw-r--r-- | usr/src/uts/common/os/lwp.c | 43 | ||||
-rw-r--r-- | usr/src/uts/common/os/main.c | 13 | ||||
-rw-r--r-- | usr/src/uts/common/os/modconf.c | 34 | ||||
-rw-r--r-- | usr/src/uts/common/os/pid.c | 49 | ||||
-rw-r--r-- | usr/src/uts/common/os/printf.c | 31 | ||||
-rw-r--r-- | usr/src/uts/common/os/procset.c | 21 | ||||
-rw-r--r-- | usr/src/uts/common/os/session.c | 651 | ||||
-rw-r--r-- | usr/src/uts/common/os/streamio.c | 231 | ||||
-rw-r--r-- | usr/src/uts/common/os/strsubr.c | 74 | ||||
-rw-r--r-- | usr/src/uts/common/os/sysent.c | 15 | ||||
-rw-r--r-- | usr/src/uts/common/os/zone.c | 80 |
16 files changed, 1434 insertions, 319 deletions
diff --git a/usr/src/uts/common/os/brand.c b/usr/src/uts/common/os/brand.c new file mode 100644 index 0000000000..15d82871bf --- /dev/null +++ b/usr/src/uts/common/os/brand.c @@ -0,0 +1,323 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/kmem.h> +#include <sys/errno.h> +#include <sys/systm.h> +#include <sys/cmn_err.h> +#include <sys/brand.h> +#include <sys/machbrand.h> +#include <sys/modctl.h> +#include <sys/rwlock.h> +#include <sys/zone.h> + +#define SUPPORTED_BRAND_VERSION BRAND_VER_1 + +#if defined(__sparcv9) +struct brand_mach_ops native_mach_ops = { + NULL, NULL +}; +#else +struct brand_mach_ops native_mach_ops = { + NULL, NULL, NULL, NULL, NULL, NULL +}; +#endif + +brand_t native_brand = { + BRAND_VER_1, + "native", + NULL, + &native_mach_ops +}; + +/* + * Used to maintain a list of all the brands currently loaded into the + * kernel. 
+ */ +struct brand_list { + int bl_refcnt; + struct brand_list *bl_next; + brand_t *bl_brand; +}; + +static struct brand_list *brand_list = NULL; + +/* + * This lock protects the integrity of the brand list. + */ +static kmutex_t brand_list_lock; + +void +brand_init() +{ + mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL); + p0.p_brand = &native_brand; +} + +int +brand_register(brand_t *brand) +{ + struct brand_list *list, *scan; + + if (brand == NULL) + return (EINVAL); + + if (is_system_labeled()) { + cmn_err(CE_WARN, + "Branded zones are not allowed on labeled systems."); + return (EINVAL); + } + + if (brand->b_version != SUPPORTED_BRAND_VERSION) { + if (brand->b_version < SUPPORTED_BRAND_VERSION) { + cmn_err(CE_WARN, + "brand '%s' was built to run on older versions " + "of Solaris.", + brand->b_name); + } else { + cmn_err(CE_WARN, + "brand '%s' was built to run on a newer version " + "of Solaris.", + brand->b_name); + } + return (EINVAL); + } + + /* Sanity checks */ + if (brand->b_name == NULL || brand->b_ops == NULL || + brand->b_ops->b_brandsys == NULL) { + cmn_err(CE_WARN, "Malformed brand"); + return (EINVAL); + } + + list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP); + + /* Add the brand to the list of loaded brands. */ + mutex_enter(&brand_list_lock); + + /* + * Check to be sure we haven't already registered this brand. + */ + for (scan = brand_list; scan != NULL; scan = scan->bl_next) { + if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) { + cmn_err(CE_WARN, + "Invalid attempt to load a second instance of " + "brand %s", brand->b_name); + mutex_exit(&brand_list_lock); + kmem_free(list, sizeof (struct brand_list)); + return (EINVAL); + } + } + + list->bl_brand = brand; + list->bl_refcnt = 0; + list->bl_next = brand_list; + brand_list = list; + mutex_exit(&brand_list_lock); + + return (0); +} + +/* + * The kernel module implementing this brand is being unloaded, so remove + * it from the list of active brands. 
+ */ +int +brand_unregister(brand_t *brand) +{ + struct brand_list *list, *prev; + + /* Sanity checks */ + if (brand == NULL || brand->b_name == NULL) { + cmn_err(CE_WARN, "Malformed brand"); + return (EINVAL); + } + + prev = NULL; + mutex_enter(&brand_list_lock); + + for (list = brand_list; list != NULL; list = list->bl_next) { + if (list->bl_brand == brand) + break; + prev = list; + } + + if (list == NULL) { + cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name); + mutex_exit(&brand_list_lock); + return (EINVAL); + } + + if (list->bl_refcnt > 0) { + cmn_err(CE_WARN, "Unregistering brand %s which is still in use", + brand->b_name); + mutex_exit(&brand_list_lock); + return (EBUSY); + } + + /* Remove brand from the list */ + if (prev != NULL) + prev->bl_next = list->bl_next; + else + brand_list = list->bl_next; + + mutex_exit(&brand_list_lock); + + kmem_free(list, sizeof (struct brand_list)); + + return (0); +} + +/* + * Record that a zone of this brand has been instantiated. If the kernel + * module implementing this brand's functionality is not present, this + * routine attempts to load the module as a side effect. + */ +brand_t * +brand_register_zone(struct brand_attr *attr) +{ + struct brand_list *l = NULL; + ddi_modhandle_t hdl = NULL; + char *modname; + int err = 0; + + if (is_system_labeled()) { + cmn_err(CE_WARN, + "Branded zones are not allowed on labeled systems."); + return (NULL); + } + + /* + * We make at most two passes through this loop. The first time + * through, we're looking to see if this is a new user of an + * already loaded brand. If the brand hasn't been loaded, we + * call ddi_modopen() to force it to be loaded and then make a + * second pass through the list of brands. If we don't find the + * brand the second time through it means that the modname + * specified in the brand_attr structure doesn't provide the brand + * specified in the brandname field. This would suggest a bug in + * the brand's config.xml file. 
We close the module and return + * 'NULL' to the caller. + */ + for (;;) { + /* + * Search list of loaded brands + */ + mutex_enter(&brand_list_lock); + for (l = brand_list; l != NULL; l = l->bl_next) + if (strcmp(attr->ba_brandname, + l->bl_brand->b_name) == 0) + break; + if ((l != NULL) || (hdl != NULL)) + break; + mutex_exit(&brand_list_lock); + + /* + * We didn't find that the requested brand has been loaded + * yet, so we trigger the load of the appropriate kernel + * module and search the list again. + */ + modname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) strcpy(modname, "brand/"); + (void) strcat(modname, attr->ba_modname); + hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err); + kmem_free(modname, MAXPATHLEN); + + if (err != 0) + return (NULL); + } + + /* + * If we found the matching brand, bump its reference count. + */ + if (l != NULL) + l->bl_refcnt++; + + mutex_exit(&brand_list_lock); + + if (hdl != NULL) + (void) ddi_modclose(hdl); + + return ((l != NULL) ? l->bl_brand : NULL); +} + +/* + * Return the number of zones currently using this brand. + */ +int +brand_zone_count(struct brand *bp) +{ + struct brand_list *l; + int cnt = 0; + + mutex_enter(&brand_list_lock); + for (l = brand_list; l != NULL; l = l->bl_next) + if (l->bl_brand == bp) { + cnt = l->bl_refcnt; + break; + } + mutex_exit(&brand_list_lock); + + return (cnt); +} + +void +brand_unregister_zone(struct brand *bp) +{ + struct brand_list *list; + + mutex_enter(&brand_list_lock); + for (list = brand_list; list != NULL; list = list->bl_next) { + if (list->bl_brand == bp) { + ASSERT(list->bl_refcnt > 0); + list->bl_refcnt--; + break; + } + } + mutex_exit(&brand_list_lock); +} + +void +brand_setbrand(proc_t *p) +{ + brand_t *bp = p->p_zone->zone_brand; + + ASSERT(bp != NULL); + ASSERT(p->p_brand == &native_brand); + + /* + * We should only be called from exec(), when we know the process + * is single-threaded. 
+ */ + ASSERT(p->p_tlist == p->p_tlist->t_forw); + + p->p_brand = bp; + if (PROC_IS_BRANDED(p)) { + BROP(p)->b_setbrand(p); + lwp_attach_brand_hdlrs(p->p_tlist->t_lwp); + } +} diff --git a/usr/src/uts/common/os/ddi.c b/usr/src/uts/common/os/ddi.c index ec12f51f37..6a0b6ace80 100644 --- a/usr/src/uts/common/os/ddi.c +++ b/usr/src/uts/common/os/ddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -24,7 +23,7 @@ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -293,14 +292,15 @@ WR(queue_t *q) int drv_getparm(unsigned int parm, void *valuep) { - time_t now; + proc_t *p = curproc; + time_t now; switch (parm) { case UPROCP: - *(proc_t **)valuep = ttoproc(curthread); + *(proc_t **)valuep = p; break; case PPGRP: - *(pid_t *)valuep = ttoproc(curthread)->p_pgrp; + *(pid_t *)valuep = p->p_pgrp; break; case LBOLT: *(clock_t *)valuep = lbolt; @@ -317,10 +317,12 @@ drv_getparm(unsigned int parm, void *valuep) } break; case PPID: - *(pid_t *)valuep = ttoproc(curthread)->p_pid; + *(pid_t *)valuep = p->p_pid; break; case PSID: - *(pid_t *)valuep = ttoproc(curthread)->p_sessp->s_sid; + mutex_enter(&p->p_splock); + *(pid_t *)valuep = p->p_sessp->s_sid; + mutex_exit(&p->p_splock); break; case UCRED: *(cred_t **)valuep = CRED(); diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c index a3cd19e423..3b01993465 100644 --- a/usr/src/uts/common/os/exec.c +++ b/usr/src/uts/common/os/exec.c @@ -65,6 +65,7 @@ #include <sys/lwpchan_impl.h> #include <sys/pool.h> #include <sys/sdt.h> +#include <sys/brand.h> #include <c2/audit.h> @@ -89,7 +90,6 @@ uint_t auxv_hwcap32 = 0; /* 32-bit version of auxv_hwcap */ #endif int exec_lpg_disable = 0; - #define PSUIDFLAGS (SNOCD|SUGID) /* @@ -109,12 +109,13 @@ exece(const char *fname, const char **argp, const char **envp) { int error; - error = exec_common(fname, argp, envp); + error = exec_common(fname, argp, envp, EBA_NONE); return (error ? (set_errno(error)) : 0); } int -exec_common(const char *fname, const char **argp, const char **envp) +exec_common(const char *fname, const char **argp, const char **envp, + int brand_action) { vnode_t *vp = NULL, *dir = NULL, *tmpvp = NULL; proc_t *p = ttoproc(curthread); @@ -136,6 +137,7 @@ exec_common(const char *fname, const char **argp, const char **envp) lwpdir_t **old_tidhash; uint_t old_tidhash_sz; lwpent_t *lep; + int brandme = 0; /* * exec() is not supported for the /proc agent lwp. 
@@ -146,6 +148,35 @@ exec_common(const char *fname, const char **argp, const char **envp) if ((error = secpolicy_basic_exec(CRED())) != 0) return (error); + if (brand_action != EBA_NONE) { + /* + * Brand actions are not supported for processes that are not + * running in a branded zone. + */ + if (!ZONE_IS_BRANDED(p->p_zone)) + return (ENOTSUP); + + if (brand_action == EBA_NATIVE) { + /* Only branded processes can be unbranded */ + if (!PROC_IS_BRANDED(p)) + return (ENOTSUP); + } else { + /* Only unbranded processes can be branded */ + if (PROC_IS_BRANDED(p)) + return (ENOTSUP); + brandme = 1; + } + } else { + /* + * If this is a native zone, or if the process is already + * branded, then we don't need to do anything. If this is + * a native process in a branded zone, we need to brand the + * process as it exec()s the new binary. + */ + if (ZONE_IS_BRANDED(p->p_zone) && !PROC_IS_BRANDED(p)) + brandme = 1; + } + /* * Inform /proc that an exec() has started. * Hold signals that are ignored by default so that we will @@ -237,8 +268,14 @@ exec_common(const char *fname, const char **argp, const char **envp) ua.argp = argp; ua.envp = envp; + /* If necessary, brand this process before we start the exec. */ + if (brandme != 0) + brand_setbrand(p); + if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz, - exec_file, p->p_cred)) != 0) { + exec_file, p->p_cred, brand_action)) != 0) { + if (brandme != 0) + BROP(p)->b_proc_exit(p, lwp); VN_RELE(vp); if (dir != NULL) VN_RELE(dir); @@ -351,6 +388,12 @@ exec_common(const char *fname, const char **argp, const char **envp) */ close_exec(P_FINFO(p)); TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up); + + /* Unbrand ourself if requested. 
*/ + if (brand_action == EBA_NATIVE) + BROP(p)->b_proc_exit(p, lwp); + ASSERT((brand_action != EBA_NATIVE) || !PROC_IS_BRANDED(p)); + setregs(&args); /* Mark this as an executable vnode */ @@ -376,6 +419,9 @@ exec_common(const char *fname, const char **argp, const char **envp) lep = kmem_zalloc(sizeof (*lep), KM_SLEEP); } + if (PROC_IS_BRANDED(p)) + BROP(p)->b_exec(); + mutex_enter(&p->p_lock); prbarrier(p); @@ -411,6 +457,7 @@ exec_common(const char *fname, const char **argp, const char **envp) lep->le_start = curthread->t_start; lwp_hash_in(p, lep); } + /* * Restore the saved signal mask and * inform /proc that the exec() has finished. @@ -422,6 +469,7 @@ exec_common(const char *fname, const char **argp, const char **envp) kmem_free(old_lwpdir, old_lwpdir_sz * sizeof (lwpdir_t)); kmem_free(old_tidhash, old_tidhash_sz * sizeof (lwpdir_t *)); } + ASSERT(error == 0); DTRACE_PROC(exec__success); return (0); @@ -451,7 +499,8 @@ gexec( int level, long *execsz, caddr_t exec_file, - struct cred *cred) + struct cred *cred, + int brand_action) { struct vnode *vp; proc_t *pp = ttoproc(curthread); @@ -593,7 +642,7 @@ gexec( setidfl |= EXECSETID_PRIVS; error = (*eswp->exec_func)(vp, uap, args, idatap, level, execsz, - setidfl, exec_file, cred); + setidfl, exec_file, cred, brand_action); rw_exit(eswp->exec_lock); if (error != 0) { if (newcred != NULL) @@ -1016,17 +1065,44 @@ execmap(struct vnode *vp, caddr_t addr, size_t len, size_t zfodlen, } if (zfodlen) { + struct as *as = curproc->p_as; + struct seg *seg; + uint_t zprot = 0; + end = (size_t)addr + len; zfodbase = (caddr_t)roundup(end, PAGESIZE); zfoddiff = (uintptr_t)zfodbase - end; if (zfoddiff) { + /* + * Before we go to zero the remaining space on the last + * page, make sure we have write permission. 
+ */ + + AS_LOCK_ENTER(as, &as->a_lock, RW_READER); + seg = as_segat(curproc->p_as, (caddr_t)end); + if (seg != NULL) + SEGOP_GETPROT(seg, (caddr_t)end, zfoddiff - 1, + &zprot); + AS_LOCK_EXIT(as, &as->a_lock); + + if (seg != NULL && (zprot & PROT_WRITE) == 0) { + (void) as_setprot(as, (caddr_t)end, + zfoddiff - 1, zprot | PROT_WRITE); + } + if (on_fault(&ljb)) { no_fault(); + if (seg != NULL && (zprot & PROT_WRITE) == 0) + (void) as_setprot(as, (caddr_t)end, + zfoddiff - 1, zprot); error = EFAULT; goto bad; } uzero((void *)end, zfoddiff); no_fault(); + if (seg != NULL && (zprot & PROT_WRITE) == 0) + (void) as_setprot(as, (caddr_t)end, + zfoddiff - 1, zprot); } if (zfodlen > zfoddiff) { struct segvn_crargs crargs = @@ -1326,13 +1402,22 @@ stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) args->ne = args->na - argc; /* - * Add AT_SUN_PLATFORM and AT_SUN_EXECNAME strings to the stack. + * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME, and + * AT_SUN_EMULATOR strings to the stack. */ if (auxvpp != NULL && *auxvpp != NULL) { if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0) return (error); if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0) return (error); + if (args->brandname != NULL && + (error = stk_add(args, args->brandname, + UIO_SYSSPACE)) != 0) + return (error); + if (args->emulator != NULL && + (error = stk_add(args, args->emulator, + UIO_SYSSPACE)) != 0) + return (error); } /* @@ -1438,19 +1523,32 @@ stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up) /* * Fill in the aux vector now that we know the user stack addresses - * for the AT_SUN_PLATFORM and AT_SUN_EXECNAME strings. + * for the AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME and + * AT_SUN_EMULATOR strings. 
*/ if (auxvpp != NULL && *auxvpp != NULL) { if (args->to_model == DATAMODEL_NATIVE) { auxv_t **a = (auxv_t **)auxvpp; ADDAUX(*a, AT_SUN_PLATFORM, (long)&ustrp[*--offp]) ADDAUX(*a, AT_SUN_EXECNAME, (long)&ustrp[*--offp]) + if (args->brandname != NULL) + ADDAUX(*a, + AT_SUN_BRANDNAME, (long)&ustrp[*--offp]) + if (args->emulator != NULL) + ADDAUX(*a, + AT_SUN_EMULATOR, (long)&ustrp[*--offp]) } else { auxv32_t **a = (auxv32_t **)auxvpp; ADDAUX(*a, AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp]) ADDAUX(*a, - AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp]); + AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp]) + if (args->brandname != NULL) + ADDAUX(*a, AT_SUN_BRANDNAME, + (int)(uintptr_t)&ustrp[*--offp]) + if (args->emulator != NULL) + ADDAUX(*a, AT_SUN_EMULATOR, + (int)(uintptr_t)&ustrp[*--offp]) } } diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c index 70061a7d3e..3063e5717f 100644 --- a/usr/src/uts/common/os/exit.c +++ b/usr/src/uts/common/os/exit.c @@ -73,6 +73,7 @@ #include <sys/pool.h> #include <sys/sdt.h> #include <sys/corectl.h> +#include <sys/brand.h> /* * convert code/data pair into old style wait status @@ -158,7 +159,6 @@ restart_init(int what, int why) user_t *up = PTOU(p); vnode_t *oldcd, *oldrd; - sess_t *sp; int i, err; char reason_buf[64]; @@ -257,17 +257,9 @@ restart_init(int what, int why) if (oldcd != NULL) VN_RELE(oldcd); - /* - * Free the controlling tty. - */ - mutex_enter(&pidlock); - sp = p->p_sessp; - if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) { - mutex_exit(&pidlock); - freectty(sp); - } else { - mutex_exit(&pidlock); - } + /* Free the controlling tty. (freectty() always assumes curproc.) */ + ASSERT(p == curproc); + (void) freectty(B_TRUE); /* * Now exec() the new init(1M) on top of the current process. 
If we @@ -343,7 +335,6 @@ proc_exit(int why, int what) timeout_id_t tmp_id; int rv; proc_t *q; - sess_t *sp; task_t *tk; vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; sigqueue_t *sqp; @@ -367,6 +358,14 @@ proc_exit(int why, int what) DTRACE_PROC1(exit, int, why); /* + * Will perform any brand specific proc exit processing, since this + * is always the last lwp, will also perform lwp_exit and free brand + * data + */ + if (PROC_IS_BRANDED(p)) + BROP(p)->b_proc_exit(p, lwp); + + /* * Don't let init exit unless zone_start_init() failed its exec, or * we are shutting down the zone or the machine. * @@ -377,6 +376,7 @@ proc_exit(int why, int what) if (z->zone_boot_err == 0 && zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && + z->zone_restart_init == B_TRUE && restart_init(what, why) == 0) return (0); /* @@ -523,13 +523,9 @@ proc_exit(int why, int what) closeall(P_FINFO(p)); - mutex_enter(&pidlock); - sp = p->p_sessp; - if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) { - mutex_exit(&pidlock); - freectty(sp); - } else - mutex_exit(&pidlock); + /* Free the controlling tty. (freectty() always assumes curproc.) 
*/ + ASSERT(p == curproc); + (void) freectty(B_TRUE); #if defined(__sparc) if (p->p_utraps != NULL) diff --git a/usr/src/uts/common/os/fork.c b/usr/src/uts/common/os/fork.c index c7c400246d..fbda5b8c4a 100644 --- a/usr/src/uts/common/os/fork.c +++ b/usr/src/uts/common/os/fork.c @@ -80,6 +80,7 @@ #include <sys/sdt.h> #include <sys/class.h> #include <sys/corectl.h> +#include <sys/brand.h> static int64_t cfork(int, int); static int getproc(proc_t **, int); @@ -461,8 +462,10 @@ cfork(int isvfork, int isfork1) mutex_exit(&p->p_lock); } - /* set return values for child */ - lwp_setrval(clone, p->p_pid, 1); + if (PROC_IS_BRANDED(p)) + BROP(p)->b_lwp_setrval(clone, p->p_pid, 1); + else + lwp_setrval(clone, p->p_pid, 1); /* set return values for parent */ r.r_val1 = (int)cp->p_pid; @@ -873,6 +876,7 @@ getproc(proc_t **cpp, int kernel) /* * Make proc entry for child process */ + mutex_init(&cp->p_splock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&cp->p_crlock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&cp->p_pflock, NULL, MUTEX_DEFAULT, NULL); #if defined(__x86) @@ -882,7 +886,7 @@ getproc(proc_t **cpp, int kernel) cp->p_stat = SIDL; cp->p_mstart = gethrtime(); - if ((newpid = pid_assign(cp)) == -1) { + if ((newpid = pid_allocate(cp, PID_ALLOC_PROC)) == -1) { if (nproc == v.v_proc) { CPU_STATS_ADDQ(CPU, sys, procovf, 1); cmn_err(CE_WARN, "out of processes"); @@ -926,10 +930,13 @@ getproc(proc_t **cpp, int kernel) cp->p_siginfo = pp->p_siginfo; cp->p_flag = pp->p_flag & (SJCTL|SNOWAIT|SNOCD); cp->p_sessp = pp->p_sessp; - SESS_HOLD(pp->p_sessp); + sess_hold(pp); cp->p_exec = pp->p_exec; cp->p_execdir = pp->p_execdir; cp->p_zone = pp->p_zone; + cp->p_brand = pp->p_brand; + if (PROC_IS_BRANDED(pp)) + BROP(pp)->b_copy_procdata(cp, pp); cp->p_bssbase = pp->p_bssbase; cp->p_brkbase = pp->p_brkbase; @@ -1198,6 +1205,7 @@ try_again: if (p->p_segacct) shmexit(p); + /* * We grab p_lock for the benefit of /proc */ diff --git a/usr/src/uts/common/os/lwp.c b/usr/src/uts/common/os/lwp.c index 
dbccf77b9e..26a12c805e 100644 --- a/usr/src/uts/common/os/lwp.c +++ b/usr/src/uts/common/os/lwp.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -59,6 +58,7 @@ #include <sys/cpc_impl.h> #include <sys/sdt.h> #include <sys/cmn_err.h> +#include <sys/brand.h> void *segkp_lwp; /* cookie for pool of segkp resources */ @@ -87,6 +87,7 @@ lwp_create(void (*proc)(), caddr_t arg, size_t len, proc_t *p, uint_t old_hashsz = 0; int i; int rctlfail = 0; + boolean_t branded = 0; mutex_enter(&p->p_lock); mutex_enter(&p->p_zone->zone_nlwps_lock); @@ -448,6 +449,19 @@ grow: break; } while (lwp_hash_lookup(p, t->t_tid) != NULL); } + + /* + * If this is a branded process, let the brand do any necessary lwp + * initialization. 
+ */ + if (PROC_IS_BRANDED(p)) { + if (BROP(p)->b_initlwp(lwp)) { + err = 1; + goto error; + } + branded = 1; + } + p->p_lwpcnt++; t->t_waitfor = -1; @@ -540,6 +554,9 @@ error: if (cid != NOCLASS && bufp != NULL) CL_FREE(cid, bufp); + if (branded) + BROP(p)->b_freelwp(lwp); + mutex_exit(&p->p_lock); t->t_state = TS_FREE; thread_rele(t); @@ -673,6 +690,13 @@ lwp_exit(void) if (t->t_upimutex != NULL) upimutex_cleanup(); + /* + * Perform any brand specific exit processing, then release any + * brand data associated with the lwp + */ + if (PROC_IS_BRANDED(p)) + BROP(p)->b_lwpexit(lwp); + mutex_enter(&p->p_lock); lwp_cleanup(); @@ -1565,6 +1589,7 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid) proc_t *p = lwptoproc(lwp); int cid; void *bufp; + void *brand_data; int val; ASSERT(p == curproc); @@ -1578,6 +1603,7 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid) if (t == curthread) /* copy args out of registers first */ (void) save_syscall_args(); + clwp = lwp_create(cp->p_lwpcnt == 0 ? lwp_rtt_initial : lwp_rtt, NULL, 0, cp, TS_STOPPED, t->t_pri, &t->t_hold, NOCLASS, lwpid); if (clwp == NULL) @@ -1591,14 +1617,16 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid) ct = clwp->lwp_thread; tregs = clwp->lwp_regs; tfpu = clwp->lwp_fpu; + brand_data = clwp->lwp_brand; /* copy parent lwp to child lwp */ *clwp = *lwp; /* fix up child's lwp */ - clwp->lwp_pcb.pcb_flags = 0; -#if defined(__sparc) +#if defined(__i386) || defined(__amd64) + clwp->lwp_pcb.pcb_flags = clwp->lwp_pcb.pcb_flags & RUPDATE_PENDING; +#elif defined(__sparc) clwp->lwp_pcb.pcb_step = STEP_NONE; #endif clwp->lwp_cursig = 0; @@ -1608,6 +1636,7 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid) ct->t_sysnum = t->t_sysnum; clwp->lwp_regs = tregs; clwp->lwp_fpu = tfpu; + clwp->lwp_brand = brand_data; clwp->lwp_ap = clwp->lwp_arg; clwp->lwp_procp = cp; bzero(clwp->lwp_timer, sizeof (clwp->lwp_timer)); @@ -1640,6 +1669,10 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid) ct->t_proc_flag |= TP_MSACCT; mutex_exit(&cp->p_lock); + 
/* Allow brand to propagate brand-specific state */ + if (PROC_IS_BRANDED(p)) + BROP(p)->b_forklwp(lwp, clwp); + retry: cid = t->t_cid; diff --git a/usr/src/uts/common/os/main.c b/usr/src/uts/common/os/main.c index 958bbf96c8..ec9fc6c3e3 100644 --- a/usr/src/uts/common/os/main.c +++ b/usr/src/uts/common/os/main.c @@ -70,6 +70,7 @@ #include <sys/errorq.h> #include <sys/class.h> #include <sys/stack.h> +#include <sys/brand.h> #include <vm/as.h> #include <vm/seg_kmem.h> @@ -124,6 +125,7 @@ cluster_wrapper(void) char initname[INITNAME_SZ] = "/sbin/init"; /* also referenced by zone0 */ char initargs[BOOTARGS_MAX] = ""; /* also referenced by zone0 */ +extern int64_t lwp_sigmask(int, uint_t, uint_t); /* * Construct a stack for init containing the arguments to it, then @@ -144,6 +146,7 @@ exec_init(const char *initpath, const char *args) int error = 0, count = 0; proc_t *p = ttoproc(curthread); klwp_t *lwp = ttolwp(curthread); + int brand_action; if (args == NULL) args = ""; @@ -247,9 +250,17 @@ exec_init(const char *initpath, const char *args) curthread->t_post_sys = 1; curthread->t_sysnum = SYS_execve; + /* + * If we are executing init from zsched, we may have inherited its + * parent process's signal mask. Clear it now so that we behave in + * the same way as when started from the global zone. + */ + (void) lwp_sigmask(SIG_UNBLOCK, 0xffffffff, 0xffffffff); + + brand_action = ZONE_IS_BRANDED(p->p_zone) ? 
EBA_BRAND : EBA_NONE; again: error = exec_common((const char *)(uintptr_t)exec_fnamep, - (const char **)(uintptr_t)uap, NULL); + (const char **)(uintptr_t)uap, NULL, brand_action); /* * Normally we would just set lwp_argsaved and t_post_sys and diff --git a/usr/src/uts/common/os/modconf.c b/usr/src/uts/common/os/modconf.c index 2992567207..3e662fac7d 100644 --- a/usr/src/uts/common/os/modconf.c +++ b/usr/src/uts/common/os/modconf.c @@ -55,6 +55,7 @@ #include <ipp/ipp.h> #include <sys/strsubr.h> #include <sys/kcpc.h> +#include <sys/brand.h> #include <sys/cpc_pcbe.h> #include <sys/kstat.h> #include <sys/fs/sdev_node.h> @@ -237,6 +238,16 @@ struct mod_ops mod_pcbeops = { mod_installpcbe, mod_removepcbe, mod_infonull }; +/* + * Brand modules. + */ +static int mod_installbrand(struct modlbrand *, struct modlinkage *); +static int mod_removebrand(struct modlbrand *, struct modlinkage *); + +struct mod_ops mod_brandops = { + mod_installbrand, mod_removebrand, mod_infonull +}; + static struct sysent *mod_getsysent(struct modlinkage *, struct sysent *); static char uninstall_err[] = "Cannot uninstall %s; not installed"; @@ -496,6 +507,23 @@ mod_removepcbe(struct modlpcbe *modl, struct modlinkage *modlp) } /* + * Manage BrandZ modules. + */ +/*ARGSUSED*/ +static int +mod_installbrand(struct modlbrand *modl, struct modlinkage *modlp) +{ + return (brand_register(modl->brand_branddef)); +} + +/*ARGSUSED*/ +static int +mod_removebrand(struct modlbrand *modl, struct modlinkage *modlp) +{ + return (brand_unregister(modl->brand_branddef)); +} + +/* * manage /dev fs modules */ /*ARGSUSED*/ @@ -1075,8 +1103,10 @@ mod_removefs(struct modlfs *modl, struct modlinkage *modlp) return (EBUSY); } - /* XXX - Shouldn't the refcount be sufficient? 
*/ - + /* + * A mounted filesystem could still have vsw_count = 0 + * so we must check whether anyone is actually using our ops + */ if (vfs_opsinuse(&vswp->vsw_vfsops)) { vfs_unrefvfssw(vswp); WUNLOCK_VFSSW(); diff --git a/usr/src/uts/common/os/pid.c b/usr/src/uts/common/os/pid.c index 66cfed74b4..88b0258afe 100644 --- a/usr/src/uts/common/os/pid.c +++ b/usr/src/uts/common/os/pid.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -21,7 +20,7 @@ */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -115,6 +114,18 @@ pid_lookup(pid_t pid) return (pidp); } +struct pid * +pid_find(pid_t pid) +{ + struct pid *pidp; + + mutex_enter(&pidlinklock); + pidp = pid_lookup(pid); + mutex_exit(&pidlinklock); + + return (pidp); +} + void pid_setmin(void) { @@ -154,14 +165,13 @@ pid_getlockslot(int prslot) } /* - * This function assigns a pid for use in a fork request. It allocates - * a pid structure, tries to find an empty slot in the proc table, - * and selects the process id. + * This function allocates a pid structure, a free pid, and optionally a + * slot in the proc table for it. * - * pid_assign() returns the new pid on success, -1 on failure. + * pid_allocate() returns the new pid on success, -1 on failure. 
*/ pid_t -pid_assign(proc_t *prp) +pid_allocate(proc_t *prp, int flags) { struct pid *pidp; union procent *pep; @@ -170,7 +180,7 @@ pid_assign(proc_t *prp) pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP); mutex_enter(&pidlinklock); - if ((pep = procentfree) == NULL) { + if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) { /* * ran out of /proc directory entries */ @@ -190,10 +200,6 @@ pid_assign(proc_t *prp) goto failed; } - procentfree = pep->pe_next; - pep->pe_proc = prp; - prp->p_pidp = pidp; - /* * Put pid into the pid hash table. */ @@ -201,8 +207,17 @@ pid_assign(proc_t *prp) HASHPID(newpid) = pidp; pidp->pid_ref = 1; pidp->pid_id = newpid; - pidp->pid_prslot = pep - procdir; - prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)]; + + if (flags & PID_ALLOC_PROC) { + procentfree = pep->pe_next; + pidp->pid_prslot = pep - procdir; + pep->pe_proc = prp; + prp->p_pidp = pidp; + prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)]; + } else { + pidp->pid_prslot = 0; + } + mutex_exit(&pidlinklock); return (newpid); @@ -264,7 +279,7 @@ pid_exit(proc_t *prp) if (prp->p_pgidp != NULL) pgexit(prp); - SESS_RELE(prp->p_sessp); + sess_rele(prp->p_sessp, B_TRUE); pidp = prp->p_pidp; diff --git a/usr/src/uts/common/os/printf.c b/usr/src/uts/common/os/printf.c index 603da31b62..a50bfa0db9 100644 --- a/usr/src/uts/common/os/printf.c +++ b/usr/src/uts/common/os/printf.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. 
All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -142,21 +141,15 @@ retry: if (sl & SL_USER) { ssize_t resid; - sess_t *sessp; - - mutex_enter(&pidlock); - sessp = curproc->p_sessp; - SESS_HOLD(sessp); - TTY_HOLD(sessp); - mutex_exit(&pidlock); - if (sessp->s_vp) - (void) vn_rdwr(UIO_WRITE, sessp->s_vp, - body, len, 0LL, UIO_SYSSPACE, - FAPPEND, (rlim64_t)LOG_HIWAT, kcred, &resid); - mutex_enter(&pidlock); - TTY_RELE(sessp); - SESS_RELE(sessp); - mutex_exit(&pidlock); + sess_t *sp; + + if ((sp = tty_hold()) != NULL) { + if (sp->s_vp != NULL) + (void) vn_rdwr(UIO_WRITE, sp->s_vp, body, + len, 0LL, UIO_SYSSPACE, FAPPEND, + (rlim64_t)LOG_HIWAT, kcred, &resid); + tty_rele(sp); + } } if (on_intr && !panicstr) { diff --git a/usr/src/uts/common/os/procset.c b/usr/src/uts/common/os/procset.c index 7a675c604e..ae5473847e 100644 --- a/usr/src/uts/common/os/procset.c +++ b/usr/src/uts/common/os/procset.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -290,8 +289,10 @@ procinset(proc_t *pp, procset_t *psp) break; case P_SID: + mutex_enter(&pp->p_splock); if (pp->p_sessp->s_sid == psp->p_lid) loperand++; + mutex_exit(&pp->p_splock); break; case P_CID: @@ -380,8 +381,10 @@ procinset(proc_t *pp, procset_t *psp) break; case P_SID: + mutex_enter(&pp->p_splock); if (pp->p_sessp->s_sid == psp->p_rid) roperand++; + mutex_exit(&pp->p_splock); break; case P_TASKID: @@ -533,8 +536,10 @@ lwpinset(proc_t *pp, procset_t *psp, kthread_t *tp, int *done) break; case P_SID: + mutex_enter(&pp->p_splock); if (pp->p_sessp->s_sid == psp->p_lid) loperand++; + mutex_exit(&pp->p_splock); break; case P_TASKID: @@ -617,8 +622,10 @@ lwpinset(proc_t *pp, procset_t *psp, kthread_t *tp, int *done) break; case P_SID: + mutex_enter(&pp->p_splock); if (pp->p_sessp->s_sid == psp->p_rid) roperand++; + mutex_exit(&pp->p_splock); break; case P_TASKID: @@ -756,6 +763,7 @@ getmyid(idtype_t idtype) proc_t *pp; uid_t uid; gid_t gid; + pid_t sid; pp = ttoproc(curthread); @@ -773,7 +781,10 @@ getmyid(idtype_t idtype) return (pp->p_pgrp); case P_SID: - return (pp->p_sessp->s_sid); + mutex_enter(&pp->p_splock); + sid = pp->p_sessp->s_sid; + mutex_exit(&pp->p_splock); + return (sid); case P_TASKID: return (pp->p_task->tk_tkid); diff --git a/usr/src/uts/common/os/session.c b/usr/src/uts/common/os/session.c index 972677f7dc..7790a09094 100644 --- a/usr/src/uts/common/os/session.c +++ b/usr/src/uts/common/os/session.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
@@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -47,102 +46,614 @@ #include <sys/kmem.h> #include <sys/cmn_err.h> #include <sys/strsubr.h> +#include <sys/fs/snode.h> sess_t session0 = { - 1, /* s_ref */ - NODEV, /* s_dev */ - NULL, /* s_vp */ - &pid0, /* s_sidp */ - NULL /* s_cred */ + &pid0, /* s_sidp */ + {0}, /* s_lock */ + 1, /* s_ref */ + B_FALSE, /* s_sighuped */ + B_FALSE, /* s_exit */ + 0, /* s_exit_cv */ + 0, /* s_cnt */ + 0, /* s_cnt_cv */ + NODEV, /* s_dev */ + NULL, /* s_vp */ + NULL /* s_cred */ }; void -sess_rele(sess_t *sp) +sess_hold(proc_t *p) { - ASSERT(MUTEX_HELD(&pidlock)); + ASSERT(MUTEX_HELD(&pidlock) || MUTEX_HELD(&p->p_splock)); + mutex_enter(&p->p_sessp->s_lock); + p->p_sessp->s_ref++; + mutex_exit(&p->p_sessp->s_lock); +} + +void +sess_rele(sess_t *sp, boolean_t pidlock_held) +{ + ASSERT(MUTEX_HELD(&pidlock) || !pidlock_held); + + mutex_enter(&sp->s_lock); ASSERT(sp->s_ref != 0); - if (--sp->s_ref == 0) { - if (sp == &session0) - panic("sp == &session0"); - PID_RELE(sp->s_sidp); - mutex_destroy(&sp->s_lock); - cv_destroy(&sp->s_wait_cv); - kmem_free(sp, sizeof (sess_t)); + if (--sp->s_ref > 0) { + mutex_exit(&sp->s_lock); + return; } + ASSERT(sp->s_ref == 0); + + /* + * It's ok to free this session structure now because we know + * that no one else can have a pointer to it. We know this + * to be true because the only time that s_ref can possibly + * be incremented is when pidlock or p_splock is held AND there + * is a proc_t that points to that session structure. In that + * case we are guaranteed that the s_ref is at least 1 since there + * is a proc_t that points to it. 
So when s_ref finally drops to + * zero then no one else has a reference (and hence pointer) to + * this session structure and there is no valid proc_t pointing + * to this session structure anymore so, no one can acquire a + * reference (and pointer) to this session structure so it's + * ok to free it here. + */ + + if (sp == &session0) + panic("sp == &session0"); + + /* make sure there are no outstanding holds */ + ASSERT(sp->s_cnt == 0); + + /* make sure there is no exit in progress */ + ASSERT(!sp->s_exit); + + /* make sure someone already freed any ctty */ + ASSERT(sp->s_vp == NULL); + ASSERT(sp->s_dev == NODEV); + + if (!pidlock_held) + mutex_enter(&pidlock); + PID_RELE(sp->s_sidp); + if (!pidlock_held) + mutex_exit(&pidlock); + + mutex_destroy(&sp->s_lock); + cv_destroy(&sp->s_cnt_cv); + kmem_free(sp, sizeof (sess_t)); +} + +sess_t * +tty_hold(void) +{ + proc_t *p = curproc; + sess_t *sp; + boolean_t got_sig = B_FALSE; + + /* make sure the caller isn't holding locks they shouldn't */ + ASSERT(MUTEX_NOT_HELD(&pidlock)); + + for (;;) { + mutex_enter(&p->p_splock); /* protect p->p_sessp */ + sp = p->p_sessp; + mutex_enter(&sp->s_lock); /* protect sp->* */ + + /* make sure the caller isn't holding locks they shouldn't */ + ASSERT((sp->s_vp == NULL) || + MUTEX_NOT_HELD(&sp->s_vp->v_stream->sd_lock)); + + /* + * If the session leader process is not exiting (and hence + * not trying to release the session's ctty) then we can + * safely grab a hold on the current session structure + * and return it. If on the other hand the session leader + * process is exiting and clearing the ctty then we'll + * wait till it's done before we loop around and grab a + * hold on the session structure. 
+ */ + if (!sp->s_exit) + break; + + /* need to hold the session so it can't be freed */ + sp->s_ref++; + mutex_exit(&p->p_splock); + + /* Wait till the session leader is done */ + if (!cv_wait_sig(&sp->s_exit_cv, &sp->s_lock)) + got_sig = B_TRUE; + + /* + * Now we need to drop our hold on the session structure, + * but we can't hold any locks when we do this because + * sess_rele() may need to acquire pidlock. + */ + mutex_exit(&sp->s_lock); + sess_rele(sp, B_FALSE); + + if (got_sig) + return (NULL); + } + + /* whew, we finally got a hold */ + sp->s_cnt++; + sp->s_ref++; + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + return (sp); } void -sess_create(void) +tty_rele(sess_t *sp) { - proc_t *pp; - sess_t *sp; + /* make sure the caller isn't holding locks they shouldn't */ + ASSERT(MUTEX_NOT_HELD(&pidlock)); - pp = ttoproc(curthread); + mutex_enter(&sp->s_lock); + if ((--sp->s_cnt) == 0) + cv_broadcast(&sp->s_cnt_cv); + mutex_exit(&sp->s_lock); + + sess_rele(sp, B_FALSE); +} + +void +sess_create(void) +{ + proc_t *p = curproc; + sess_t *sp, *old_sp; sp = kmem_zalloc(sizeof (sess_t), KM_SLEEP); mutex_init(&sp->s_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&sp->s_wait_cv, NULL, CV_DEFAULT, NULL); + cv_init(&sp->s_cnt_cv, NULL, CV_DEFAULT, NULL); + /* + * we need to grab p_lock to protect p_pgidp because + * /proc looks at p_pgidp while holding only p_lock. + * + * we don't need to hold p->p_sessp->s_lock or get a hold on the + * session structure since we're not actually updating any of + * the contents of the old session structure.
+ */ mutex_enter(&pidlock); + mutex_enter(&p->p_lock); + mutex_enter(&p->p_splock); + + pgexit(p); + + sp->s_sidp = p->p_pidp; + sp->s_ref = 1; + sp->s_dev = NODEV; + + old_sp = p->p_sessp; + p->p_sessp = sp; + + pgjoin(p, p->p_pidp); + PID_HOLD(p->p_pidp); + + mutex_exit(&p->p_splock); + mutex_exit(&p->p_lock); + mutex_exit(&pidlock); + sess_rele(old_sp, B_FALSE); +} + +/* + * Note that sess_ctty_clear() resets all the fields in the session + * structure but doesn't release any holds or free any objects + * that the session structure might currently point to. it is the + * callers responsibility to do this. + */ +static void +sess_ctty_clear(sess_t *sp, stdata_t *stp) +{ /* - * We need to protect p_pgidp with p_lock because - * /proc looks at it while holding only p_lock. + * Assert that we hold all the necessary locks. We also need + * to be holding proc_t->p_splock for the process associated + * with this session, but since we don't have a proc pointer + * passed in we can't assert this here. */ - mutex_enter(&pp->p_lock); - pgexit(pp); - SESS_RELE(pp->p_sessp); + ASSERT(MUTEX_HELD(&stp->sd_lock) && MUTEX_HELD(&pidlock) && + MUTEX_HELD(&sp->s_lock)); - sp->s_sidp = pp->p_pidp; - sp->s_ref = 1; + /* reset the session structure members to defaults */ + sp->s_sighuped = B_FALSE; sp->s_dev = NODEV; + sp->s_vp = NULL; + sp->s_cred = NULL; + + /* reset the stream session and group pointers */ + stp->sd_pgidp = NULL; + stp->sd_sidp = NULL; +} + +static void +sess_ctty_set(proc_t *p, sess_t *sp, stdata_t *stp) +{ + cred_t *crp; + + /* Assert that we hold all the necessary locks. 
*/ + ASSERT(MUTEX_HELD(&stp->sd_lock) && MUTEX_HELD(&pidlock) && + MUTEX_HELD(&p->p_splock) && MUTEX_HELD(&sp->s_lock)); + + /* get holds on structures */ + mutex_enter(&p->p_crlock); + crhold(crp = p->p_cred); + mutex_exit(&p->p_crlock); + PID_HOLD(sp->s_sidp); /* requires pidlock */ + PID_HOLD(sp->s_sidp); /* requires pidlock */ + + /* update the session structure members */ + sp->s_vp = makectty(stp->sd_vnode); + sp->s_dev = sp->s_vp->v_rdev; + sp->s_cred = crp; + + /* update the stream members */ + stp->sd_flag |= STRISTTY; /* just to be sure */ + stp->sd_sidp = sp->s_sidp; + stp->sd_pgidp = sp->s_sidp; +} + +int +strctty(stdata_t *stp) +{ + sess_t *sp; + proc_t *p = curproc; + boolean_t got_sig = B_FALSE; + + /* + * We are going to try to make stp the default ctty for the session + * associated with curproc. Not only does this require holding a + * bunch of locks but it also requires waiting for any outstanding + * holds on the session structure (acquired via tty_hold()) to be + * released. Hence, we have the following for(;;) loop that will + * acquire our locks, do some sanity checks, and wait for the hold + * count on the session structure to hit zero. If we get a signal + * while waiting for outstanding holds to be released then we abort + * the operation and return. + */ + for (;;) { + mutex_enter(&stp->sd_lock); /* protects sd_pgidp/sd_sidp */ + mutex_enter(&pidlock); /* protects p_pidp */ + mutex_enter(&p->p_splock); /* protects p_sessp */ + sp = p->p_sessp; + mutex_enter(&sp->s_lock); /* protects sp->* */ + + if (((stp->sd_flag & (STRHUP|STRDERR|STWRERR|STPLEX)) != 0) || + (stp->sd_sidp != NULL) || /* stp already ctty? */ + (p->p_pidp != sp->s_sidp) || /* we're not leader? */ + (sp->s_vp != NULL)) { /* session has ctty? */ + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); + return (ENOTTY); + } + + /* sanity check.
we can't be exiting right now */ + ASSERT(!sp->s_exit); + + /* + * If no one else has a hold on this session structure + * then we now have exclusive access to it, so break out + * of this loop and update the session structure. + */ + if (sp->s_cnt == 0) + break; + + /* need to hold the session so it can't be freed */ + sp->s_ref++; - pp->p_sessp = sp; + /* ain't locking order fun? */ + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); - pgjoin(pp, pp->p_pidp); - mutex_exit(&pp->p_lock); + if (!cv_wait_sig(&sp->s_cnt_cv, &sp->s_lock)) + got_sig = B_TRUE; + mutex_exit(&sp->s_lock); + sess_rele(sp, B_FALSE); - PID_HOLD(sp->s_sidp); + if (got_sig) + return (EINTR); + } + + /* set the session ctty bindings */ + sess_ctty_set(p, sp, stp); + + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); + return (0); } -void -freectty(sess_t *sp) +/* + * freectty_lock() attempts to acquire the army of locks required to free + * the ctty associated with a given session leader process. If it returns + * successfully the following locks will be held: + * sd_lock, pidlock, p_splock, s_lock + * + * as a secondary bit of convenience, freectty_lock() will also return + * pointers to the session, ctty, and ctty stream associated with the + * specified session leader process. + */ +static boolean_t +freectty_lock(proc_t *p, sess_t **spp, vnode_t **vpp, stdata_t **stpp, + boolean_t at_exit) { - vnode_t *vp = sp->s_vp; - cred_t *cred = sp->s_cred; + sess_t *sp; + vnode_t *vp; + stdata_t *stp; - strfreectty(vp->v_stream); + mutex_enter(&pidlock); /* protect p_pidp */ + mutex_enter(&p->p_splock); /* protect p->p_sessp */ + sp = p->p_sessp; + mutex_enter(&sp->s_lock); /* protect sp->* */ - mutex_enter(&sp->s_lock); - while (sp->s_cnt > 0) { - cv_wait(&sp->s_wait_cv, &sp->s_lock); + if ((sp->s_sidp != p->p_pidp) || /* we're not leader? */ + (sp->s_vp == NULL)) { /* no ctty?
*/ + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + return (B_FALSE); + } + + vp = sp->s_vp; + stp = sp->s_vp->v_stream; + + if (at_exit) { + /* stop anyone else calling tty_hold() */ + sp->s_exit = B_TRUE; + } else { + /* + * due to locking order we have to grab stp->sd_lock before + * grabbing all the other proc/session locks. but after we + * drop all our current locks it's possible that someone + * could come in and change our current session or close + * the current ctty (vp) there by making sp or stp invalid. + * (a VN_HOLD on vp won't protect stp because that only + * prevents the vnode from being freed not closed.) so + * to prevent this we bump s_ref and s_cnt here. + * + * course this doesn't matter if we're the last thread in + * an exiting process that is the session leader, since no + * one else can change our session or free our ctty. + */ + sp->s_ref++; /* hold the session structure */ + sp->s_cnt++; /* protect vp and stp */ + } + + /* drop our session locks */ + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + + /* grab locks in the right order */ + mutex_enter(&stp->sd_lock); /* protects sd_pgidp/sd_sidp */ + mutex_enter(&pidlock); /* protect p_pidp */ + mutex_enter(&p->p_splock); /* protects p->p_sessp */ + mutex_enter(&sp->s_lock); /* protects sp->* */ + + /* if the session has changed, abort mission */ + if (sp != p->p_sessp) { + /* + * this can't happen during process exit since we're the + * only thread in the process and we sure didn't change + * our own session at this point. + */ + ASSERT(!at_exit); + + /* release our locks and holds */ + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); + tty_rele(sp); + return (B_FALSE); } - ASSERT(sp->s_cnt == 0); - ASSERT(vp->v_count >= 1); - sp->s_vp = NULL; - sp->s_cred = NULL; /* - * It is possible for the VOP_CLOSE below to call stralloctty() - * and reallocate a new tty vnode. 
To prevent that the - * session is marked as closing here. + * sanity checks. none of this should have changed since we had + * holds on the current ctty. */ + ASSERT(sp->s_sidp == p->p_pidp); /* we're the leader */ + ASSERT(sp->s_vp != NULL); /* a ctty exists */ + ASSERT(vp == sp->s_vp); + ASSERT(stp == sp->s_vp->v_stream); + + /* release our holds */ + if (!at_exit) { + if ((--(sp)->s_cnt) == 0) + cv_broadcast(&sp->s_cnt_cv); + sp->s_ref--; + ASSERT(sp->s_ref > 0); + } + + /* return our pointers */ + *spp = sp; + *vpp = vp; + *stpp = stp; - sp->s_flag = SESS_CLOSE; + return (B_TRUE); +} + +/* + * Returns B_FALSE if no signal is sent to the process group associated with + * this ctty. Returns B_TRUE if a signal is sent to the process group. + * If it return B_TRUE it also means that all the locks we were holding + * were dropped so that we could send the signal. + */ +static boolean_t +freectty_signal(proc_t *p, sess_t *sp, stdata_t *stp, boolean_t at_exit) +{ + /* Assert that we hold all the necessary locks. */ + ASSERT(MUTEX_HELD(&stp->sd_lock) && MUTEX_HELD(&pidlock) && + MUTEX_HELD(&p->p_splock) && MUTEX_HELD(&sp->s_lock)); + + /* check if we already signaled this group */ + if (sp->s_sighuped) + return (B_FALSE); + + sp->s_sighuped = B_TRUE; + + if (!at_exit) { + /* + * once again, we're about to drop our army of locks and we + * don't want sp or stp to be freed. 
(see the comment in + * freectty_lock()) + */ + sp->s_ref++; /* hold the session structure */ + sp->s_cnt++; /* protect vp and stp */ + } + + /* can't hold these locks while calling pgsignal() */ mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + + /* signal anyone in the foreground process group */ + pgsignal(stp->sd_pgidp, SIGHUP); + + /* signal anyone blocked in poll on this stream */ + if (!(stp->sd_flag & STRHUP)) + strhup(stp); + + mutex_exit(&stp->sd_lock); + + /* release our holds */ + if (!at_exit) + tty_rele(sp); + + return (B_TRUE); +} + +int +freectty(boolean_t at_exit) +{ + proc_t *p = curproc; + stdata_t *stp; + vnode_t *vp; + cred_t *cred; + sess_t *sp; + struct pid *pgidp, *sidp; + boolean_t got_sig = B_FALSE; /* - * This will be the only thread with access to - * this vnode, from this point on. + * If the current process is a session leader we are going to + * try to release the ctty associated with our current session. To + * do this we need to acquire a bunch of locks, signal any + * processes in the foreground that are associated with the ctty, + * and make sure no one has any outstanding holds on the current + * session structure (acquired via tty_hold()). Hence, we have + * the following for(;;) loop that will do all this work for + * us and break out when the hold count on the session structure + * hits zero. */ + for (;;) { + if (!freectty_lock(p, &sp, &vp, &stp, at_exit)) + return (EIO); + + if (freectty_signal(p, sp, stp, at_exit)) { + /* loop around to re-acquire locks */ + continue; + } + + /* + * Only a session leader process can free a ctty. So if + * we've made it here we know we're a session leader and + * if we're not actively exiting it is impossible for another + * thread in this process to be exiting. (Because that + * thread would have already stopped all other threads + * in the current process.)
+ */ + ASSERT(at_exit || !sp->s_exit); + + /* + * If no one else has a hold on this session structure + * then we now have exclusive access to it, so break out + * of this loop and update the session structure. + */ + if (sp->s_cnt == 0) + break; + + if (!at_exit) { + /* need to hold the session so it can't be freed */ + sp->s_ref++; + } + + /* ain't locking order fun? */ + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); + + if (at_exit) { + /* + * if we're exiting then we can't allow this operation + * to fail so we do a cv_wait() instead of a + * cv_wait_sig(). if there are threads with active + * holds on this ctty that are blocked, then + * they should only be blocked in a cv_wait_sig() + * and hopefully they were in the foreground process + * group and received the SIGHUP we sent above. of + * course it's possible that they weren't in the + * foreground process group and didn't get our + * signal (or they could be stopped by job control + * in which case our signal wouldn't matter until + * they are restarted). in this case we won't + * exit until someone else sends them a signal.
+ */ + cv_wait(&sp->s_cnt_cv, &sp->s_lock); + mutex_exit(&sp->s_lock); + continue; + } + + if (!cv_wait_sig(&sp->s_cnt_cv, &sp->s_lock)) { + got_sig = B_TRUE; + } + + mutex_exit(&sp->s_lock); + sess_rele(sp, B_FALSE); + + if (got_sig) + return (EINTR); + } + ASSERT(sp->s_cnt == 0); + /* save some pointers for later */ + cred = sp->s_cred; + pgidp = stp->sd_pgidp; + sidp = stp->sd_sidp; + + /* clear the session ctty bindings */ + sess_ctty_clear(sp, stp); + + /* wake up anyone blocked in tty_hold() */ + if (at_exit) { + ASSERT(sp->s_exit); + sp->s_exit = B_FALSE; + cv_broadcast(&sp->s_exit_cv); + } + + /* we can drop these locks now */ + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); + + /* This is the only remaining thread with access to this vnode */ (void) VOP_CLOSE(vp, 0, 1, (offset_t)0, cred); VN_RELE(vp); - crfree(cred); + + /* release our holds on assorted structures and return */ + mutex_enter(&pidlock); + PID_RELE(pgidp); + PID_RELE(sidp); + mutex_exit(&pidlock); + + return (1); } /* @@ -169,23 +680,29 @@ vhangup(void) dev_t cttydev(proc_t *pp) { - sess_t *sp = pp->p_sessp; + sess_t *sp; + dev_t dev; + + mutex_enter(&pp->p_splock); /* protects p->p_sessp */ + sp = pp->p_sessp; + +#ifdef DEBUG + mutex_enter(&sp->s_lock); /* protects sp->* */ if (sp->s_vp == NULL) - return (NODEV); - return (sp->s_dev); + ASSERT(sp->s_dev == NODEV); + else + ASSERT(sp->s_dev != NODEV); + mutex_exit(&sp->s_lock); +#endif /* DEBUG */ + + dev = sp->s_dev; + mutex_exit(&pp->p_splock); + return (dev); } void -alloctty(proc_t *pp, vnode_t *vp) +ctty_clear_sighuped(void) { - sess_t *sp = pp->p_sessp; - cred_t *crp; - - sp->s_vp = vp; - sp->s_dev = vp->v_rdev; - - mutex_enter(&pp->p_crlock); - crhold(crp = pp->p_cred); - mutex_exit(&pp->p_crlock); - sp->s_cred = crp; + ASSERT(MUTEX_HELD(&pidlock) || MUTEX_HELD(&curproc->p_splock)); + curproc->p_sessp->s_sighuped = B_FALSE; } diff --git a/usr/src/uts/common/os/streamio.c 
b/usr/src/uts/common/os/streamio.c index ffa676604f..e189a1627d 100644 --- a/usr/src/uts/common/os/streamio.c +++ b/usr/src/uts/common/os/streamio.c @@ -77,6 +77,19 @@ #include <sys/autoconf.h> #include <sys/policy.h> + +/* + * This define helps improve the readability of streams code while + * still maintaining a very old streams performance enhancement. The + * performance enhancement basically involved having all callers + * of straccess() perform the first check that straccess() will do + * locally before actually calling straccess(). (There by reducing + * the number of unnecessary calls to straccess().) + */ +#define i_straccess(x, y) ((stp->sd_sidp == NULL) ? 0 : \ + (stp->sd_vnode->v_type == VFIFO) ? 0 : \ + straccess((x), (y))) + /* * what is mblk_pull_len? * @@ -1095,11 +1108,13 @@ strread(struct vnode *vp, struct uio *uiop, cred_t *crp) ASSERT(vp->v_stream); stp = vp->v_stream; - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, JCREAD)) - return (error); - mutex_enter(&stp->sd_lock); + + if ((error = i_straccess(stp, JCREAD)) != 0) { + mutex_exit(&stp->sd_lock); + return (error); + } + if (stp->sd_flag & (STRDERR|STPLEX)) { error = strgeterr(stp, STRDERR|STPLEX, 0); if (error != 0) { @@ -1161,12 +1176,8 @@ strread(struct vnode *vp, struct uio *uiop, cred_t *crp) } TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE, "strread awakes:%p, %p, %p", vp, uiop, crp); - if (stp->sd_sidp != NULL && - stp->sd_vnode->v_type != VFIFO) { - mutex_exit(&stp->sd_lock); - if (error = straccess(stp, JCREAD)) - goto oops1; - mutex_enter(&stp->sd_lock); + if ((error = i_straccess(stp, JCREAD)) != 0) { + goto oops; } first = 0; } @@ -2026,8 +2037,8 @@ strrput_nondata(queue_t *q, mblk_t *bp) cv_broadcast(&q->q_wait); /* the readers */ cv_broadcast(&_WR(q)->q_wait); /* the writers */ cv_broadcast(&stp->sd_monitor); /* the ioctllers */ - mutex_exit(&stp->sd_lock); strhup(stp); + mutex_exit(&stp->sd_lock); return (0); case M_UNHANGUP: @@ -2665,18 
+2676,23 @@ strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag) ASSERT(vp->v_stream); stp = vp->v_stream; - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) - if ((error = straccess(stp, JCWRITE)) != 0) - return (error); + mutex_enter(&stp->sd_lock); + + if ((error = i_straccess(stp, JCWRITE)) != 0) { + mutex_exit(&stp->sd_lock); + return (error); + } if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { - mutex_enter(&stp->sd_lock); error = strwriteable(stp, B_TRUE, B_TRUE); - mutex_exit(&stp->sd_lock); - if (error != 0) + if (error != 0) { + mutex_exit(&stp->sd_lock); return (error); + } } + mutex_exit(&stp->sd_lock); + wqp = stp->sd_wrq; /* get these values from them cached in the stream head */ @@ -2778,11 +2794,11 @@ strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag) } TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE, "strwrite wake:q %p awakes", wqp); + if ((error = i_straccess(stp, JCWRITE)) != 0) { + mutex_exit(&stp->sd_lock); + goto out; + } mutex_exit(&stp->sd_lock); - if (stp->sd_sidp != NULL && - stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, JCWRITE)) - goto out; } waitflag |= NOINTR; TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID, @@ -3101,6 +3117,7 @@ job_control_type(int cmd) case JAGENT: /* Obsolete */ case JTRUN: /* Obsolete */ case JXTPROTO: /* Obsolete */ + case TIOCSETLD: return (JCSETP); } @@ -3162,10 +3179,12 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR) return (EINVAL); - if (access != -1 && stp->sd_sidp != NULL && - stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, access)) - return (error); + mutex_enter(&stp->sd_lock); + if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) { + mutex_exit(&stp->sd_lock); + return (error); + } + mutex_exit(&stp->sd_lock); /* * Check for sgttyb-related ioctls first, and complain as @@ -3307,11 +3326,16 @@ strioctl(struct vnode *vp, int cmd, intptr_t 
arg, int flag, int copyflag, secpolicy_sti(crp) != 0) { return (EPERM); } - if (stp->sd_sidp != - ttoproc(curthread)->p_sessp->s_sidp && + mutex_enter(&stp->sd_lock); + mutex_enter(&curproc->p_splock); + if (stp->sd_sidp != curproc->p_sessp->s_sidp && secpolicy_sti(crp) != 0) { + mutex_exit(&curproc->p_splock); + mutex_exit(&stp->sd_lock); return (EACCES); } + mutex_exit(&curproc->p_splock); + mutex_exit(&stp->sd_lock); strioc.ic_len = sizeof (char); strioc.ic_dp = (char *)arg; @@ -3445,10 +3469,13 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, return (EINVAL); access = job_control_type(strioc.ic_cmd); - if (access != -1 && stp->sd_sidp != NULL && - stp->sd_vnode->v_type != VFIFO && - (error = straccess(stp, access)) != 0) + mutex_enter(&stp->sd_lock); + if ((access != -1) && + ((error = i_straccess(stp, access)) != 0)) { + mutex_exit(&stp->sd_lock); return (error); + } + mutex_exit(&stp->sd_lock); /* * The I_STR facility provides a trap door for malicious @@ -3699,7 +3726,7 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, /* * try to allocate it as a controlling terminal */ - stralloctty(stp); + (void) strctty(stp); } } @@ -5053,15 +5080,11 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, releasef(STRUCT_FGET(strfdinsert, fildes)); return (error); } - if (stp->sd_sidp != NULL && - stp->sd_vnode->v_type != VFIFO) { + if ((error = i_straccess(stp, access)) != 0) { mutex_exit(&stp->sd_lock); - if (error = straccess(stp, access)) { - releasef( - STRUCT_FGET(strfdinsert, fildes)); - return (error); - } - mutex_enter(&stp->sd_lock); + releasef( + STRUCT_FGET(strfdinsert, fildes)); + return (error); } } mutex_exit(&stp->sd_lock); @@ -5144,12 +5167,9 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, mutex_exit(&stp->sd_lock); return (error); } - if (stp->sd_sidp != NULL && - stp->sd_vnode->v_type != VFIFO) { + if ((error = i_straccess(stp, access)) != 0) { 
mutex_exit(&stp->sd_lock); - if (error = straccess(stp, access)) - return (error); - mutex_enter(&stp->sd_lock); + return (error); } } if (mp->b_datap->db_type != M_PASSFP) { @@ -5446,13 +5466,13 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, { pid_t sid; - mutex_enter(&pidlock); + mutex_enter(&stp->sd_lock); if (stp->sd_sidp == NULL) { - mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); return (ENOTTY); } sid = stp->sd_sidp->pid_id; - mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); return (strcopyout(&sid, (void *)arg, sizeof (pid_t), copyflag)); } @@ -5494,6 +5514,7 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, bg_pgid = stp->sd_pgidp->pid_id; CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid); PID_RELE(stp->sd_pgidp); + ctty_clear_sighuped(); stp->sd_pgidp = q->p_pgidp; PID_HOLD(stp->sd_pgidp); mutex_exit(&pidlock); @@ -5505,17 +5526,30 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, { pid_t pgrp; - mutex_enter(&pidlock); + mutex_enter(&stp->sd_lock); if (stp->sd_sidp == NULL) { - mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); return (ENOTTY); } pgrp = stp->sd_pgidp->pid_id; - mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t), copyflag)); } + case TIOCSCTTY: + { + return (strctty(stp)); + } + + case TIOCNOTTY: + { + /* freectty() always assumes curproc. 
*/ + if (freectty(B_FALSE) != 0) + return (0); + return (ENOTTY); + } + case FIONBIO: case FIOASYNC: return (0); /* handled by the upper layer */ @@ -6233,18 +6267,21 @@ strgetmsg( stp = vp->v_stream; rvp->r_val1 = 0; - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, JCREAD)) - return (error); + mutex_enter(&stp->sd_lock); + + if ((error = i_straccess(stp, JCREAD)) != 0) { + mutex_exit(&stp->sd_lock); + return (error); + } - /* Fast check of flags before acquiring the lock */ if (stp->sd_flag & (STRDERR|STPLEX)) { - mutex_enter(&stp->sd_lock); error = strgeterr(stp, STRDERR|STPLEX, 0); - mutex_exit(&stp->sd_lock); - if (error != 0) + if (error != 0) { + mutex_exit(&stp->sd_lock); return (error); + } } + mutex_exit(&stp->sd_lock); switch (*flagsp) { case MSG_HIPRI: @@ -6381,11 +6418,9 @@ strgetmsg( } TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE, "strgetmsg awakes:%p, %p", vp, uiop); - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) { + if ((error = i_straccess(stp, JCREAD)) != 0) { mutex_exit(&stp->sd_lock); - if (error = straccess(stp, JCREAD)) - return (error); - mutex_enter(&stp->sd_lock); + return (error); } first = 0; } @@ -6797,23 +6832,26 @@ kstrgetmsg( stp = vp->v_stream; rvp->r_val1 = 0; - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, JCREAD)) - return (error); + mutex_enter(&stp->sd_lock); + + if ((error = i_straccess(stp, JCREAD)) != 0) { + mutex_exit(&stp->sd_lock); + return (error); + } flags = *flagsp; - /* Fast check of flags before acquiring the lock */ if (stp->sd_flag & (STRDERR|STPLEX)) { if ((stp->sd_flag & STPLEX) || (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) { - mutex_enter(&stp->sd_lock); error = strgeterr(stp, STRDERR|STPLEX, (flags & MSG_IPEEK)); - mutex_exit(&stp->sd_lock); - if (error != 0) + if (error != 0) { + mutex_exit(&stp->sd_lock); return (error); + } } } + mutex_exit(&stp->sd_lock); switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) { 
case MSG_HIPRI: @@ -6955,11 +6993,9 @@ retry: } TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE, "kstrgetmsg awakes:%p, %p", vp, uiop); - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) { + if ((error = i_straccess(stp, JCREAD)) != 0) { mutex_exit(&stp->sd_lock); - if (error = straccess(stp, JCREAD)) - return (error); - mutex_enter(&stp->sd_lock); + return (error); } first = 0; } @@ -7430,18 +7466,23 @@ strputmsg( audit_strputmsg(vp, mctl, mdata, pri, flag, fmode); #endif - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, JCWRITE)) - return (error); + mutex_enter(&stp->sd_lock); + + if ((error = i_straccess(stp, JCWRITE)) != 0) { + mutex_exit(&stp->sd_lock); + return (error); + } if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { - mutex_enter(&stp->sd_lock); error = strwriteable(stp, B_FALSE, xpg4); - mutex_exit(&stp->sd_lock); - if (error != 0) + if (error != 0) { + mutex_exit(&stp->sd_lock); return (error); + } } + mutex_exit(&stp->sd_lock); + /* * Check for legal flag value. 
*/ @@ -7561,10 +7602,11 @@ strputmsg( } TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE, "strputmsg wake:stp %p wakes", stp); + if ((error = i_straccess(stp, JCWRITE)) != 0) { + mutex_exit(&stp->sd_lock); + return (error); + } mutex_exit(&stp->sd_lock); - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, JCWRITE)) - return (error); } out: /* @@ -7617,25 +7659,27 @@ kstrputmsg( if (mctl == NULL) return (EINVAL); - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) { - if (error = straccess(stp, JCWRITE)) { - freemsg(mctl); - return (error); - } + mutex_enter(&stp->sd_lock); + + if ((error = i_straccess(stp, JCWRITE)) != 0) { + mutex_exit(&stp->sd_lock); + freemsg(mctl); + return (error); } if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) { if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { - mutex_enter(&stp->sd_lock); error = strwriteable(stp, B_FALSE, B_TRUE); - mutex_exit(&stp->sd_lock); if (error != 0) { + mutex_exit(&stp->sd_lock); freemsg(mctl); return (error); } } } + mutex_exit(&stp->sd_lock); + /* * Check for legal flag value. 
*/ @@ -7804,13 +7848,12 @@ kstrputmsg( } TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE, "kstrputmsg wake:stp %p wakes", stp); - mutex_exit(&stp->sd_lock); - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) { - if (error = straccess(stp, JCWRITE)) { - freemsg(mctl); - return (error); - } + if ((error = i_straccess(stp, JCWRITE)) != 0) { + mutex_exit(&stp->sd_lock); + freemsg(mctl); + return (error); } + mutex_exit(&stp->sd_lock); } out: freemsg(mctl); diff --git a/usr/src/uts/common/os/strsubr.c b/usr/src/uts/common/os/strsubr.c index 57a918a3f0..ae99e5198a 100644 --- a/usr/src/uts/common/os/strsubr.c +++ b/usr/src/uts/common/os/strsubr.c @@ -3107,13 +3107,18 @@ straccess(struct stdata *stp, enum jcaccess mode) proc_t *p = ttoproc(t); sess_t *sp; + ASSERT(mutex_owned(&stp->sd_lock)); + if (stp->sd_sidp == NULL || stp->sd_vnode->v_type == VFIFO) return (0); - mutex_enter(&p->p_lock); - sp = p->p_sessp; + mutex_enter(&p->p_lock); /* protects p_pgidp */ for (;;) { + mutex_enter(&p->p_splock); /* protects p->p_sessp */ + sp = p->p_sessp; + mutex_enter(&sp->s_lock); /* protects sp->* */ + /* * If this is not the calling process's controlling terminal * or if the calling process is already in the foreground @@ -3121,6 +3126,8 @@ straccess(struct stdata *stp, enum jcaccess mode) */ if (sp->s_dev != stp->sd_vnode->v_rdev || p->p_pgidp == stp->sd_pgidp) { + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); mutex_exit(&p->p_lock); return (0); } @@ -3131,10 +3138,15 @@ straccess(struct stdata *stp, enum jcaccess mode) if (sp->s_vp == NULL) { if (!cantsend(p, t, SIGHUP)) sigtoproc(p, t, SIGHUP); + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); mutex_exit(&p->p_lock); return (EIO); } + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + if (mode == JCGETP) { mutex_exit(&p->p_lock); return (0); @@ -3146,7 +3158,9 @@ straccess(struct stdata *stp, enum jcaccess mode) return (EIO); } mutex_exit(&p->p_lock); + mutex_exit(&stp->sd_lock); pgsignal(p->p_pgidp, 
SIGTTIN); + mutex_enter(&stp->sd_lock); mutex_enter(&p->p_lock); } else { /* mode == JCWRITE or JCSETP */ if ((mode == JCWRITE && !(stp->sd_flag & STRTOSTOP)) || @@ -3159,7 +3173,9 @@ straccess(struct stdata *stp, enum jcaccess mode) return (EIO); } mutex_exit(&p->p_lock); + mutex_exit(&stp->sd_lock); pgsignal(p->p_pgidp, SIGTTOU); + mutex_enter(&stp->sd_lock); mutex_enter(&p->p_lock); } @@ -3174,10 +3190,15 @@ straccess(struct stdata *stp, enum jcaccess mode) * We can't get here if the signal is ignored or * if the current thread is blocking the signal. */ + mutex_exit(&stp->sd_lock); if (!cv_wait_sig_swap(&lbolt_cv, &p->p_lock)) { mutex_exit(&p->p_lock); + mutex_enter(&stp->sd_lock); return (EINTR); } + mutex_exit(&p->p_lock); + mutex_enter(&stp->sd_lock); + mutex_enter(&p->p_lock); } } @@ -4001,59 +4022,12 @@ strsignal(stdata_t *stp, int sig, int32_t band) void strhup(stdata_t *stp) { + ASSERT(mutex_owned(&stp->sd_lock)); pollwakeup(&stp->sd_pollist, POLLHUP); - mutex_enter(&stp->sd_lock); if (stp->sd_sigflags & S_HANGUP) strsendsig(stp->sd_siglist, S_HANGUP, 0, 0); - mutex_exit(&stp->sd_lock); -} - -void -stralloctty(stdata_t *stp) -{ - proc_t *p = curproc; - sess_t *sp = p->p_sessp; - - mutex_enter(&stp->sd_lock); - /* - * No need to hold the session lock or do a TTY_HOLD() because - * this is the only thread that can be the session leader and not - * have a controlling tty. 
- */ - if ((stp->sd_flag & - (STRHUP|STRDERR|STWRERR|STPLEX|STRISTTY)) == STRISTTY && - stp->sd_sidp == NULL && /* not allocated as ctty */ - sp->s_sidp == p->p_pidp && /* session leader */ - sp->s_flag != SESS_CLOSE && /* session is not closing */ - sp->s_vp == NULL) { /* without ctty */ - ASSERT(stp->sd_pgidp == NULL); - alloctty(p, makectty(stp->sd_vnode)); - - mutex_enter(&pidlock); - stp->sd_sidp = sp->s_sidp; - stp->sd_pgidp = sp->s_sidp; - PID_HOLD(stp->sd_pgidp); - PID_HOLD(stp->sd_sidp); - mutex_exit(&pidlock); - } - mutex_exit(&stp->sd_lock); } -void -strfreectty(stdata_t *stp) -{ - mutex_enter(&stp->sd_lock); - pgsignal(stp->sd_pgidp, SIGHUP); - mutex_enter(&pidlock); - PID_RELE(stp->sd_pgidp); - PID_RELE(stp->sd_sidp); - stp->sd_pgidp = NULL; - stp->sd_sidp = NULL; - mutex_exit(&pidlock); - mutex_exit(&stp->sd_lock); - if (!(stp->sd_flag & STRHUP)) - strhup(stp); -} /* * Backenable the first queue upstream from `q' with a service procedure. */ diff --git a/usr/src/uts/common/os/sysent.c b/usr/src/uts/common/os/sysent.c index 80761e102c..8211e23d01 100644 --- a/usr/src/uts/common/os/sysent.c +++ b/usr/src/uts/common/os/sysent.c @@ -51,6 +51,7 @@ int access(); int alarm(); int auditsys(); +int64_t brandsys(); int brk(); int chdir(); int chmod(); @@ -131,6 +132,8 @@ int unlink(); int utime(); int64_t utssys32(); int64_t utssys64(); +int uucopy(); +ssize_t uucopystr(); int64_t wait(); ssize_t write(); ssize_t readv(); @@ -473,7 +476,7 @@ struct sysent sysent[NSYSCALL] = SYSENT_NOSYS(), SYSENT_CI("fstatfs", fstatfs32, 4)), /* 39 */ SYSENT_CI("setpgrp", setpgrp, 3), - /* 40 */ SYSENT_LOADABLE(), /* (was cxenix) */ + /* 40 */ SYSENT_CI("uucopystr", uucopystr, 3), /* 41 */ SYSENT_CI("dup", dup, 1), /* 42 */ SYSENT_LOADABLE(), /* (was pipe ) */ /* 43 */ SYSENT_CL("times", times, 1), @@ -658,7 +661,7 @@ struct sysent sysent[NSYSCALL] = SYSENT_NOSYS(), SYSENT_C("llseek", llseek32, 4)), /* 176 */ SYSENT_LOADABLE(), /* inst_sync */ - /* 177 */ SYSENT_LOADABLE(), /* 
(was srmlimitsys) */ + /* 177 */ SYSENT_CI("brandsys", brandsys, 6), /* 178 */ SYSENT_LOADABLE(), /* kaio */ /* 179 */ SYSENT_LOADABLE(), /* cpc */ /* 180 */ SYSENT_CI("lgrpsys", lgrpsys, 3), @@ -770,7 +773,7 @@ struct sysent sysent[NSYSCALL] = /* 251 */ SYSENT_CI("lwp_mutex_trylock", lwp_mutex_trylock, 1), /* 252 */ SYSENT_CI("lwp_mutex_init", lwp_mutex_init, 2), /* 253 */ SYSENT_CI("cladm", cladm, 3), - /* 254 */ SYSENT_LOADABLE(), /* (was lwp_sigtimedwait) */ + /* 254 */ SYSENT_CI("uucopy", uucopy, 3), /* 255 */ SYSENT_CI("umount2", umount2, 2) /* ONC_PLUS EXTRACT START */ }; @@ -876,7 +879,7 @@ struct sysent sysent32[NSYSCALL] = /* 37 */ SYSENT_CI("kill", kill, 2), /* 38 */ SYSENT_CI("fstatfs", fstatfs32, 4), /* 39 */ SYSENT_CI("setpgrp", setpgrp, 3), - /* 40 */ SYSENT_LOADABLE32(), /* (was cxenix) */ + /* 40 */ SYSENT_CI("uucopystr", uucopystr, 3), /* 41 */ SYSENT_CI("dup", dup, 1), /* 42 */ SYSENT_LOADABLE32(), /* (was pipe ) */ /* 43 */ SYSENT_CI("times", times32, 1), @@ -1036,7 +1039,7 @@ struct sysent sysent32[NSYSCALL] = /* 174 */ SYSENT_CI("pwrite", pwrite32, 4), /* 175 */ SYSENT_C("llseek", llseek32, 4), /* 176 */ SYSENT_LOADABLE32(), /* inst_sync */ - /* 177 */ SYSENT_LOADABLE32(), /* srmlimitsys */ + /* 177 */ SYSENT_CI("brandsys", brandsys, 6), /* 178 */ SYSENT_LOADABLE32(), /* kaio */ /* 179 */ SYSENT_LOADABLE32(), /* cpc */ /* 180 */ SYSENT_CI("lgrpsys", lgrpsys, 3), @@ -1116,7 +1119,7 @@ struct sysent sysent32[NSYSCALL] = /* 251 */ SYSENT_CI("lwp_mutex_trylock", lwp_mutex_trylock, 1), /* 252 */ SYSENT_CI("lwp_mutex_init", lwp_mutex_init, 2), /* 253 */ SYSENT_CI("cladm", cladm, 3), - /* 254 */ SYSENT_LOADABLE32(), /* (was lwp_sigtimedwait) */ + /* 254 */ SYSENT_CI("uucopy", uucopy, 3), /* 255 */ SYSENT_CI("umount2", umount2, 2) /* ONC_PLUS EXTRACT START */ }; diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 6a5c9243b3..9fd6b423bd 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -228,6 
+228,7 @@ #include <sys/nvpair.h> #include <sys/rctl.h> #include <sys/fss.h> +#include <sys/brand.h> #include <sys/zone.h> #include <sys/tsol/label.h> @@ -330,7 +331,6 @@ static kmutex_t mount_lock; const char * const zone_default_initname = "/sbin/init"; static char * const zone_prefix = "/zone/"; - static int zone_shutdown(zoneid_t zoneid); /* @@ -1223,6 +1223,8 @@ zone_init(void) zone0.zone_nlwps = p0.p_lwpcnt; zone0.zone_ntasks = 1; mutex_exit(&p0.p_lock); + zone0.zone_restart_init = B_TRUE; + zone0.zone_brand = &native_brand; rctl_prealloc_destroy(gp); /* * pool_default hasn't been initialized yet, so we let pool_init() take @@ -2330,33 +2332,40 @@ void zone_start_init(void) { proc_t *p = ttoproc(curthread); + zone_t *z = p->p_zone; ASSERT(!INGLOBALZONE(curproc)); /* + * For all purposes (ZONE_ATTR_INITPID and restart_init), + * storing just the pid of init is sufficient. + */ + z->zone_proc_initpid = p->p_pid; + + /* * We maintain zone_boot_err so that we can return the cause of the * failure back to the caller of the zone_boot syscall. */ p->p_zone->zone_boot_err = start_init_common(); mutex_enter(&zone_status_lock); - if (p->p_zone->zone_boot_err != 0) { + if (z->zone_boot_err != 0) { /* * Make sure we are still in the booting state-- we could have * raced and already be shutting down, or even further along. 
*/ - if (zone_status_get(p->p_zone) == ZONE_IS_BOOTING) - zone_status_set(p->p_zone, ZONE_IS_SHUTTING_DOWN); + if (zone_status_get(z) == ZONE_IS_BOOTING) + zone_status_set(z, ZONE_IS_SHUTTING_DOWN); mutex_exit(&zone_status_lock); /* It's gone bad, dispose of the process */ - if (proc_exit(CLD_EXITED, p->p_zone->zone_boot_err) != 0) { + if (proc_exit(CLD_EXITED, z->zone_boot_err) != 0) { mutex_enter(&p->p_lock); ASSERT(p->p_flag & SEXITLWPS); lwp_exit(); } } else { - if (zone_status_get(p->p_zone) == ZONE_IS_BOOTING) - zone_status_set(p->p_zone, ZONE_IS_RUNNING); + if (zone_status_get(z) == ZONE_IS_BOOTING) + zone_status_set(z, ZONE_IS_RUNNING); mutex_exit(&zone_status_lock); /* cause the process to return to userland. */ lwp_rtt(); @@ -2939,6 +2948,9 @@ zone_create(const char *zone_name, const char *zone_root, zone->zone_psetid = ZONE_PS_INVAL; zone->zone_ncpus = 0; zone->zone_ncpus_online = 0; + zone->zone_restart_init = B_TRUE; + zone->zone_brand = &native_brand; + zone->zone_initname = NULL; mutex_init(&zone->zone_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zone->zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL); @@ -3464,6 +3476,9 @@ zone_shutdown(zoneid_t zoneid) zone_rele(zone); return (set_errno(EINTR)); } + + brand_unregister_zone(zone->zone_brand); + zone_rele(zone); return (0); } @@ -3771,6 +3786,18 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) copyout(&initpid, buf, bufsize) != 0) error = EFAULT; break; + case ZONE_ATTR_BRAND: + size = strlen(zone->zone_brand->b_name) + 1; + + if (bufsize > size) + bufsize = size; + if (buf != NULL) { + err = copyoutstr(zone->zone_brand->b_name, buf, + bufsize, NULL); + if (err != 0 && err != ENAMETOOLONG) + error = EFAULT; + } + break; case ZONE_ATTR_INITNAME: size = strlen(zone->zone_initname) + 1; if (bufsize > size) @@ -3797,7 +3824,12 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) } break; default: - error = EINVAL; + if 
((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) { + size = bufsize; + error = ZBROP(zone)->b_getattr(zone, attr, buf, &size); + } else { + error = EINVAL; + } } zone_rele(zone); @@ -3815,6 +3847,7 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) { zone_t *zone; zone_status_t zone_status; + struct brand_attr *attrp; int err; if (secpolicy_zone_config(CRED()) != 0) @@ -3847,8 +3880,33 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) case ZONE_ATTR_BOOTARGS: err = zone_set_bootargs(zone, (const char *)buf); break; + case ZONE_ATTR_BRAND: + ASSERT(!ZONE_IS_BRANDED(zone)); + err = 0; + attrp = kmem_alloc(sizeof (struct brand_attr), KM_SLEEP); + if ((buf == NULL) || + (copyin(buf, attrp, sizeof (struct brand_attr)) != 0)) { + kmem_free(attrp, sizeof (struct brand_attr)); + err = EFAULT; + break; + } + + if (is_system_labeled() && strncmp(attrp->ba_brandname, + NATIVE_BRAND_NAME, MAXNAMELEN) != 0) { + err = EPERM; + break; + } + + zone->zone_brand = brand_register_zone(attrp); + kmem_free(attrp, sizeof (struct brand_attr)); + if (zone->zone_brand == NULL) + err = EINVAL; + break; default: - err = EINVAL; + if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) + err = ZBROP(zone)->b_setattr(zone, attr, buf, bufsize); + else + err = EINVAL; } done: @@ -4145,10 +4203,10 @@ zone_enter(zoneid_t zoneid) */ mutex_enter(&pidlock); sp = zone->zone_zsched->p_sessp; - SESS_HOLD(sp); + sess_hold(zone->zone_zsched); mutex_enter(&pp->p_lock); pgexit(pp); - SESS_RELE(pp->p_sessp); + sess_rele(pp->p_sessp, B_TRUE); pp->p_sessp = sp; pgjoin(pp, zone->zone_zsched->p_pidp); mutex_exit(&pp->p_lock); |