Diffstat (limited to 'usr/src/uts/common/disp/thread.c')
-rw-r--r-- | usr/src/uts/common/disp/thread.c | 372
1 file changed, 310 insertions, 62 deletions
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c
index f2685af534..c923ba5d1a 100644
--- a/usr/src/uts/common/disp/thread.c
+++ b/usr/src/uts/common/disp/thread.c
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2018 Joyent, Inc.
  */
 
 #include <sys/types.h>
@@ -74,6 +74,11 @@
 #include <sys/waitq.h>
 #include <sys/cpucaps.h>
 #include <sys/kiconv.h>
+#include <sys/ht.h>
+
+#ifndef STACK_GROWTH_DOWN
+#error Stacks do not grow downward; 3b2 zombie attack detected!
+#endif
 
 struct kmem_cache *thread_cache;	/* cache of free threads */
 struct kmem_cache *lwp_cache;		/* cache of free lwps */
@@ -372,7 +377,7 @@ thread_create(
 		if (stksize <= sizeof (kthread_t) + PTR24_ALIGN)
 			cmn_err(CE_PANIC, "thread_create: proposed stack size"
 			    " too small to hold thread.");
-#ifdef STACK_GROWTH_DOWN
+
 		stksize -= SA(sizeof (kthread_t) + PTR24_ALIGN - 1);
 		stksize &= -PTR24_ALIGN;	/* make thread aligned */
 		t = (kthread_t *)(stk + stksize);
@@ -381,13 +386,6 @@ thread_create(
 			audit_thread_create(t);
 		t->t_stk = stk + stksize;
 		t->t_stkbase = stk;
-#else	/* stack grows to larger addresses */
-		stksize -= SA(sizeof (kthread_t));
-		t = (kthread_t *)(stk);
-		bzero(t, sizeof (kthread_t));
-		t->t_stk = stk + sizeof (kthread_t);
-		t->t_stkbase = stk + stksize + sizeof (kthread_t);
-#endif	/* STACK_GROWTH_DOWN */
 		t->t_flag |= T_TALLOCSTK;
 		t->t_swap = stk;
 	} else {
@@ -400,13 +398,8 @@ thread_create(
 		 * Initialize t_stk to the kernel stack pointer to use
 		 * upon entry to the kernel
 		 */
-#ifdef STACK_GROWTH_DOWN
 		t->t_stk = stk + stksize;
 		t->t_stkbase = stk;
-#else
-		t->t_stk = stk;	/* 3b2-like */
-		t->t_stkbase = stk + stksize;
-#endif	/* STACK_GROWTH_DOWN */
 	}
 
 	if (kmem_stackinfo != 0) {
@@ -515,8 +508,8 @@ thread_create(
 	if (CPU->cpu_part == &cp_default)
 		t->t_cpu = CPU;
 	else
-		t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t->t_lpl,
-		    t->t_pri, NULL);
+		t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t,
+		    t->t_pri);
 	t->t_disp_queue = t->t_cpu->cpu_disp;
 	kpreempt_enable();
 
@@ -589,6 +582,9 @@ thread_exit(void)
 	if ((t->t_proc_flag & TP_ZTHREAD) != 0)
 		cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");
 
+	if ((t->t_flag & T_SPLITSTK) != 0)
+		cmn_err(CE_PANIC, "thread_exit: called when stack is split");
+
 	tsd_exit();		/* Clean up this thread's TSD */
 
 	kcpc_passivate();	/* clean up performance counter state */
@@ -791,6 +787,11 @@ thread_free(kthread_t *t)
 	nthread--;
 	mutex_exit(&pidlock);
 
+	if (t->t_name != NULL) {
+		kmem_free(t->t_name, THREAD_NAME_MAX);
+		t->t_name = NULL;
+	}
+
 	/*
 	 * Free thread, lwp and stack. This needs to be done carefully, since
 	 * if T_TALLOCSTK is set, the thread is part of the stack.
@@ -1049,8 +1050,44 @@ installctx(
 	ctx->exit_op = exit;
 	ctx->free_op = free;
 	ctx->arg = arg;
-	ctx->next = t->t_ctx;
+	ctx->save_ts = 0;
+	ctx->restore_ts = 0;
+
+	/*
+	 * Keep ctxops in a doubly-linked list to allow traversal in both
+	 * directions. Using only the newest-to-oldest ordering was adequate
+	 * previously, but reversing the order for restore_op actions is
+	 * necessary if later-added ctxops depends on earlier ones.
+	 *
+	 * One example of such a dependency: Hypervisor software handling the
+	 * guest FPU expects that it save FPU state prior to host FPU handling
+	 * and consequently handle the guest logic _after_ the host FPU has
+	 * been restored.
+	 *
+	 * The t_ctx member points to the most recently added ctxop or is NULL
+	 * if no ctxops are associated with the thread. The 'next' pointers
+	 * form a loop of the ctxops in newest-to-oldest order. The 'prev'
+	 * pointers form a loop in the reverse direction, where t_ctx->prev is
+	 * the oldest entry associated with the thread.
+	 *
+	 * The protection of kpreempt_disable is required to safely perform the
+	 * list insertion, since there are inconsistent states between some of
+	 * the pointer assignments.
+	 */
+	kpreempt_disable();
+	if (t->t_ctx == NULL) {
+		ctx->next = ctx;
+		ctx->prev = ctx;
+	} else {
+		struct ctxop *head = t->t_ctx, *tail = t->t_ctx->prev;
+
+		ctx->next = head;
+		ctx->prev = tail;
+		head->prev = ctx;
+		tail->next = ctx;
+	}
 	t->t_ctx = ctx;
+	kpreempt_enable();
 }
 
 /*
@@ -1067,7 +1104,7 @@ removectx(
 	void (*exit)(void *),
 	void (*free)(void *, int))
 {
-	struct ctxop *ctx, *prev_ctx;
+	struct ctxop *ctx, *head;
 
 	/*
 	 * The incoming kthread_t (which is the thread for which the
 	 * context ops will be installed) should be one of the following:
@@ -1092,17 +1129,31 @@ removectx(
 	 * and the target thread from racing with each other during lwp exit.
 	 */
 	mutex_enter(&t->t_ctx_lock);
-	prev_ctx = NULL;
 	kpreempt_disable();
-	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next) {
+
+	if (t->t_ctx == NULL) {
+		mutex_exit(&t->t_ctx_lock);
+		kpreempt_enable();
+		return (0);
+	}
+
+	ctx = head = t->t_ctx;
+	do {
 		if (ctx->save_op == save && ctx->restore_op == restore &&
 		    ctx->fork_op == fork && ctx->lwp_create_op == lwp_create &&
 		    ctx->exit_op == exit && ctx->free_op == free &&
 		    ctx->arg == arg) {
-			if (prev_ctx)
-				prev_ctx->next = ctx->next;
-			else
+			ctx->prev->next = ctx->next;
+			ctx->next->prev = ctx->prev;
+			if (ctx->next == ctx) {
+				/* last remaining item */
+				t->t_ctx = NULL;
+			} else if (ctx == t->t_ctx) {
+				/* fix up head of list */
 				t->t_ctx = ctx->next;
+			}
+			ctx->next = ctx->prev = NULL;
+
 			mutex_exit(&t->t_ctx_lock);
 			if (ctx->free_op != NULL)
 				(ctx->free_op)(ctx->arg, 0);
@@ -1110,44 +1161,70 @@ removectx(
 			kpreempt_enable();
 			return (1);
 		}
-		prev_ctx = ctx;
-	}
+
+		ctx = ctx->next;
+	} while (ctx != head);
+
 	mutex_exit(&t->t_ctx_lock);
 	kpreempt_enable();
-
 	return (0);
 }
 
 void
 savectx(kthread_t *t)
 {
-	struct ctxop *ctx;
-
 	ASSERT(t == curthread);
-	for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
-		if (ctx->save_op != NULL)
-			(ctx->save_op)(ctx->arg);
+
+	if (t->t_ctx != NULL) {
+		struct ctxop *ctx, *head;
+
+		/* Forward traversal */
+		ctx = head = t->t_ctx;
+		do {
+			if (ctx->save_op != NULL) {
+				ctx->save_ts = gethrtime_unscaled();
+				(ctx->save_op)(ctx->arg);
+			}
+			ctx = ctx->next;
+		} while (ctx != head);
+	}
 }
 
 void
 restorectx(kthread_t *t)
 {
-	struct ctxop *ctx;
-
 	ASSERT(t == curthread);
-	for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
-		if (ctx->restore_op != NULL)
-			(ctx->restore_op)(ctx->arg);
+
+	if (t->t_ctx != NULL) {
+		struct ctxop *ctx, *tail;
+
+		/* Backward traversal (starting at the tail) */
+		ctx = tail = t->t_ctx->prev;
+		do {
+			if (ctx->restore_op != NULL) {
+				ctx->restore_ts = gethrtime_unscaled();
+				(ctx->restore_op)(ctx->arg);
+			}
+			ctx = ctx->prev;
+		} while (ctx != tail);
+	}
 }
 
 void
 forkctx(kthread_t *t, kthread_t *ct)
 {
-	struct ctxop *ctx;
-
-	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
-		if (ctx->fork_op != NULL)
-			(ctx->fork_op)(t, ct);
+	if (t->t_ctx != NULL) {
+		struct ctxop *ctx, *head;
+
+		/* Forward traversal */
+		ctx = head = t->t_ctx;
+		do {
+			if (ctx->fork_op != NULL) {
+				(ctx->fork_op)(t, ct);
+			}
+			ctx = ctx->next;
+		} while (ctx != head);
+	}
 }
 
 /*
@@ -1158,11 +1235,18 @@ forkctx(kthread_t *t, kthread_t *ct)
 void
 lwp_createctx(kthread_t *t, kthread_t *ct)
 {
-	struct ctxop *ctx;
-
-	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
-		if (ctx->lwp_create_op != NULL)
-			(ctx->lwp_create_op)(t, ct);
+	if (t->t_ctx != NULL) {
+		struct ctxop *ctx, *head;
+
+		/* Forward traversal */
+		ctx = head = t->t_ctx;
+		do {
+			if (ctx->lwp_create_op != NULL) {
+				(ctx->lwp_create_op)(t, ct);
+			}
+			ctx = ctx->next;
+		} while (ctx != head);
+	}
 }
 
 /*
@@ -1175,11 +1259,18 @@ lwp_createctx(kthread_t *t, kthread_t *ct)
 void
 exitctx(kthread_t *t)
 {
-	struct ctxop *ctx;
-
-	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
-		if (ctx->exit_op != NULL)
-			(ctx->exit_op)(t);
+	if (t->t_ctx != NULL) {
+		struct ctxop *ctx, *head;
+
+		/* Forward traversal */
+		ctx = head = t->t_ctx;
+		do {
+			if (ctx->exit_op != NULL) {
+				(ctx->exit_op)(t);
+			}
+			ctx = ctx->next;
+		} while (ctx != head);
+	}
 }
 
 /*
@@ -1189,14 +1280,21 @@ exitctx(kthread_t *t)
 void
 freectx(kthread_t *t, int isexec)
 {
-	struct ctxop *ctx;
-
 	kpreempt_disable();
-	while ((ctx = t->t_ctx) != NULL) {
-		t->t_ctx = ctx->next;
-		if (ctx->free_op != NULL)
-			(ctx->free_op)(ctx->arg, isexec);
-		kmem_free(ctx, sizeof (struct ctxop));
+	if (t->t_ctx != NULL) {
+		struct ctxop *ctx, *head;
+
+		ctx = head = t->t_ctx;
+		t->t_ctx = NULL;
+		do {
+			struct ctxop *next = ctx->next;
+
+			if (ctx->free_op != NULL) {
+				(ctx->free_op)(ctx->arg, isexec);
+			}
+			kmem_free(ctx, sizeof (struct ctxop));
+			ctx = next;
+		} while (ctx != head);
 	}
 	kpreempt_enable();
 }
@@ -1211,17 +1309,22 @@ freectx(kthread_t *t, int isexec)
 void
 freectx_ctx(struct ctxop *ctx)
 {
-	struct ctxop *nctx;
+	struct ctxop *head = ctx;
 
 	ASSERT(ctx != NULL);
 
 	kpreempt_disable();
+
+	head = ctx;
 	do {
-		nctx = ctx->next;
-		if (ctx->free_op != NULL)
+		struct ctxop *next = ctx->next;
+
+		if (ctx->free_op != NULL) {
 			(ctx->free_op)(ctx->arg, 0);
+		}
 		kmem_free(ctx, sizeof (struct ctxop));
-	} while ((ctx = nctx) != NULL);
+		ctx = next;
+	} while (ctx != head);
 	kpreempt_enable();
 }
 
@@ -1320,6 +1423,8 @@ thread_unpin()
 	itp = t->t_intr;		/* interrupted thread */
 	t->t_intr = NULL;		/* clear interrupt ptr */
 
+	ht_end_intr();
+
 	/*
 	 * Get state from interrupt thread for the one
 	 * it interrupted.
@@ -1883,6 +1988,103 @@ thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
 
 	return (on_rq);
 }
+
+/*
+ * There are occasions in the kernel when we need much more stack than we
+ * allocate by default, but we do not wish to have that work done
+ * asynchronously by another thread. To accommodate these scenarios, we allow
+ * for a split stack (also known as a "segmented stack") whereby a new stack
+ * is dynamically allocated and the current thread jumps onto it for purposes
+ * of executing the specified function. After the specified function returns,
+ * the stack is deallocated and control is returned to the caller. This
+ * functionality is implemented by thread_splitstack(), below; there are a few
+ * constraints on its use:
+ *
+ * - The caller must be in a context where it is safe to block for memory.
+ * - The caller cannot be in a t_onfault context
+ * - The called function must not call thread_exit() while on the split stack
+ *
+ * The code will explicitly panic if these constraints are violated. Notably,
+ * however, thread_splitstack() _can_ be called on a split stack -- there
+ * is no limit to the level that split stacks can nest.
+ *
+ * When the stack is split, it is constructed such that stack backtraces
+ * from kernel debuggers continue to function -- though note that DTrace's
+ * stack() action and stackdepth function will only show the stack up to and
+ * including thread_splitstack_run(); DTrace explicitly bounds itself to
+ * pointers that exist within the current declared stack as a safety
+ * mechanism.
+ */
+void
+thread_splitstack(void (*func)(void *), void *arg, size_t stksize)
+{
+	kthread_t *t = curthread;
+	caddr_t ostk, ostkbase, stk;
+	ushort_t otflag;
+
+	if (t->t_onfault != NULL)
+		panic("thread_splitstack: called with non-NULL t_onfault");
+
+	ostk = t->t_stk;
+	ostkbase = t->t_stkbase;
+	otflag = t->t_flag;
+
+	stksize = roundup(stksize, PAGESIZE);
+
+	if (stksize < default_stksize)
+		stksize = default_stksize;
+
+	if (stksize == default_stksize) {
+		stk = (caddr_t)segkp_cache_get(segkp_thread);
+	} else {
+		stksize = roundup(stksize, PAGESIZE);
+		stk = (caddr_t)segkp_get(segkp, stksize,
+		    (KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED));
+	}
+
+	/*
+	 * We're going to lock ourselves before we set T_SPLITSTK to assure
+	 * that we're not swapped out in the meantime. (Note that we don't
+	 * bother to set t_swap, as we're not going to be swapped out.)
+	 */
+	thread_lock(t);
+
+	if (!(otflag & T_SPLITSTK))
+		t->t_flag |= T_SPLITSTK;
+
+	t->t_stk = stk + stksize;
+	t->t_stkbase = stk;
+
+	thread_unlock(t);
+
+	/*
+	 * Now actually run on the new (split) stack...
+	 */
+	thread_splitstack_run(t->t_stk, func, arg);
+
+	/*
+	 * We're back onto our own stack; lock ourselves and restore our
+	 * pre-split state.
+	 */
+	thread_lock(t);
+
+	t->t_stk = ostk;
+	t->t_stkbase = ostkbase;
+
+	if (!(otflag & T_SPLITSTK))
+		t->t_flag &= ~T_SPLITSTK;
+
+	thread_unlock(t);
+
+	/*
+	 * Now that we are entirely back on our own stack, call back into
+	 * the platform layer to perform any platform-specific cleanup.
+	 */
+	thread_splitstack_cleanup();
+
+	segkp_release(segkp, stk);
+}
+
 /*
  * Tunable kmem_stackinfo is set, fill the kernel thread stack with a
  * specific pattern.
@@ -2127,3 +2329,49 @@ stkinfo_percent(caddr_t t_stk, caddr_t t_stkbase, caddr_t sp)
 	}
 	return (percent);
 }
+
+/*
+ * NOTE: This will silently truncate a name > THREAD_NAME_MAX - 1 characters
+ * long. It is expected that callers (acting on behalf of userland clients)
+ * will perform any required checks to return the correct error semantics.
+ * It is also expected callers on behalf of userland clients have done
+ * any necessary permission checks.
+ */
+void
+thread_setname(kthread_t *t, const char *name)
+{
+	char *buf = NULL;
+
+	/*
+	 * We optimistically assume that a thread's name will only be set
+	 * once and so allocate memory in preparation of setting t_name.
+	 * If it turns out a name has already been set, we just discard (free)
+	 * the buffer we just allocated and reuse the current buffer
+	 * (as all should be THREAD_NAME_MAX large).
+	 *
+	 * Such an arrangement means over the lifetime of a kthread_t, t_name
+	 * is either NULL or has one value (the address of the buffer holding
+	 * the current thread name). The assumption is that most kthread_t
+	 * instances will not have a name assigned, so dynamically allocating
+	 * the memory should minimize the footprint of this feature, but by
+	 * having the buffer persist for the life of the thread, it simplifies
+	 * usage in highly constrained situations (e.g. dtrace).
+	 */
+	if (name != NULL && name[0] != '\0') {
+		buf = kmem_zalloc(THREAD_NAME_MAX, KM_SLEEP);
+		(void) strlcpy(buf, name, THREAD_NAME_MAX);
+	}
+
+	mutex_enter(&ttoproc(t)->p_lock);
+	if (t->t_name == NULL) {
+		t->t_name = buf;
+	} else {
+		if (buf != NULL) {
+			(void) strlcpy(t->t_name, name, THREAD_NAME_MAX);
+			kmem_free(buf, THREAD_NAME_MAX);
+		} else {
+			bzero(t->t_name, THREAD_NAME_MAX);
+		}
+	}
+	mutex_exit(&ttoproc(t)->p_lock);
+}
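
The installctx()/savectx()/restorectx() changes above replace the old singly-linked ctxop chain with a circular, doubly-linked list: t_ctx points at the newest entry, t_ctx->prev at the oldest, save_op callbacks run newest-to-oldest, and restore_op callbacks run oldest-to-newest. The standalone userland C sketch below models only that linkage and the two traversal orders; it is illustrative, and node_t, list_insert_head(), walk_forward(), walk_backward(), and the demo main() are names invented for this note rather than anything in thread.c.

#include <stdio.h>

/* Minimal stand-in for a ctxop: an id plus circular next/prev links. */
typedef struct node {
	int id;
	struct node *next;	/* newest-to-oldest direction */
	struct node *prev;	/* oldest-to-newest direction */
} node_t;

/* Insert at the head, mirroring the linkage installctx() performs above. */
static void
list_insert_head(node_t **headp, node_t *n)
{
	if (*headp == NULL) {
		n->next = n;
		n->prev = n;
	} else {
		node_t *head = *headp, *tail = (*headp)->prev;

		n->next = head;
		n->prev = tail;
		head->prev = n;
		tail->next = n;
	}
	*headp = n;
}

/* Forward walk (newest first), the order savectx()/forkctx() use. */
static void
walk_forward(node_t *head)
{
	node_t *n = head;

	if (n == NULL)
		return;
	do {
		printf("save %d\n", n->id);
		n = n->next;
	} while (n != head);
}

/* Backward walk starting at the tail (oldest first), as restorectx() does. */
static void
walk_backward(node_t *head)
{
	node_t *n, *tail;

	if (head == NULL)
		return;
	n = tail = head->prev;
	do {
		printf("restore %d\n", n->id);
		n = n->prev;
	} while (n != tail);
}

int
main(void)
{
	node_t *head = NULL;
	node_t nodes[3] = { { 1 }, { 2 }, { 3 } };

	for (int i = 0; i < 3; i++)
		list_insert_head(&head, &nodes[i]);

	walk_forward(head);	/* prints 3, 2, 1 */
	walk_backward(head);	/* prints 1, 2, 3 */
	return (0);
}

Inserting 1, 2, 3 yields a forward (save) walk of 3 2 1 and a backward (restore) walk of 1 2 3, which is the ordering dependency the hypervisor FPU example in the comment relies on.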
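thread_splitstack() above is the entry point for running a function on a temporarily allocated kernel stack. The fragment below is a hypothetical in-kernel caller sketched under the constraints listed in the block comment (a context where blocking for memory is safe, no t_onfault, no thread_exit() while on the split stack); deep_walk(), deep_walk_arg_t, deep_walk_start(), and the 64K size are invented for illustration, and only thread_splitstack() itself comes from thread.c.

/* Hypothetical argument block handed to the split-stack function. */
typedef struct deep_walk_arg {
	void	*dw_root;	/* data structure to walk */
	int	dw_error;	/* result passed back to the caller */
} deep_walk_arg_t;

/* Runs on the freshly allocated stack; free to recurse well past the
 * default kernel stack depth, but must not call thread_exit(). */
static void
deep_walk(void *arg)
{
	deep_walk_arg_t *dw = arg;

	/* ... deep recursive traversal of dw->dw_root would go here ... */
	dw->dw_error = 0;
}

static int
deep_walk_start(void *root)
{
	deep_walk_arg_t dw = { root, 0 };

	/* Must be called where blocking for memory is safe. */
	thread_splitstack(deep_walk, &dw, 64 * 1024);

	/* The split stack has already been released by the time we return. */
	return (dw.dw_error);
}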
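thread_setname() above installs (or replaces the contents of) a persistent THREAD_NAME_MAX-sized buffer under the owning process's p_lock. A hypothetical caller might look like the fragment below; the worker-thread label is invented for illustration, and per the NOTE above an over-long name is silently truncated, while a NULL or empty name zeroes an existing buffer in place (the buffer itself is kept for the life of the thread).

	/* Hypothetical: label the current worker thread for observability. */
	thread_setname(curthread, "task-flush-worker");

	/* Later: clear the label; the THREAD_NAME_MAX buffer is retained. */
	thread_setname(curthread, NULL);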