Diffstat (limited to 'usr/src/uts/common/disp/thread.c')
-rw-r--r-- | usr/src/uts/common/disp/thread.c | 372
1 files changed, 283 insertions, 89 deletions
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c
index 854b33798d..d576738e75 100644
--- a/usr/src/uts/common/disp/thread.c
+++ b/usr/src/uts/common/disp/thread.c
@@ -75,6 +75,11 @@
 #include <sys/cpucaps.h>
 #include <sys/kiconv.h>
 #include <sys/ctype.h>
+#include <sys/ht.h>
+
+#ifndef STACK_GROWTH_DOWN
+#error Stacks do not grow downward; 3b2 zombie attack detected!
+#endif
 
 struct kmem_cache *thread_cache;	/* cache of free threads */
 struct kmem_cache *lwp_cache;		/* cache of free lwps */
@@ -373,7 +378,7 @@ thread_create(
 		if (stksize <= sizeof (kthread_t) + PTR24_ALIGN)
 			cmn_err(CE_PANIC, "thread_create: proposed stack size"
 			    " too small to hold thread.");
-#ifdef STACK_GROWTH_DOWN
+
 		stksize -= SA(sizeof (kthread_t) + PTR24_ALIGN - 1);
 		stksize &= -PTR24_ALIGN;	/* make thread aligned */
 		t = (kthread_t *)(stk + stksize);
@@ -382,13 +387,6 @@ thread_create(
 			audit_thread_create(t);
 		t->t_stk = stk + stksize;
 		t->t_stkbase = stk;
-#else	/* stack grows to larger addresses */
-		stksize -= SA(sizeof (kthread_t));
-		t = (kthread_t *)(stk);
-		bzero(t, sizeof (kthread_t));
-		t->t_stk = stk + sizeof (kthread_t);
-		t->t_stkbase = stk + stksize + sizeof (kthread_t);
-#endif	/* STACK_GROWTH_DOWN */
 		t->t_flag |= T_TALLOCSTK;
 		t->t_swap = stk;
 	} else {
@@ -401,13 +399,8 @@ thread_create(
 		 * Initialize t_stk to the kernel stack pointer to use
 		 * upon entry to the kernel
 		 */
-#ifdef STACK_GROWTH_DOWN
 		t->t_stk = stk + stksize;
 		t->t_stkbase = stk;
-#else
-		t->t_stk = stk;	/* 3b2-like */
-		t->t_stkbase = stk + stksize;
-#endif	/* STACK_GROWTH_DOWN */
 	}
 
 	if (kmem_stackinfo != 0) {
@@ -487,15 +480,9 @@ thread_create(
 	curthread->t_prev = t;
 
 	/*
-	 * Threads should never have a NULL t_cpu pointer so assign it
-	 * here.  If the thread is being created with state TS_RUN a
-	 * better CPU may be chosen when it is placed on the run queue.
-	 *
-	 * We need to keep kernel preemption disabled when setting all
-	 * three fields to keep them in sync.  Also, always create in
-	 * the default partition since that's where kernel threads go
-	 * (if this isn't a kernel thread, t_cpupart will be changed
-	 * in lwp_create before setting the thread runnable).
+	 * We'll always create in the default partition since that's where
+	 * kernel threads go (we'll change this later if needed, in
+	 * lwp_create()).
 	 */
 	t->t_cpupart = &cp_default;
 
@@ -504,20 +491,23 @@ thread_create(
 	 * Since the kernel does not (presently) allocate its memory
 	 * in a locality aware fashion, the root is an appropriate home.
 	 * If this thread is later associated with an lwp, it will have
-	 * it's lgroup re-assigned at that time.
+	 * its lgroup re-assigned at that time.
 	 */
 	lgrp_move_thread(t, &cp_default.cp_lgrploads[LGRP_ROOTID], 1);
 
 	/*
-	 * Inherit the current cpu.  If this cpu isn't part of the chosen
-	 * lgroup, a new cpu will be chosen by cpu_choose when the thread
-	 * is ready to run.
+	 * If the current CPU is in the default cpupart, use it.  Otherwise,
+	 * pick one that is; before entering the dispatcher code, we'll
+	 * make sure to keep the invariant that ->t_cpu is set.  (In fact, we
+	 * rely on this, in ht_should_run(), in the call tree of
+	 * disp_lowpri_cpu().)
 	 */
-	if (CPU->cpu_part == &cp_default)
+	if (CPU->cpu_part == &cp_default) {
 		t->t_cpu = CPU;
-	else
-		t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t->t_lpl,
-		    t->t_pri, NULL);
+	} else {
+		t->t_cpu = cp_default.cp_cpulist;
+		t->t_cpu = disp_lowpri_cpu(t->t_cpu, t, t->t_pri);
+	}
 
 	t->t_disp_queue = t->t_cpu->cpu_disp;
 	kpreempt_enable();
@@ -590,6 +580,9 @@ thread_exit(void)
 	if ((t->t_proc_flag & TP_ZTHREAD) != 0)
 		cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");
 
+	if ((t->t_flag & T_SPLITSTK) != 0)
+		cmn_err(CE_PANIC, "thread_exit: called when stack is split");
+
 	tsd_exit();		/* Clean up this thread's TSD */
 
 	kcpc_passivate();	/* clean up performance counter state */
@@ -870,12 +863,12 @@ thread_zone_destroy(zoneid_t zoneid, void *unused)
 
 	/*
 	 * Guard against race condition in mutex_owner_running:
-	 * 	thread=owner(mutex)
-	 * 	<interrupt>
-	 * 			thread exits mutex
-	 * 			thread exits
-	 * 			thread reaped
-	 * 			thread struct freed
+	 *	thread=owner(mutex)
+	 *	<interrupt>
+	 *			thread exits mutex
+	 *			thread exits
+	 *			thread reaped
+	 *			thread struct freed
 	 * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
 	 * A cross call to all cpus will cause the interrupt handler
 	 * to reset the PC if it is in mutex_owner_running, refreshing
@@ -932,12 +925,12 @@ thread_reaper()
 
 	/*
 	 * Guard against race condition in mutex_owner_running:
-	 * 	thread=owner(mutex)
-	 * 	<interrupt>
-	 * 			thread exits mutex
-	 * 			thread exits
-	 * 			thread reaped
-	 * 			thread struct freed
+	 *	thread=owner(mutex)
+	 *	<interrupt>
+	 *			thread exits mutex
+	 *			thread exits
+	 *			thread reaped
+	 *			thread struct freed
 	 * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
 	 * A cross call to all cpus will cause the interrupt handler
 	 * to reset the PC if it is in mutex_owner_running, refreshing
@@ -1055,8 +1048,44 @@ installctx(
 	ctx->exit_op = exit;
 	ctx->free_op = free;
 	ctx->arg = arg;
-	ctx->next = t->t_ctx;
+	ctx->save_ts = 0;
+	ctx->restore_ts = 0;
+
+	/*
+	 * Keep ctxops in a doubly-linked list to allow traversal in both
+	 * directions.  Using only the newest-to-oldest ordering was adequate
+	 * previously, but reversing the order for restore_op actions is
+	 * necessary if later-added ctxops depends on earlier ones.
+	 *
+	 * One example of such a dependency:  Hypervisor software handling the
+	 * guest FPU expects that it save FPU state prior to host FPU handling
+	 * and consequently handle the guest logic _after_ the host FPU has
+	 * been restored.
+	 *
+	 * The t_ctx member points to the most recently added ctxop or is NULL
+	 * if no ctxops are associated with the thread.  The 'next' pointers
+	 * form a loop of the ctxops in newest-to-oldest order.  The 'prev'
+	 * pointers form a loop in the reverse direction, where t_ctx->prev is
+	 * the oldest entry associated with the thread.
+	 *
+	 * The protection of kpreempt_disable is required to safely perform the
+	 * list insertion, since there are inconsistent states between some of
+	 * the pointer assignments.
+	 */
+	kpreempt_disable();
+	if (t->t_ctx == NULL) {
+		ctx->next = ctx;
+		ctx->prev = ctx;
+	} else {
+		struct ctxop *head = t->t_ctx, *tail = t->t_ctx->prev;
+
+		ctx->next = head;
+		ctx->prev = tail;
+		head->prev = ctx;
+		tail->next = ctx;
+	}
 	t->t_ctx = ctx;
+	kpreempt_enable();
 }
 
 /*
@@ -1073,7 +1102,7 @@ removectx(
 	void (*exit)(void *),
 	void (*free)(void *, int))
 {
-	struct ctxop *ctx, *prev_ctx;
+	struct ctxop *ctx, *head;
 
 	/*
 	 * The incoming kthread_t (which is the thread for which the
@@ -1098,17 +1127,31 @@ removectx(
 	 * and the target thread from racing with each other during lwp exit.
 	 */
 	mutex_enter(&t->t_ctx_lock);
-	prev_ctx = NULL;
 	kpreempt_disable();
-	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next) {
+
+	if (t->t_ctx == NULL) {
+		mutex_exit(&t->t_ctx_lock);
+		kpreempt_enable();
+		return (0);
+	}
+
+	ctx = head = t->t_ctx;
+	do {
 		if (ctx->save_op == save && ctx->restore_op == restore &&
 		    ctx->fork_op == fork && ctx->lwp_create_op == lwp_create &&
 		    ctx->exit_op == exit && ctx->free_op == free &&
 		    ctx->arg == arg) {
-			if (prev_ctx)
-				prev_ctx->next = ctx->next;
-			else
+			ctx->prev->next = ctx->next;
+			ctx->next->prev = ctx->prev;
+			if (ctx->next == ctx) {
+				/* last remaining item */
+				t->t_ctx = NULL;
+			} else if (ctx == t->t_ctx) {
+				/* fix up head of list */
 				t->t_ctx = ctx->next;
+			}
+			ctx->next = ctx->prev = NULL;
+
 			mutex_exit(&t->t_ctx_lock);
 			if (ctx->free_op != NULL)
 				(ctx->free_op)(ctx->arg, 0);
@@ -1116,44 +1159,70 @@ removectx(
 			kpreempt_enable();
 			return (1);
 		}
-		prev_ctx = ctx;
-	}
+
+		ctx = ctx->next;
+	} while (ctx != head);
+
 	mutex_exit(&t->t_ctx_lock);
 	kpreempt_enable();
-
 	return (0);
 }
 
 void
 savectx(kthread_t *t)
 {
-	struct ctxop *ctx;
-
 	ASSERT(t == curthread);
-	for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
-		if (ctx->save_op != NULL)
-			(ctx->save_op)(ctx->arg);
+
+	if (t->t_ctx != NULL) {
+		struct ctxop *ctx, *head;
+
+		/* Forward traversal */
+		ctx = head = t->t_ctx;
+		do {
+			if (ctx->save_op != NULL) {
+				ctx->save_ts = gethrtime_unscaled();
+				(ctx->save_op)(ctx->arg);
+			}
+			ctx = ctx->next;
+		} while (ctx != head);
+	}
}
 
 void
 restorectx(kthread_t *t)
 {
-	struct ctxop *ctx;
-
 	ASSERT(t == curthread);
-	for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
-		if (ctx->restore_op != NULL)
-			(ctx->restore_op)(ctx->arg);
+
+	if (t->t_ctx != NULL) {
+		struct ctxop *ctx, *tail;
+
+		/* Backward traversal (starting at the tail) */
+		ctx = tail = t->t_ctx->prev;
+		do {
+			if (ctx->restore_op != NULL) {
+				ctx->restore_ts = gethrtime_unscaled();
+				(ctx->restore_op)(ctx->arg);
+			}
+			ctx = ctx->prev;
+		} while (ctx != tail);
+	}
 }
 
 void
 forkctx(kthread_t *t, kthread_t *ct)
 {
-	struct ctxop *ctx;
-
-	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
-		if (ctx->fork_op != NULL)
-			(ctx->fork_op)(t, ct);
+	if (t->t_ctx != NULL) {
+		struct ctxop *ctx, *head;
+
+		/* Forward traversal */
+		ctx = head = t->t_ctx;
+		do {
+			if (ctx->fork_op != NULL) {
+				(ctx->fork_op)(t, ct);
+			}
+			ctx = ctx->next;
+		} while (ctx != head);
+	}
 }
 
 /*
@@ -1164,11 +1233,18 @@ forkctx(kthread_t *t, kthread_t *ct)
 void
 lwp_createctx(kthread_t *t, kthread_t *ct)
 {
-	struct ctxop *ctx;
-
-	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
-		if (ctx->lwp_create_op != NULL)
-			(ctx->lwp_create_op)(t, ct);
+	if (t->t_ctx != NULL) {
+		struct ctxop *ctx, *head;
+
+		/* Forward traversal */
+		ctx = head = t->t_ctx;
+		do {
+			if (ctx->lwp_create_op != NULL) {
+				(ctx->lwp_create_op)(t, ct);
+			}
+			ctx = ctx->next;
+		} while (ctx != head);
+	}
 }
 
 /*
@@ -1181,11 +1257,18 @@ lwp_createctx(kthread_t *t, kthread_t *ct)
 void
 exitctx(kthread_t *t)
 {
-	struct ctxop *ctx;
-
-	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
-		if (ctx->exit_op != NULL)
-			(ctx->exit_op)(t);
+	if (t->t_ctx != NULL) {
+		struct ctxop *ctx, *head;
+
+		/* Forward traversal */
+		ctx = head = t->t_ctx;
+		do {
+			if (ctx->exit_op != NULL) {
+				(ctx->exit_op)(t);
+			}
+			ctx = ctx->next;
+		} while (ctx != head);
+	}
 }
 
 /*
@@ -1195,14 +1278,21 @@ exitctx(kthread_t *t)
 void
 freectx(kthread_t *t, int isexec)
 {
-	struct ctxop *ctx;
-
 	kpreempt_disable();
-	while ((ctx = t->t_ctx) != NULL) {
-		t->t_ctx = ctx->next;
-		if (ctx->free_op != NULL)
-			(ctx->free_op)(ctx->arg, isexec);
-		kmem_free(ctx, sizeof (struct ctxop));
+	if (t->t_ctx != NULL) {
+		struct ctxop *ctx, *head;
+
+		ctx = head = t->t_ctx;
+		t->t_ctx = NULL;
+		do {
+			struct ctxop *next = ctx->next;
+
+			if (ctx->free_op != NULL) {
+				(ctx->free_op)(ctx->arg, isexec);
+			}
+			kmem_free(ctx, sizeof (struct ctxop));
+			ctx = next;
+		} while (ctx != head);
 	}
 	kpreempt_enable();
 }
@@ -1217,17 +1307,22 @@ freectx(kthread_t *t, int isexec)
 void
 freectx_ctx(struct ctxop *ctx)
 {
-	struct ctxop *nctx;
+	struct ctxop *head = ctx;
 
 	ASSERT(ctx != NULL);
 
 	kpreempt_disable();
+
+	head = ctx;
 	do {
-		nctx = ctx->next;
-		if (ctx->free_op != NULL)
+		struct ctxop *next = ctx->next;
+
+		if (ctx->free_op != NULL) {
 			(ctx->free_op)(ctx->arg, 0);
+		}
 		kmem_free(ctx, sizeof (struct ctxop));
-	} while ((ctx = nctx) != NULL);
+		ctx = next;
+	} while (ctx != head);
 	kpreempt_enable();
 }
 
@@ -1326,6 +1421,8 @@ thread_unpin()
 	itp = t->t_intr;		/* interrupted thread */
 	t->t_intr = NULL;		/* clear interrupt ptr */
 
+	ht_end_intr();
+
 	/*
 	 * Get state from interrupt thread for the one
 	 * it interrupted.
@@ -1422,7 +1519,7 @@ thread_create_intr(struct cpu *cp)
 static kmutex_t		tsd_mutex;	 /* linked list spin lock */
 static uint_t		tsd_nkeys;	 /* size of destructor array */
 /* per-key destructor funcs */
-static void 		(**tsd_destructor)(void *);
+static void		(**tsd_destructor)(void *);
 /* list of tsd_thread's */
 static struct tsd_thread	*tsd_list;
 
@@ -1889,6 +1986,103 @@ thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
 
 	return (on_rq);
 }
+
+/*
+ * There are occasions in the kernel when we need much more stack than we
+ * allocate by default, but we do not wish to have that work done
+ * asynchronously by another thread.  To accommodate these scenarios, we allow
+ * for a split stack (also known as a "segmented stack") whereby a new stack
+ * is dynamically allocated and the current thread jumps onto it for purposes
+ * of executing the specified function.  After the specified function returns,
+ * the stack is deallocated and control is returned to the caller.  This
+ * functionality is implemented by thread_splitstack(), below; there are a few
+ * constraints on its use:
+ *
+ * - The caller must be in a context where it is safe to block for memory.
+ * - The caller cannot be in a t_onfault context
+ * - The called function must not call thread_exit() while on the split stack
+ *
+ * The code will explicitly panic if these constraints are violated.  Notably,
+ * however, thread_splitstack() _can_ be called on a split stack -- there
+ * is no limit to the level that split stacks can nest.
+ *
+ * When the stack is split, it is constructed such that stack backtraces
+ * from kernel debuggers continue to function -- though note that DTrace's
+ * stack() action and stackdepth function will only show the stack up to and
+ * including thread_splitstack_run(); DTrace explicitly bounds itself to
+ * pointers that exist within the current declared stack as a safety
+ * mechanism.
+ */
+void
+thread_splitstack(void (*func)(void *), void *arg, size_t stksize)
+{
+	kthread_t *t = curthread;
+	caddr_t ostk, ostkbase, stk;
+	ushort_t otflag;
+
+	if (t->t_onfault != NULL)
+		panic("thread_splitstack: called with non-NULL t_onfault");
+
+	ostk = t->t_stk;
+	ostkbase = t->t_stkbase;
+	otflag = t->t_flag;
+
+	stksize = roundup(stksize, PAGESIZE);
+
+	if (stksize < default_stksize)
+		stksize = default_stksize;
+
+	if (stksize == default_stksize) {
+		stk = (caddr_t)segkp_cache_get(segkp_thread);
+	} else {
+		stksize = roundup(stksize, PAGESIZE);
+		stk = (caddr_t)segkp_get(segkp, stksize,
+		    (KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED));
+	}
+
+	/*
+	 * We're going to lock ourselves before we set T_SPLITSTK to assure
+	 * that we're not swapped out in the meantime.  (Note that we don't
+	 * bother to set t_swap, as we're not going to be swapped out.)
+	 */
+	thread_lock(t);
+
+	if (!(otflag & T_SPLITSTK))
+		t->t_flag |= T_SPLITSTK;
+
+	t->t_stk = stk + stksize;
+	t->t_stkbase = stk;
+
+	thread_unlock(t);
+
+	/*
+	 * Now actually run on the new (split) stack...
+	 */
+	thread_splitstack_run(t->t_stk, func, arg);
+
+	/*
+	 * We're back onto our own stack; lock ourselves and restore our
+	 * pre-split state.
+	 */
+	thread_lock(t);
+
+	t->t_stk = ostk;
+	t->t_stkbase = ostkbase;
+
+	if (!(otflag & T_SPLITSTK))
+		t->t_flag &= ~T_SPLITSTK;
+
+	thread_unlock(t);
+
+	/*
+	 * Now that we are entirely back on our own stack, call back into
+	 * the platform layer to perform any platform-specific cleanup.
+	 */
+	thread_splitstack_cleanup();
+
+	segkp_release(segkp, stk);
+}
+
 /*
  * Tunable kmem_stackinfo is set, fill the kernel thread stack with a
  * specific pattern.
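A note on the new ctxop linkage: installctx() now links operations into a circular, doubly-linked list headed by t_ctx (newest entry first), savectx() walks it forward (newest to oldest), and restorectx() walks it backward starting at t_ctx->prev (oldest to newest), so a later-installed ctxop is restored after the ones it depends on. The standalone userland sketch below models only that linkage and ordering; the names (model_ctxop, model_install, the "host-fpu"/"guest-fpu" labels) are invented for illustration, and the real list of course hangs off kthread_t under kpreempt_disable()/t_ctx_lock rather than off a global.

/* Userland model of the circular ctxop list -- illustration only. */
#include <stdio.h>
#include <stdlib.h>

typedef struct model_ctxop {
	const char *name;
	struct model_ctxop *next;	/* loop in newest-to-oldest order */
	struct model_ctxop *prev;	/* loop in the reverse direction */
} model_ctxop_t;

static model_ctxop_t *t_ctx = NULL;	/* stands in for curthread->t_ctx */

/* Link a new entry at the head, the way installctx() does. */
static void
model_install(const char *name)
{
	model_ctxop_t *ctx = calloc(1, sizeof (*ctx));

	ctx->name = name;
	if (t_ctx == NULL) {
		ctx->next = ctx;
		ctx->prev = ctx;
	} else {
		model_ctxop_t *head = t_ctx, *tail = t_ctx->prev;

		ctx->next = head;
		ctx->prev = tail;
		head->prev = ctx;
		tail->next = ctx;
	}
	t_ctx = ctx;
}

/* Forward walk (newest first): the savectx() order for save_op. */
static void
model_save(void)
{
	model_ctxop_t *ctx = t_ctx;

	if (ctx == NULL)
		return;
	do {
		printf("save    %s\n", ctx->name);
		ctx = ctx->next;
	} while (ctx != t_ctx);
}

/* Walk from the tail (oldest first): the restorectx() order for restore_op. */
static void
model_restore(void)
{
	model_ctxop_t *tail, *ctx;

	if (t_ctx == NULL)
		return;
	ctx = tail = t_ctx->prev;
	do {
		printf("restore %s\n", ctx->name);
		ctx = ctx->prev;
	} while (ctx != tail);
}

int
main(void)
{
	model_install("host-fpu");	/* installed first (oldest) */
	model_install("guest-fpu");	/* installed later (newest) */

	model_save();			/* guest-fpu, then host-fpu */
	model_restore();		/* host-fpu, then guest-fpu */
	return (0);
}

Running it prints the saves newest-first and the restores oldest-first, which is exactly the property the installctx() comment calls out for the guest/host FPU case.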
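The split-stack machinery itself lives in platform code (thread_splitstack_run() and thread_splitstack_cleanup(), plus the T_SPLITSTK and t_stk/t_stkbase bookkeeping above), but the basic idea of jumping onto a temporarily allocated stack, running a function, and then returning and freeing it can be sketched in userland with ucontext(3C). The sketch below models only that control flow; the 64K size and function names are made up, and none of the kernel-side concerns (debugger-visible backtraces, t_onfault, thread_exit(), nesting) are represented.

/* Userland sketch of the split-stack control flow -- illustration only. */
#include <ucontext.h>
#include <stdio.h>
#include <stdlib.h>

static ucontext_t main_ctx, split_ctx;

static void
on_split_stack(void)
{
	char probe;

	/* Local variables now live on the temporary stack. */
	printf("running near %p on the temporary stack\n", (void *)&probe);
	/* Returning resumes main_ctx via uc_link. */
}

static void
splitstack_run(size_t stksize)
{
	void *stk = malloc(stksize);

	if (stk == NULL)
		abort();
	getcontext(&split_ctx);
	split_ctx.uc_stack.ss_sp = stk;
	split_ctx.uc_stack.ss_size = stksize;
	split_ctx.uc_link = &main_ctx;		/* where to resume on return */
	makecontext(&split_ctx, on_split_stack, 0);

	swapcontext(&main_ctx, &split_ctx);	/* jump onto the new stack */
	free(stk);				/* back on the original stack */
}

int
main(void)
{
	char probe;

	printf("running near %p on the original stack\n", (void *)&probe);
	splitstack_run(64 * 1024);
	printf("back near %p on the original stack\n", (void *)&probe);
	return (0);
}

In the kernel version the interesting part is precisely what this sketch omits: updating t_stk and t_stkbase under thread_lock() so that stack walkers and the rest of the system keep a consistent view while the thread is off its declared stack.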