Diffstat (limited to 'usr/src/uts/common/disp/thread.c')
-rw-r--r--  usr/src/uts/common/disp/thread.c  372
1 file changed, 283 insertions, 89 deletions
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c
index 854b33798d..d576738e75 100644
--- a/usr/src/uts/common/disp/thread.c
+++ b/usr/src/uts/common/disp/thread.c
@@ -75,6 +75,11 @@
#include <sys/cpucaps.h>
#include <sys/kiconv.h>
#include <sys/ctype.h>
+#include <sys/ht.h>
+
+#ifndef STACK_GROWTH_DOWN
+#error Stacks do not grow downward; 3b2 zombie attack detected!
+#endif
struct kmem_cache *thread_cache; /* cache of free threads */
struct kmem_cache *lwp_cache; /* cache of free lwps */
@@ -373,7 +378,7 @@ thread_create(
if (stksize <= sizeof (kthread_t) + PTR24_ALIGN)
cmn_err(CE_PANIC, "thread_create: proposed stack size"
" too small to hold thread.");
-#ifdef STACK_GROWTH_DOWN
+
stksize -= SA(sizeof (kthread_t) + PTR24_ALIGN - 1);
stksize &= -PTR24_ALIGN; /* make thread aligned */
t = (kthread_t *)(stk + stksize);
@@ -382,13 +387,6 @@ thread_create(
audit_thread_create(t);
t->t_stk = stk + stksize;
t->t_stkbase = stk;
-#else /* stack grows to larger addresses */
- stksize -= SA(sizeof (kthread_t));
- t = (kthread_t *)(stk);
- bzero(t, sizeof (kthread_t));
- t->t_stk = stk + sizeof (kthread_t);
- t->t_stkbase = stk + stksize + sizeof (kthread_t);
-#endif /* STACK_GROWTH_DOWN */
t->t_flag |= T_TALLOCSTK;
t->t_swap = stk;
} else {
@@ -401,13 +399,8 @@ thread_create(
* Initialize t_stk to the kernel stack pointer to use
* upon entry to the kernel
*/
-#ifdef STACK_GROWTH_DOWN
t->t_stk = stk + stksize;
t->t_stkbase = stk;
-#else
- t->t_stk = stk; /* 3b2-like */
- t->t_stkbase = stk + stksize;
-#endif /* STACK_GROWTH_DOWN */
}
if (kmem_stackinfo != 0) {
@@ -487,15 +480,9 @@ thread_create(
curthread->t_prev = t;
/*
- * Threads should never have a NULL t_cpu pointer so assign it
- * here. If the thread is being created with state TS_RUN a
- * better CPU may be chosen when it is placed on the run queue.
- *
- * We need to keep kernel preemption disabled when setting all
- * three fields to keep them in sync. Also, always create in
- * the default partition since that's where kernel threads go
- * (if this isn't a kernel thread, t_cpupart will be changed
- * in lwp_create before setting the thread runnable).
+ * We'll always create in the default partition since that's where
+ * kernel threads go (we'll change this later if needed, in
+ * lwp_create()).
*/
t->t_cpupart = &cp_default;
@@ -504,20 +491,23 @@ thread_create(
* Since the kernel does not (presently) allocate its memory
* in a locality aware fashion, the root is an appropriate home.
* If this thread is later associated with an lwp, it will have
- * it's lgroup re-assigned at that time.
+ * its lgroup re-assigned at that time.
*/
lgrp_move_thread(t, &cp_default.cp_lgrploads[LGRP_ROOTID], 1);
/*
- * Inherit the current cpu. If this cpu isn't part of the chosen
- * lgroup, a new cpu will be chosen by cpu_choose when the thread
- * is ready to run.
+ * If the current CPU is in the default cpupart, use it. Otherwise,
+ * pick one that is; before entering the dispatcher code, we'll
+ * make sure to keep the invariant that ->t_cpu is set. (In fact, we
+ * rely on this, in ht_should_run(), in the call tree of
+ * disp_lowpri_cpu().)
*/
- if (CPU->cpu_part == &cp_default)
+ if (CPU->cpu_part == &cp_default) {
t->t_cpu = CPU;
- else
- t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t->t_lpl,
- t->t_pri, NULL);
+ } else {
+ t->t_cpu = cp_default.cp_cpulist;
+ t->t_cpu = disp_lowpri_cpu(t->t_cpu, t, t->t_pri);
+ }
t->t_disp_queue = t->t_cpu->cpu_disp;
kpreempt_enable();
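
[Editor's note: the comment above leans on an invariant worth spelling out: t->t_cpu is first seeded with a legal CPU from the default partition so that anything in disp_lowpri_cpu()'s call tree that dereferences it (ht_should_run() is the cited example) never sees a NULL pointer, and only then is it refined to the chosen CPU. Below is a minimal user-space sketch of that "seed, then refine" pattern; cpu_like, thread_like, choose_lowest_load and place_thread are invented names standing in for the kernel structures, not part of the change.]

#include <stddef.h>

struct cpu_like {
	int load;
	struct cpu_like *next;		/* next CPU in the partition's list */
};

struct thread_like {
	struct cpu_like *cpu;		/* analogue of t->t_cpu */
};

/*
 * Analogue of disp_lowpri_cpu(): allowed to dereference t->cpu while
 * choosing, the way ht_should_run() is in the kernel.
 */
static struct cpu_like *
choose_lowest_load(struct cpu_like *list, struct thread_like *t)
{
	struct cpu_like *best = t->cpu;		/* relies on the seeded value */
	struct cpu_like *c;

	for (c = list; c != NULL; c = c->next) {
		if (c->load < best->load)
			best = c;
	}
	return (best);
}

static void
place_thread(struct thread_like *t, struct cpu_like *part_list)
{
	t->cpu = part_list;				/* seed with a legal CPU first */
	t->cpu = choose_lowest_load(part_list, t);	/* then refine */
}
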
@@ -590,6 +580,9 @@ thread_exit(void)
if ((t->t_proc_flag & TP_ZTHREAD) != 0)
cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");
+ if ((t->t_flag & T_SPLITSTK) != 0)
+ cmn_err(CE_PANIC, "thread_exit: called when stack is split");
+
tsd_exit(); /* Clean up this thread's TSD */
kcpc_passivate(); /* clean up performance counter state */
@@ -870,12 +863,12 @@ thread_zone_destroy(zoneid_t zoneid, void *unused)
/*
* Guard against race condition in mutex_owner_running:
- * thread=owner(mutex)
- * <interrupt>
- * thread exits mutex
- * thread exits
- * thread reaped
- * thread struct freed
+ * thread=owner(mutex)
+ * <interrupt>
+ * thread exits mutex
+ * thread exits
+ * thread reaped
+ * thread struct freed
* cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
* A cross call to all cpus will cause the interrupt handler
* to reset the PC if it is in mutex_owner_running, refreshing
@@ -932,12 +925,12 @@ thread_reaper()
/*
* Guard against race condition in mutex_owner_running:
- * thread=owner(mutex)
- * <interrupt>
- * thread exits mutex
- * thread exits
- * thread reaped
- * thread struct freed
+ * thread=owner(mutex)
+ * <interrupt>
+ * thread exits mutex
+ * thread exits
+ * thread reaped
+ * thread struct freed
* cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
* A cross call to all cpus will cause the interrupt handler
* to reset the PC if it is in mutex_owner_running, refreshing
@@ -1055,8 +1048,44 @@ installctx(
ctx->exit_op = exit;
ctx->free_op = free;
ctx->arg = arg;
- ctx->next = t->t_ctx;
+ ctx->save_ts = 0;
+ ctx->restore_ts = 0;
+
+ /*
+ * Keep ctxops in a doubly-linked list to allow traversal in both
+ * directions. Using only the newest-to-oldest ordering was adequate
+ * previously, but reversing the order for restore_op actions is
+ * necessary if later-added ctxops depend on earlier ones.
+ *
+ * One example of such a dependency: Hypervisor software handling the
+ * guest FPU expects to save the guest FPU state prior to the host FPU
+ * handling, and consequently to handle the guest restore logic _after_
+ * the host FPU has been restored.
+ *
+ * The t_ctx member points to the most recently added ctxop or is NULL
+ * if no ctxops are associated with the thread. The 'next' pointers
+ * form a loop of the ctxops in newest-to-oldest order. The 'prev'
+ * pointers form a loop in the reverse direction, where t_ctx->prev is
+ * the oldest entry associated with the thread.
+ *
+ * The protection of kpreempt_disable is required to safely perform the
+ * list insertion, since there are inconsistent states between some of
+ * the pointer assignments.
+ */
+ kpreempt_disable();
+ if (t->t_ctx == NULL) {
+ ctx->next = ctx;
+ ctx->prev = ctx;
+ } else {
+ struct ctxop *head = t->t_ctx, *tail = t->t_ctx->prev;
+
+ ctx->next = head;
+ ctx->prev = tail;
+ head->prev = ctx;
+ tail->next = ctx;
+ }
t->t_ctx = ctx;
+ kpreempt_enable();
}
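
[Editor's note: a compact user-space rendering of the circular, doubly-linked insertion that installctx() now performs. struct node and push_head() are illustrative names, not the kernel's; the shape of the two cases (empty list vs. splice before the current head) mirrors the code above.]

#include <stddef.h>

struct node {
	struct node *next;	/* newest-to-oldest direction */
	struct node *prev;	/* oldest-to-newest direction */
	int val;
};

/* Insert 'n' as the new head of the circular list rooted at '*headp'. */
static void
push_head(struct node **headp, struct node *n)
{
	if (*headp == NULL) {
		/* sole element: both links point back at itself */
		n->next = n;
		n->prev = n;
	} else {
		struct node *head = *headp, *tail = head->prev;

		n->next = head;
		n->prev = tail;
		head->prev = n;
		tail->next = n;
	}
	*headp = n;
}
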
/*
@@ -1073,7 +1102,7 @@ removectx(
void (*exit)(void *),
void (*free)(void *, int))
{
- struct ctxop *ctx, *prev_ctx;
+ struct ctxop *ctx, *head;
/*
* The incoming kthread_t (which is the thread for which the
@@ -1098,17 +1127,31 @@ removectx(
* and the target thread from racing with each other during lwp exit.
*/
mutex_enter(&t->t_ctx_lock);
- prev_ctx = NULL;
kpreempt_disable();
- for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next) {
+
+ if (t->t_ctx == NULL) {
+ mutex_exit(&t->t_ctx_lock);
+ kpreempt_enable();
+ return (0);
+ }
+
+ ctx = head = t->t_ctx;
+ do {
if (ctx->save_op == save && ctx->restore_op == restore &&
ctx->fork_op == fork && ctx->lwp_create_op == lwp_create &&
ctx->exit_op == exit && ctx->free_op == free &&
ctx->arg == arg) {
- if (prev_ctx)
- prev_ctx->next = ctx->next;
- else
+ ctx->prev->next = ctx->next;
+ ctx->next->prev = ctx->prev;
+ if (ctx->next == ctx) {
+ /* last remaining item */
+ t->t_ctx = NULL;
+ } else if (ctx == t->t_ctx) {
+ /* fix up head of list */
t->t_ctx = ctx->next;
+ }
+ ctx->next = ctx->prev = NULL;
+
mutex_exit(&t->t_ctx_lock);
if (ctx->free_op != NULL)
(ctx->free_op)(ctx->arg, 0);
@@ -1116,44 +1159,70 @@ removectx(
kpreempt_enable();
return (1);
}
- prev_ctx = ctx;
- }
+
+ ctx = ctx->next;
+ } while (ctx != head);
+
mutex_exit(&t->t_ctx_lock);
kpreempt_enable();
-
return (0);
}
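
[Editor's note: the matching unlink step, mirroring removectx() above and reusing the illustrative struct node from the previous sketch. The two special cases are the same ones the kernel code handles: the list becoming empty, and the head itself being removed.]

/* Unlink 'n' from the circular list rooted at '*headp'. */
static void
unlink_node(struct node **headp, struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	if (n->next == n) {
		/* 'n' was the only element; the list is now empty */
		*headp = NULL;
	} else if (n == *headp) {
		/* removing the head; the next-newest entry becomes the head */
		*headp = n->next;
	}
	n->next = n->prev = NULL;
}
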
void
savectx(kthread_t *t)
{
- struct ctxop *ctx;
-
ASSERT(t == curthread);
- for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
- if (ctx->save_op != NULL)
- (ctx->save_op)(ctx->arg);
+
+ if (t->t_ctx != NULL) {
+ struct ctxop *ctx, *head;
+
+ /* Forward traversal */
+ ctx = head = t->t_ctx;
+ do {
+ if (ctx->save_op != NULL) {
+ ctx->save_ts = gethrtime_unscaled();
+ (ctx->save_op)(ctx->arg);
+ }
+ ctx = ctx->next;
+ } while (ctx != head);
+ }
}
void
restorectx(kthread_t *t)
{
- struct ctxop *ctx;
-
ASSERT(t == curthread);
- for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
- if (ctx->restore_op != NULL)
- (ctx->restore_op)(ctx->arg);
+
+ if (t->t_ctx != NULL) {
+ struct ctxop *ctx, *tail;
+
+ /* Backward traversal (starting at the tail) */
+ ctx = tail = t->t_ctx->prev;
+ do {
+ if (ctx->restore_op != NULL) {
+ ctx->restore_ts = gethrtime_unscaled();
+ (ctx->restore_op)(ctx->arg);
+ }
+ ctx = ctx->prev;
+ } while (ctx != tail);
+ }
}
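
[Editor's note: savectx() and restorectx() above walk the same ring in opposite directions: save runs newest-to-oldest starting at t_ctx, restore runs oldest-to-newest starting at t_ctx->prev. A small sketch of the two walks, assuming the illustrative struct node defined earlier.]

#include <stdio.h>

/* Newest first, starting at the head -- the savectx()/forkctx() order. */
static void
walk_forward(struct node *head)
{
	struct node *n;

	if (head == NULL)
		return;
	n = head;
	do {
		printf("%d\n", n->val);
		n = n->next;
	} while (n != head);
}

/* Oldest first, starting at the tail (head->prev) -- the restorectx() order. */
static void
walk_backward(struct node *head)
{
	struct node *n, *tail;

	if (head == NULL)
		return;
	n = tail = head->prev;
	do {
		printf("%d\n", n->val);
		n = n->prev;
	} while (n != tail);
}
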
void
forkctx(kthread_t *t, kthread_t *ct)
{
- struct ctxop *ctx;
-
- for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
- if (ctx->fork_op != NULL)
- (ctx->fork_op)(t, ct);
+ if (t->t_ctx != NULL) {
+ struct ctxop *ctx, *head;
+
+ /* Forward traversal */
+ ctx = head = t->t_ctx;
+ do {
+ if (ctx->fork_op != NULL) {
+ (ctx->fork_op)(t, ct);
+ }
+ ctx = ctx->next;
+ } while (ctx != head);
+ }
}
/*
@@ -1164,11 +1233,18 @@ forkctx(kthread_t *t, kthread_t *ct)
void
lwp_createctx(kthread_t *t, kthread_t *ct)
{
- struct ctxop *ctx;
-
- for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
- if (ctx->lwp_create_op != NULL)
- (ctx->lwp_create_op)(t, ct);
+ if (t->t_ctx != NULL) {
+ struct ctxop *ctx, *head;
+
+ /* Forward traversal */
+ ctx = head = t->t_ctx;
+ do {
+ if (ctx->lwp_create_op != NULL) {
+ (ctx->lwp_create_op)(t, ct);
+ }
+ ctx = ctx->next;
+ } while (ctx != head);
+ }
}
/*
@@ -1181,11 +1257,18 @@ lwp_createctx(kthread_t *t, kthread_t *ct)
void
exitctx(kthread_t *t)
{
- struct ctxop *ctx;
-
- for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
- if (ctx->exit_op != NULL)
- (ctx->exit_op)(t);
+ if (t->t_ctx != NULL) {
+ struct ctxop *ctx, *head;
+
+ /* Forward traversal */
+ ctx = head = t->t_ctx;
+ do {
+ if (ctx->exit_op != NULL) {
+ (ctx->exit_op)(t);
+ }
+ ctx = ctx->next;
+ } while (ctx != head);
+ }
}
/*
@@ -1195,14 +1278,21 @@ exitctx(kthread_t *t)
void
freectx(kthread_t *t, int isexec)
{
- struct ctxop *ctx;
-
kpreempt_disable();
- while ((ctx = t->t_ctx) != NULL) {
- t->t_ctx = ctx->next;
- if (ctx->free_op != NULL)
- (ctx->free_op)(ctx->arg, isexec);
- kmem_free(ctx, sizeof (struct ctxop));
+ if (t->t_ctx != NULL) {
+ struct ctxop *ctx, *head;
+
+ ctx = head = t->t_ctx;
+ t->t_ctx = NULL;
+ do {
+ struct ctxop *next = ctx->next;
+
+ if (ctx->free_op != NULL) {
+ (ctx->free_op)(ctx->arg, isexec);
+ }
+ kmem_free(ctx, sizeof (struct ctxop));
+ ctx = next;
+ } while (ctx != head);
}
kpreempt_enable();
}
@@ -1217,17 +1307,22 @@ freectx(kthread_t *t, int isexec)
void
freectx_ctx(struct ctxop *ctx)
{
- struct ctxop *nctx;
+ struct ctxop *head = ctx;
ASSERT(ctx != NULL);
kpreempt_disable();
+
do {
- nctx = ctx->next;
- if (ctx->free_op != NULL)
+ struct ctxop *next = ctx->next;
+
+ if (ctx->free_op != NULL) {
(ctx->free_op)(ctx->arg, 0);
+ }
kmem_free(ctx, sizeof (struct ctxop));
- } while ((ctx = nctx) != NULL);
+ ctx = next;
+ } while (ctx != head);
kpreempt_enable();
}
@@ -1326,6 +1421,8 @@ thread_unpin()
itp = t->t_intr; /* interrupted thread */
t->t_intr = NULL; /* clear interrupt ptr */
+ ht_end_intr();
+
/*
* Get state from interrupt thread for the one
* it interrupted.
@@ -1422,7 +1519,7 @@ thread_create_intr(struct cpu *cp)
static kmutex_t tsd_mutex; /* linked list spin lock */
static uint_t tsd_nkeys; /* size of destructor array */
/* per-key destructor funcs */
-static void (**tsd_destructor)(void *);
+static void (**tsd_destructor)(void *);
/* list of tsd_thread's */
static struct tsd_thread *tsd_list;
@@ -1889,6 +1986,103 @@ thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
return (on_rq);
}
+
+/*
+ * There are occasions in the kernel when we need much more stack than we
+ * allocate by default, but we do not wish to have that work done
+ * asynchronously by another thread. To accommodate these scenarios, we allow
+ * for a split stack (also known as a "segmented stack") whereby a new stack
+ * is dynamically allocated and the current thread jumps onto it for purposes
+ * of executing the specified function. After the specified function returns,
+ * the stack is deallocated and control is returned to the caller. This
+ * functionality is implemented by thread_splitstack(), below; there are a few
+ * constraints on its use:
+ *
+ * - The caller must be in a context where it is safe to block for memory.
+ * - The caller cannot be in a t_onfault context.
+ * - The called function must not call thread_exit() while on the split stack.
+ *
+ * The code will explicitly panic if these constraints are violated. Notably,
+ * however, thread_splitstack() _can_ be called on a split stack -- there
+ * is no limit to the level that split stacks can nest.
+ *
+ * When the stack is split, it is constructed such that stack backtraces
+ * from kernel debuggers continue to function -- though note that DTrace's
+ * stack() action and stackdepth function will only show the stack up to and
+ * including thread_splitstack_run(); DTrace explicitly bounds itself to
+ * pointers that exist within the current declared stack as a safety
+ * mechanism.
+ */
+void
+thread_splitstack(void (*func)(void *), void *arg, size_t stksize)
+{
+ kthread_t *t = curthread;
+ caddr_t ostk, ostkbase, stk;
+ ushort_t otflag;
+
+ if (t->t_onfault != NULL)
+ panic("thread_splitstack: called with non-NULL t_onfault");
+
+ ostk = t->t_stk;
+ ostkbase = t->t_stkbase;
+ otflag = t->t_flag;
+
+ stksize = roundup(stksize, PAGESIZE);
+
+ if (stksize < default_stksize)
+ stksize = default_stksize;
+
+ if (stksize == default_stksize) {
+ stk = (caddr_t)segkp_cache_get(segkp_thread);
+ } else {
+ stksize = roundup(stksize, PAGESIZE);
+ stk = (caddr_t)segkp_get(segkp, stksize,
+ (KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED));
+ }
+
+ /*
+ * We're going to lock ourselves before we set T_SPLITSTK to assure
+ * that we're not swapped out in the meantime. (Note that we don't
+ * bother to set t_swap, as we're not going to be swapped out.)
+ */
+ thread_lock(t);
+
+ if (!(otflag & T_SPLITSTK))
+ t->t_flag |= T_SPLITSTK;
+
+ t->t_stk = stk + stksize;
+ t->t_stkbase = stk;
+
+ thread_unlock(t);
+
+ /*
+ * Now actually run on the new (split) stack...
+ */
+ thread_splitstack_run(t->t_stk, func, arg);
+
+ /*
+ * We're back onto our own stack; lock ourselves and restore our
+ * pre-split state.
+ */
+ thread_lock(t);
+
+ t->t_stk = ostk;
+ t->t_stkbase = ostkbase;
+
+ if (!(otflag & T_SPLITSTK))
+ t->t_flag &= ~T_SPLITSTK;
+
+ thread_unlock(t);
+
+ /*
+ * Now that we are entirely back on our own stack, call back into
+ * the platform layer to perform any platform-specific cleanup.
+ */
+ thread_splitstack_cleanup();
+
+ segkp_release(segkp, stk);
+}
+
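
[Editor's note: a hedged usage sketch of the new interface. Only thread_splitstack() and the constraints documented above come from the change; deep_walk(), example_caller() and the 1MB figure are hypothetical, and this is kernel-context code rather than a standalone program.]

/* Hypothetical callback; must not call thread_exit() while on the split stack. */
static void
deep_walk(void *arg)
{
	/* ... work that needs far more than default_stksize of stack ... */
}

static void
example_caller(void *arg)
{
	/*
	 * The caller must be able to block for memory and must not have
	 * t_onfault set; thread_splitstack() panics otherwise.
	 */
	thread_splitstack(deep_walk, arg, 1024 * 1024);
}
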
/*
* Tunable kmem_stackinfo is set, fill the kernel thread stack with a
* specific pattern.