Diffstat (limited to 'usr/src/uts/common/disp/thread.c')
-rw-r--r--   usr/src/uts/common/disp/thread.c   372
1 file changed, 310 insertions(+), 62 deletions(-)
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c
index f2685af534..c923ba5d1a 100644
--- a/usr/src/uts/common/disp/thread.c
+++ b/usr/src/uts/common/disp/thread.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2018 Joyent, Inc.
*/
#include <sys/types.h>
@@ -74,6 +74,11 @@
#include <sys/waitq.h>
#include <sys/cpucaps.h>
#include <sys/kiconv.h>
+#include <sys/ht.h>
+
+#ifndef STACK_GROWTH_DOWN
+#error Stacks do not grow downward; 3b2 zombie attack detected!
+#endif
struct kmem_cache *thread_cache; /* cache of free threads */
struct kmem_cache *lwp_cache; /* cache of free lwps */
@@ -372,7 +377,7 @@ thread_create(
if (stksize <= sizeof (kthread_t) + PTR24_ALIGN)
cmn_err(CE_PANIC, "thread_create: proposed stack size"
" too small to hold thread.");
-#ifdef STACK_GROWTH_DOWN
+
stksize -= SA(sizeof (kthread_t) + PTR24_ALIGN - 1);
stksize &= -PTR24_ALIGN; /* make thread aligned */
t = (kthread_t *)(stk + stksize);
@@ -381,13 +386,6 @@ thread_create(
audit_thread_create(t);
t->t_stk = stk + stksize;
t->t_stkbase = stk;
-#else /* stack grows to larger addresses */
- stksize -= SA(sizeof (kthread_t));
- t = (kthread_t *)(stk);
- bzero(t, sizeof (kthread_t));
- t->t_stk = stk + sizeof (kthread_t);
- t->t_stkbase = stk + stksize + sizeof (kthread_t);
-#endif /* STACK_GROWTH_DOWN */
t->t_flag |= T_TALLOCSTK;
t->t_swap = stk;
} else {
@@ -400,13 +398,8 @@ thread_create(
* Initialize t_stk to the kernel stack pointer to use
* upon entry to the kernel
*/
-#ifdef STACK_GROWTH_DOWN
t->t_stk = stk + stksize;
t->t_stkbase = stk;
-#else
- t->t_stk = stk; /* 3b2-like */
- t->t_stkbase = stk + stksize;
-#endif /* STACK_GROWTH_DOWN */
}
if (kmem_stackinfo != 0) {
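
With the upward-growth branches removed above, the layout is always the same: the kthread_t is carved out of the (aligned) top of a downward-growing stack. The stand-alone sketch below illustrates only the rounding arithmetic from that hunk; the align value and sizes are invented stand-ins, not the kernel's PTR24_ALIGN or SA() values.

#include <stdio.h>

int
main(void)
{
	unsigned long stksize = 8192;	/* hypothetical stack size */
	unsigned long thread_sz = 360;	/* hypothetical sizeof (kthread_t) */
	const unsigned long align = 32;	/* hypothetical power-of-two alignment */

	/* Reserve room for the thread structure at the top of the stack. */
	stksize -= thread_sz + align - 1;

	/* ANDing with -align rounds down to a multiple of align. */
	stksize &= -align;

	printf("thread sits at stack offset %lu (multiple of %lu: %s)\n",
	    stksize, align, (stksize % align == 0) ? "yes" : "no");
	return (0);
}
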
@@ -515,8 +508,8 @@ thread_create(
if (CPU->cpu_part == &cp_default)
t->t_cpu = CPU;
else
- t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t->t_lpl,
- t->t_pri, NULL);
+ t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t,
+ t->t_pri);
t->t_disp_queue = t->t_cpu->cpu_disp;
kpreempt_enable();
@@ -589,6 +582,9 @@ thread_exit(void)
if ((t->t_proc_flag & TP_ZTHREAD) != 0)
cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");
+ if ((t->t_flag & T_SPLITSTK) != 0)
+ cmn_err(CE_PANIC, "thread_exit: called when stack is split");
+
tsd_exit(); /* Clean up this thread's TSD */
kcpc_passivate(); /* clean up performance counter state */
@@ -791,6 +787,11 @@ thread_free(kthread_t *t)
nthread--;
mutex_exit(&pidlock);
+ if (t->t_name != NULL) {
+ kmem_free(t->t_name, THREAD_NAME_MAX);
+ t->t_name = NULL;
+ }
+
/*
* Free thread, lwp and stack. This needs to be done carefully, since
* if T_TALLOCSTK is set, the thread is part of the stack.
@@ -1049,8 +1050,44 @@ installctx(
ctx->exit_op = exit;
ctx->free_op = free;
ctx->arg = arg;
- ctx->next = t->t_ctx;
+ ctx->save_ts = 0;
+ ctx->restore_ts = 0;
+
+ /*
+ * Keep ctxops in a doubly-linked list to allow traversal in both
+ * directions. Using only the newest-to-oldest ordering was adequate
+ * previously, but reversing the order for restore_op actions is
+ * necessary if later-added ctxops depend on earlier ones.
+ *
+ * One example of such a dependency: Hypervisor software handling the
+ * guest FPU expects to save the guest FPU state prior to host FPU
+ * handling and to handle the guest logic only _after_ the host FPU has
+ * been restored.
+ *
+ * The t_ctx member points to the most recently added ctxop or is NULL
+ * if no ctxops are associated with the thread. The 'next' pointers
+ * form a loop of the ctxops in newest-to-oldest order. The 'prev'
+ * pointers form a loop in the reverse direction, where t_ctx->prev is
+ * the oldest entry associated with the thread.
+ *
+ * The protection of kpreempt_disable is required to safely perform the
+ * list insertion, since there are inconsistent states between some of
+ * the pointer assignments.
+ */
+ kpreempt_disable();
+ if (t->t_ctx == NULL) {
+ ctx->next = ctx;
+ ctx->prev = ctx;
+ } else {
+ struct ctxop *head = t->t_ctx, *tail = t->t_ctx->prev;
+
+ ctx->next = head;
+ ctx->prev = tail;
+ head->prev = ctx;
+ tail->next = ctx;
+ }
t->t_ctx = ctx;
+ kpreempt_enable();
}
/*
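
To make the splice above easier to follow outside the kernel, here is a minimal user-space model of the same head insertion. node_t and insert_head() are invented stand-ins for struct ctxop and installctx(), and the kpreempt_disable() protection the kernel needs is deliberately omitted.

#include <stdio.h>
#include <stdlib.h>

typedef struct node {
	struct node *next;	/* newest-to-oldest loop */
	struct node *prev;	/* oldest-to-newest loop */
	int id;
} node_t;

static node_t *
insert_head(node_t *head, int id)
{
	node_t *n = malloc(sizeof (node_t));	/* error handling omitted */

	n->id = id;
	if (head == NULL) {
		n->next = n;
		n->prev = n;
	} else {
		node_t *tail = head->prev;	/* oldest entry */

		n->next = head;
		n->prev = tail;
		head->prev = n;
		tail->next = n;
	}
	return (n);			/* the new node becomes the head */
}

int
main(void)
{
	node_t *head = NULL, *n;
	int id;

	for (id = 1; id <= 3; id++)
		head = insert_head(head, id);

	/* Walk newest-to-oldest, as savectx() does: prints 3 2 1. */
	n = head;
	do {
		printf("%d ", n->id);
		n = n->next;
	} while (n != head);
	printf("\n");
	return (0);
}
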
@@ -1067,7 +1104,7 @@ removectx(
void (*exit)(void *),
void (*free)(void *, int))
{
- struct ctxop *ctx, *prev_ctx;
+ struct ctxop *ctx, *head;
/*
* The incoming kthread_t (which is the thread for which the
@@ -1092,17 +1129,31 @@ removectx(
* and the target thread from racing with each other during lwp exit.
*/
mutex_enter(&t->t_ctx_lock);
- prev_ctx = NULL;
kpreempt_disable();
- for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next) {
+
+ if (t->t_ctx == NULL) {
+ mutex_exit(&t->t_ctx_lock);
+ kpreempt_enable();
+ return (0);
+ }
+
+ ctx = head = t->t_ctx;
+ do {
if (ctx->save_op == save && ctx->restore_op == restore &&
ctx->fork_op == fork && ctx->lwp_create_op == lwp_create &&
ctx->exit_op == exit && ctx->free_op == free &&
ctx->arg == arg) {
- if (prev_ctx)
- prev_ctx->next = ctx->next;
- else
+ ctx->prev->next = ctx->next;
+ ctx->next->prev = ctx->prev;
+ if (ctx->next == ctx) {
+ /* last remaining item */
+ t->t_ctx = NULL;
+ } else if (ctx == t->t_ctx) {
+ /* fix up head of list */
t->t_ctx = ctx->next;
+ }
+ ctx->next = ctx->prev = NULL;
+
mutex_exit(&t->t_ctx_lock);
if (ctx->free_op != NULL)
(ctx->free_op)(ctx->arg, 0);
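
The removal above has the two edge cases called out in its comments. A small user-space model of just the unlink logic (node_t and unlink_node() are illustrative names; no locking is modeled):

#include <assert.h>
#include <stddef.h>

typedef struct node {
	struct node *next;
	struct node *prev;
} node_t;

/* Unlink n from the circular list whose head is *headp. */
static void
unlink_node(node_t **headp, node_t *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	if (n->next == n) {
		*headp = NULL;		/* last remaining item */
	} else if (n == *headp) {
		*headp = n->next;	/* fix up head of list */
	}
	n->next = n->prev = NULL;
}

int
main(void)
{
	node_t a, b;
	node_t *head;

	/* Two-entry loop with b as the newest (head) entry. */
	b.next = &a; b.prev = &a;
	a.next = &b; a.prev = &b;
	head = &b;

	unlink_node(&head, &b);		/* removing the head... */
	assert(head == &a);		/* ...moves it to the next entry */

	unlink_node(&head, &a);		/* removing the last item... */
	assert(head == NULL);		/* ...leaves an empty list */
	return (0);
}
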
@@ -1110,44 +1161,70 @@ removectx(
kpreempt_enable();
return (1);
}
- prev_ctx = ctx;
- }
+
+ ctx = ctx->next;
+ } while (ctx != head);
+
mutex_exit(&t->t_ctx_lock);
kpreempt_enable();
-
return (0);
}
void
savectx(kthread_t *t)
{
- struct ctxop *ctx;
-
ASSERT(t == curthread);
- for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
- if (ctx->save_op != NULL)
- (ctx->save_op)(ctx->arg);
+
+ if (t->t_ctx != NULL) {
+ struct ctxop *ctx, *head;
+
+ /* Forward traversal */
+ ctx = head = t->t_ctx;
+ do {
+ if (ctx->save_op != NULL) {
+ ctx->save_ts = gethrtime_unscaled();
+ (ctx->save_op)(ctx->arg);
+ }
+ ctx = ctx->next;
+ } while (ctx != head);
+ }
}
void
restorectx(kthread_t *t)
{
- struct ctxop *ctx;
-
ASSERT(t == curthread);
- for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
- if (ctx->restore_op != NULL)
- (ctx->restore_op)(ctx->arg);
+
+ if (t->t_ctx != NULL) {
+ struct ctxop *ctx, *tail;
+
+ /* Backward traversal (starting at the tail) */
+ ctx = tail = t->t_ctx->prev;
+ do {
+ if (ctx->restore_op != NULL) {
+ ctx->restore_ts = gethrtime_unscaled();
+ (ctx->restore_op)(ctx->arg);
+ }
+ ctx = ctx->prev;
+ } while (ctx != tail);
+ }
}
void
forkctx(kthread_t *t, kthread_t *ct)
{
- struct ctxop *ctx;
-
- for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
- if (ctx->fork_op != NULL)
- (ctx->fork_op)(t, ct);
+ if (t->t_ctx != NULL) {
+ struct ctxop *ctx, *head;
+
+ /* Forward traversal */
+ ctx = head = t->t_ctx;
+ do {
+ if (ctx->fork_op != NULL) {
+ (ctx->fork_op)(t, ct);
+ }
+ ctx = ctx->next;
+ } while (ctx != head);
+ }
}
/*
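
The payoff of the doubly-linked list shows up in savectx() and restorectx() above: save_op runs newest-to-oldest, restore_op oldest-to-newest. The user-space sketch below illustrates that ordering with a hypothetical "host FPU" op installed before a "guest FPU" op; op_t and the names are invented for the example.

#include <stdio.h>

typedef struct op {
	struct op *next;
	struct op *prev;
	const char *name;
} op_t;

static op_t host = { .name = "host FPU" };	/* installed first (oldest) */
static op_t guest = { .name = "guest FPU" };	/* installed last (newest) */
static op_t *t_ctx = &guest;			/* head: most recently installed */

int
main(void)
{
	op_t *c, *head, *tail;

	/* Wire the two-entry circular list by hand. */
	guest.next = &host;  guest.prev = &host;
	host.next = &guest;  host.prev = &guest;

	/* savectx(): forward traversal from the head, newest first. */
	c = head = t_ctx;
	do {
		printf("save    %s\n", c->name);
		c = c->next;
	} while (c != head);

	/* restorectx(): backward traversal from the tail, oldest first. */
	c = tail = t_ctx->prev;
	do {
		printf("restore %s\n", c->name);
		c = c->prev;
	} while (c != tail);

	return (0);
}

Running it prints the guest save before the host save and the host restore before the guest restore, which is the dependency described in the installctx() comment.
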
@@ -1158,11 +1235,18 @@ forkctx(kthread_t *t, kthread_t *ct)
void
lwp_createctx(kthread_t *t, kthread_t *ct)
{
- struct ctxop *ctx;
-
- for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
- if (ctx->lwp_create_op != NULL)
- (ctx->lwp_create_op)(t, ct);
+ if (t->t_ctx != NULL) {
+ struct ctxop *ctx, *head;
+
+ /* Forward traversal */
+ ctx = head = t->t_ctx;
+ do {
+ if (ctx->lwp_create_op != NULL) {
+ (ctx->lwp_create_op)(t, ct);
+ }
+ ctx = ctx->next;
+ } while (ctx != head);
+ }
}
/*
@@ -1175,11 +1259,18 @@ lwp_createctx(kthread_t *t, kthread_t *ct)
void
exitctx(kthread_t *t)
{
- struct ctxop *ctx;
-
- for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
- if (ctx->exit_op != NULL)
- (ctx->exit_op)(t);
+ if (t->t_ctx != NULL) {
+ struct ctxop *ctx, *head;
+
+ /* Forward traversal */
+ ctx = head = t->t_ctx;
+ do {
+ if (ctx->exit_op != NULL) {
+ (ctx->exit_op)(t);
+ }
+ ctx = ctx->next;
+ } while (ctx != head);
+ }
}
/*
@@ -1189,14 +1280,21 @@ exitctx(kthread_t *t)
void
freectx(kthread_t *t, int isexec)
{
- struct ctxop *ctx;
-
kpreempt_disable();
- while ((ctx = t->t_ctx) != NULL) {
- t->t_ctx = ctx->next;
- if (ctx->free_op != NULL)
- (ctx->free_op)(ctx->arg, isexec);
- kmem_free(ctx, sizeof (struct ctxop));
+ if (t->t_ctx != NULL) {
+ struct ctxop *ctx, *head;
+
+ ctx = head = t->t_ctx;
+ t->t_ctx = NULL;
+ do {
+ struct ctxop *next = ctx->next;
+
+ if (ctx->free_op != NULL) {
+ (ctx->free_op)(ctx->arg, isexec);
+ }
+ kmem_free(ctx, sizeof (struct ctxop));
+ ctx = next;
+ } while (ctx != head);
}
kpreempt_enable();
}
@@ -1211,17 +1309,22 @@ freectx(kthread_t *t, int isexec)
void
freectx_ctx(struct ctxop *ctx)
{
- struct ctxop *nctx;
+ struct ctxop *head = ctx;
ASSERT(ctx != NULL);
kpreempt_disable();
+
+ head = ctx;
do {
- nctx = ctx->next;
- if (ctx->free_op != NULL)
+ struct ctxop *next = ctx->next;
+
+ if (ctx->free_op != NULL) {
(ctx->free_op)(ctx->arg, 0);
+ }
kmem_free(ctx, sizeof (struct ctxop));
- } while ((ctx = nctx) != NULL);
+ ctx = next;
+ } while (ctx != head);
kpreempt_enable();
}
@@ -1320,6 +1423,8 @@ thread_unpin()
itp = t->t_intr; /* interrupted thread */
t->t_intr = NULL; /* clear interrupt ptr */
+ ht_end_intr();
+
/*
* Get state from interrupt thread for the one
* it interrupted.
@@ -1883,6 +1988,103 @@ thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
return (on_rq);
}
+
+/*
+ * There are occasions in the kernel when we need much more stack than we
+ * allocate by default, but we do not wish to have that work done
+ * asynchronously by another thread. To accommodate these scenarios, we allow
+ * for a split stack (also known as a "segmented stack") whereby a new stack
+ * is dynamically allocated and the current thread jumps onto it for purposes
+ * of executing the specified function. After the specified function returns,
+ * the stack is deallocated and control is returned to the caller. This
+ * functionality is implemented by thread_splitstack(), below; there are a few
+ * constraints on its use:
+ *
+ * - The caller must be in a context where it is safe to block for memory.
+ * - The caller cannot be in a t_onfault context
+ * - The called function must not call thread_exit() while on the split stack
+ *
+ * The code will explicitly panic if these constraints are violated. Notably,
+ * however, thread_splitstack() _can_ be called on a split stack -- there
+ * is no limit to the level that split stacks can nest.
+ *
+ * When the stack is split, it is constructed such that stack backtraces
+ * from kernel debuggers continue to function -- though note that DTrace's
+ * stack() action and stackdepth function will only show the stack up to and
+ * including thread_splitstack_run(); DTrace explicitly bounds itself to
+ * pointers that exist within the current declared stack as a safety
+ * mechanism.
+ */
+void
+thread_splitstack(void (*func)(void *), void *arg, size_t stksize)
+{
+ kthread_t *t = curthread;
+ caddr_t ostk, ostkbase, stk;
+ ushort_t otflag;
+
+ if (t->t_onfault != NULL)
+ panic("thread_splitstack: called with non-NULL t_onfault");
+
+ ostk = t->t_stk;
+ ostkbase = t->t_stkbase;
+ otflag = t->t_flag;
+
+ stksize = roundup(stksize, PAGESIZE);
+
+ if (stksize < default_stksize)
+ stksize = default_stksize;
+
+ if (stksize == default_stksize) {
+ stk = (caddr_t)segkp_cache_get(segkp_thread);
+ } else {
+ stksize = roundup(stksize, PAGESIZE);
+ stk = (caddr_t)segkp_get(segkp, stksize,
+ (KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED));
+ }
+
+ /*
+ * We're going to lock ourselves before we set T_SPLITSTK to assure
+ * that we're not swapped out in the meantime. (Note that we don't
+ * bother to set t_swap, as we're not going to be swapped out.)
+ */
+ thread_lock(t);
+
+ if (!(otflag & T_SPLITSTK))
+ t->t_flag |= T_SPLITSTK;
+
+ t->t_stk = stk + stksize;
+ t->t_stkbase = stk;
+
+ thread_unlock(t);
+
+ /*
+ * Now actually run on the new (split) stack...
+ */
+ thread_splitstack_run(t->t_stk, func, arg);
+
+ /*
+ * We're back onto our own stack; lock ourselves and restore our
+ * pre-split state.
+ */
+ thread_lock(t);
+
+ t->t_stk = ostk;
+ t->t_stkbase = ostkbase;
+
+ if (!(otflag & T_SPLITSTK))
+ t->t_flag &= ~T_SPLITSTK;
+
+ thread_unlock(t);
+
+ /*
+ * Now that we are entirely back on our own stack, call back into
+ * the platform layer to perform any platform-specific cleanup.
+ */
+ thread_splitstack_cleanup();
+
+ segkp_release(segkp, stk);
+}
+
/*
* Tunable kmem_stackinfo is set, fill the kernel thread stack with a
* specific pattern.
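
As a usage sketch only: a hypothetical in-kernel consumer of thread_splitstack() might look like the following. deep_walk(), run_deep_walk(), and deep_walk_arg_t are invented for illustration and are not part of this change; the governing constraints are the ones listed in the block comment above.

/*
 * Hypothetical consumer sketch; not compilable outside the kernel. The
 * caller must be able to block for memory, must not have t_onfault set,
 * and deep_walk() must not call thread_exit() while on the split stack.
 */
typedef struct deep_walk_arg {
	int	dwa_depth;		/* illustrative field */
} deep_walk_arg_t;

static void
deep_walk(void *arg)
{
	deep_walk_arg_t *dwa = arg;

	/* Deep recursion or large on-stack buffers would live here. */
	dwa->dwa_depth = 0;
}

static void
run_deep_walk(deep_walk_arg_t *dwa)
{
	/*
	 * The 64K here is only a request: thread_splitstack() rounds it up
	 * to a page multiple and never uses less than default_stksize.
	 */
	thread_splitstack(deep_walk, dwa, 64 * 1024);
}
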
@@ -2127,3 +2329,49 @@ stkinfo_percent(caddr_t t_stk, caddr_t t_stkbase, caddr_t sp)
}
return (percent);
}
+
+/*
+ * NOTE: This will silently truncate a name > THREAD_NAME_MAX - 1 characters
+ * long. It is expected that callers (acting on behalf of userland clients)
+ * will perform any required checks to return the correct error semantics.
+ * It is also expected that callers acting on behalf of userland clients
+ * have done any necessary permission checks.
+ */
+void
+thread_setname(kthread_t *t, const char *name)
+{
+ char *buf = NULL;
+
+ /*
+ * We optimistically assume that a thread's name will only be set
+ * once and so allocate memory in preparation for setting t_name.
+ * If it turns out a name has already been set, we just discard (free)
+ * the buffer we just allocated and reuse the current buffer
+ * (as all should be THREAD_NAME_MAX large).
+ *
+ * Such an arrangement means over the lifetime of a kthread_t, t_name
+ * is either NULL or has one value (the address of the buffer holding
+ * the current thread name). The assumption is that most kthread_t
+ * instances will not have a name assigned, so dynamically allocating
+ * the memory should minimize the footprint of this feature, but by
+ * having the buffer persist for the life of the thread, it simplifies
+ * usage in highly constrained situations (e.g. dtrace).
+ */
+ if (name != NULL && name[0] != '\0') {
+ buf = kmem_zalloc(THREAD_NAME_MAX, KM_SLEEP);
+ (void) strlcpy(buf, name, THREAD_NAME_MAX);
+ }
+
+ mutex_enter(&ttoproc(t)->p_lock);
+ if (t->t_name == NULL) {
+ t->t_name = buf;
+ } else {
+ if (buf != NULL) {
+ (void) strlcpy(t->t_name, name, THREAD_NAME_MAX);
+ kmem_free(buf, THREAD_NAME_MAX);
+ } else {
+ bzero(t->t_name, THREAD_NAME_MAX);
+ }
+ }
+ mutex_exit(&ttoproc(t)->p_lock);
+}
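
The allocate-once buffer strategy described in the comment above can be modeled in user space as follows; obj_t, obj_setname(), and NAME_MAX_LEN are illustrative stand-ins, there is no p_lock analogue, and strncpy() substitutes for strlcpy() for portability.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define	NAME_MAX_LEN	32		/* stand-in for THREAD_NAME_MAX */

typedef struct obj {
	char *o_name;			/* NULL until a name is first set */
} obj_t;

static void
obj_setname(obj_t *o, const char *name)
{
	char *buf = NULL;

	/* Optimistically allocate, assuming this is the first name set. */
	if (name != NULL && name[0] != '\0') {
		buf = calloc(1, NAME_MAX_LEN);	/* error handling omitted */
		(void) strncpy(buf, name, NAME_MAX_LEN - 1);
	}

	if (o->o_name == NULL) {
		o->o_name = buf;	/* first assignment keeps buf */
	} else if (buf != NULL) {
		/* A buffer already exists: reuse it, discard the new one. */
		(void) strncpy(o->o_name, name, NAME_MAX_LEN - 1);
		o->o_name[NAME_MAX_LEN - 1] = '\0';
		free(buf);
	} else {
		memset(o->o_name, 0, NAME_MAX_LEN);	/* clear the name */
	}
}

int
main(void)
{
	obj_t o = { NULL };

	obj_setname(&o, "worker");
	printf("%s\n", o.o_name);
	obj_setname(&o, "worker-renamed");
	printf("%s\n", o.o_name);
	free(o.o_name);
	return (0);
}
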