Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/uts/common/disp/thread.c            78
-rw-r--r--  usr/src/uts/common/os/cpu.c                  8
-rw-r--r--  usr/src/uts/common/os/lwp.c                 23
-rw-r--r--  usr/src/uts/common/os/mutex.c              282
-rw-r--r--  usr/src/uts/common/sys/mutex.h              13
-rw-r--r--  usr/src/uts/i86pc/os/intr.c                 17
-rw-r--r--  usr/src/uts/intel/ia32/ml/lock_prim.s      101
-rw-r--r--  usr/src/uts/intel/sys/mutex_impl.h          30
-rw-r--r--  usr/src/uts/sparc/v9/ml/lock_prim.s         97
-rw-r--r--  usr/src/uts/sparc/v9/sys/mutex_impl.h       24
-rw-r--r--  usr/src/uts/sun4u/ml/mach_locore.s          19
-rw-r--r--  usr/src/uts/sun4u/opl/os/opl.c              91
-rw-r--r--  usr/src/uts/sun4u/serengeti/os/serengeti.c   5
-rw-r--r--  usr/src/uts/sun4u/starcat/os/starcat.c       5
-rw-r--r--  usr/src/uts/sun4v/cpu/niagara.c             23
-rw-r--r--  usr/src/uts/sun4v/cpu/niagara2.c             5
-rw-r--r--  usr/src/uts/sun4v/ml/mach_locore.s          19
-rw-r--r--  usr/src/uts/sun4v/os/fillsysinfo.c          39
-rw-r--r--  usr/src/uts/sun4v/os/mach_startup.c         10
-rw-r--r--  usr/src/uts/sun4v/sys/cpu_module.h           4
20 files changed, 660 insertions, 233 deletions
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c
index ee2d80834d..928b594602 100644
--- a/usr/src/uts/common/disp/thread.c
+++ b/usr/src/uts/common/disp/thread.c
@@ -842,6 +842,21 @@ thread_zone_destroy(zoneid_t zoneid, void *unused)
mutex_exit(&reaplock);
/*
+ * Guard against race condition in mutex_owner_running:
+ * thread=owner(mutex)
+ * <interrupt>
+ * thread exits mutex
+ * thread exits
+ * thread reaped
+ * thread struct freed
+ * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
+ * A cross call to all cpus will cause the interrupt handler
+ * to reset the PC if it is in mutex_owner_running, refreshing
+ * stale thread pointers.
+ */
+ mutex_sync(); /* sync with mutex code */
+
+ /*
* Reap threads
*/
thread_reap_list(t);
@@ -874,6 +889,12 @@ thread_reaper()
cv_wait(&reaper_cv, &reaplock);
CALLB_CPR_SAFE_END(&cprinfo, &reaplock);
}
+ /*
+ * mutex_sync() needs to be called when reaping, but
+ * not too often; we limit the reaping rate to once
+ * per second. reaplimit is the max rate at which
+ * threads can be freed; it does not affect thread
+ * destruction/creation.
+ */
t = thread_deathrow;
l = lwp_deathrow;
thread_deathrow = NULL;
@@ -883,6 +904,20 @@ thread_reaper()
mutex_exit(&reaplock);
/*
+ * Guard against race condition in mutex_owner_running:
+ * thread=owner(mutex)
+ * <interrupt>
+ * thread exits mutex
+ * thread exits
+ * thread reaped
+ * thread struct freed
+ * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
+ * A cross call to all cpus will cause the interrupt handler
+ * to reset the PC if it is in mutex_owner_running, refreshing
+ * stale thread pointers.
+ */
+ mutex_sync(); /* sync with mutex code */
+ /*
* Reap threads
*/
thread_reap_list(t);
@@ -891,13 +926,32 @@ thread_reaper()
* Reap lwps
*/
thread_reap_list(l);
+ delay(hz);
}
}
/*
+ * This is called by lwp_create(), etc., to move a thread from lwp_deathrow
+ * onto thread_deathrow. The thread's state has already been changed to
+ * TS_FREE to indicate that it is reapable. The caller already holds the
+ * reaplock, and the thread's lwp resources have already been detached.
+ */
+void
+reapq_move_lq_to_tq(kthread_t *t)
+{
+ ASSERT(t->t_state == TS_FREE);
+ ASSERT(MUTEX_HELD(&reaplock));
+ t->t_forw = thread_deathrow;
+ thread_deathrow = t;
+ thread_reapcnt++;
+ if (lwp_reapcnt + thread_reapcnt > reaplimit)
+ cv_signal(&reaper_cv); /* wake the reaper */
+}
+
+/*
* This is called by resume() to put a zombie thread onto deathrow.
* The thread's state is changed to TS_FREE to indicate that it is reapable.
- * This is called from the idle thread so it must not block (just spin).
+ * This is called from the idle thread so it must not block - just spin.
*/
void
reapq_add(kthread_t *t)
@@ -1118,6 +1172,28 @@ freectx(kthread_t *t, int isexec)
}
/*
+ * freectx_ctx is called from lwp_create() when an lwp is reused from
+ * lwp_deathrow and its thread structure is added to thread_deathrow.
+ * The thread structure to which this ctx was attached may already have
+ * been freed by the thread reaper, so free_op implementations shouldn't
+ * rely on that thread structure still being around.
+ */
+void
+freectx_ctx(struct ctxop *ctx)
+{
+ struct ctxop *nctx;
+
+ ASSERT(ctx != NULL);
+
+ do {
+ nctx = ctx->next;
+ if (ctx->free_op != NULL)
+ (ctx->free_op)(ctx->arg, 0);
+ kmem_free(ctx, sizeof (struct ctxop));
+ } while ((ctx = nctx) != NULL);
+}
+
+/*
* Set the thread running; arrange for it to be swapped in if necessary.
*/
void
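
For reference, a C-level sketch of the check those comments guard. The real mutex_owner_running() is hand-written assembly (see lock_prim.s below), precisely so the trap-return code can recognize its PC range and restart it; this sketch only mirrors its logic:

	void *
	mutex_owner_running(mutex_impl_t *lp)
	{
		kthread_t *owner;
		cpu_t *cp;

		owner = MUTEX_OWNER(lp);	/* mask off the waiters bit */
		if (owner == NULL)
			return (NULL);		/* lock is free */
		cp = owner->t_cpu;		/* stale if owner was freed */
		if (cp->cpu_thread == owner)	/* hence the critical region */
			return (cp);		/* owner is running on cp */
		return (NULL);
	}

The mutex_sync() cross call guarantees that no CPU is paused between the t_cpu load and the cpu_thread check holding a stale owner pointer when the reaper finally frees the thread structures.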
diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c
index 13cf752b45..92286f7163 100644
--- a/usr/src/uts/common/os/cpu.c
+++ b/usr/src/uts/common/os/cpu.c
@@ -58,7 +58,7 @@
#include <sys/msacct.h>
#include <sys/time.h>
#include <sys/archsystm.h>
-#if defined(__x86)
+#if defined(__x86) || defined(__amd64)
#include <sys/x86_archext.h>
#endif
@@ -728,6 +728,11 @@ weakbinding_start(void)
weakbindingbarrier = 0;
}
+void
+null_xcall(void)
+{
+}
+
/*
* This routine is called to place the CPUs in a safe place so that
* one of them can be taken off line or placed on line. What we are
@@ -2797,6 +2802,7 @@ cpu_destroy_bound_threads(cpu_t *cp)
mutex_exit(&pidlock);
+ mutex_sync();
for (t = tlist; t != NULL; t = tnext) {
tnext = t->t_next;
thread_free(t);
diff --git a/usr/src/uts/common/os/lwp.c b/usr/src/uts/common/os/lwp.c
index a925f979a4..a9f1aa2588 100644
--- a/usr/src/uts/common/os/lwp.c
+++ b/usr/src/uts/common/os/lwp.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -61,6 +61,8 @@
#include <sys/brand.h>
void *segkp_lwp; /* cookie for pool of segkp resources */
+extern void reapq_move_lq_to_tq(kthread_t *);
+extern void freectx_ctx(struct ctxop *);
/*
* Create a thread that appears to be stopped at sys_rtt.
@@ -88,6 +90,7 @@ lwp_create(void (*proc)(), caddr_t arg, size_t len, proc_t *p,
int i;
int rctlfail = 0;
boolean_t branded = 0;
+ struct ctxop *ctx = NULL;
mutex_enter(&p->p_lock);
mutex_enter(&p->p_zone->zone_nlwps_lock);
@@ -136,14 +139,18 @@ lwp_create(void (*proc)(), caddr_t arg, size_t len, proc_t *p,
lwp_reapcnt--;
lwpdata = t->t_swap;
lwp = t->t_lwp;
- }
- mutex_exit(&reaplock);
- if (t) {
+ ctx = t->t_ctx;
t->t_swap = NULL;
- lwp_stk_fini(t->t_lwp);
t->t_lwp = NULL;
- t->t_forw = NULL;
- thread_free(t);
+ t->t_ctx = NULL;
+ reapq_move_lq_to_tq(t);
+ }
+ mutex_exit(&reaplock);
+ if (lwp != NULL) {
+ lwp_stk_fini(lwp);
+ }
+ if (ctx != NULL) {
+ freectx_ctx(ctx);
}
}
if (lwpdata == NULL &&
@@ -250,7 +257,7 @@ grow:
ldp->ld_next = ldp + 1;
new_hashsz = (new_dirsz + 2) / 2;
new_hash = kmem_zalloc(new_hashsz * sizeof (lwpdir_t *),
- KM_SLEEP);
+ KM_SLEEP);
mutex_enter(&p->p_lock);
if (p == curproc)
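
The net effect of the lwp.c change: when lwp_create() reuses an lwp from lwp_deathrow, the donor thread structure is no longer freed on the spot but parks on thread_deathrow via reapq_move_lq_to_tq(), so the reaper's mutex_sync() runs before it is freed and mutex_owner_running() cannot chase a stale pointer to it. lwp_stk_fini() and the new freectx_ctx() call are made only after the reaplock is dropped.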
diff --git a/usr/src/uts/common/os/mutex.c b/usr/src/uts/common/os/mutex.c
index e935436bf6..ab6df83ad1 100644
--- a/usr/src/uts/common/os/mutex.c
+++ b/usr/src/uts/common/os/mutex.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -102,7 +102,8 @@
*
* set waiters bit
* membar #StoreLoad (via membar_enter())
- * check CPU_THREAD for each CPU; abort if owner running
+ * check CPU_THREAD for owner's t_cpu
+ * continue if owner running
* membar #LoadLoad (via membar_consumer())
* check owner and waiters bit; abort if either changed
* block
@@ -133,7 +134,9 @@
*
* The only requirements of code outside the mutex implementation are
* (1) mutex_exit() preemption fixup in interrupt handlers or trap return,
- * and (2) a membar #StoreLoad after setting CPU_THREAD in resume().
+ * (2) a membar #StoreLoad after setting CPU_THREAD in resume(),
+ * (3) mutex_owner_running() preemption fixup in interrupt handlers
+ * or trap returns.
* Note: idle threads cannot grab adaptive locks (since they cannot block),
* so the membar may be safely omitted when resuming an idle thread.
*
@@ -199,27 +202,9 @@
* much reduction in memory traffic, but reduces the potential idle time.
* The theory of the exponential delay code is to start with a short
* delay loop and double the waiting time on each iteration, up to
- * a preselected maximum. The BACKOFF_BASE provides the equivalent
- * of 2 to 3 memory references delay for US-III+ and US-IV architectures.
- * The BACKOFF_CAP is the equivalent of 50 to 100 memory references of
- * time (less than 12 microseconds for a 1000 MHz system).
- *
- * To determine appropriate BACKOFF_BASE and BACKOFF_CAP values,
- * studies on US-III+ and US-IV systems using 1 to 66 threads were
- * done. A range of possible values were studied.
- * Performance differences below 10 threads were not large. For
- * systems with more threads, substantial increases in total lock
- * throughput was observed with the given values. For cases where
- * more than 20 threads were waiting on the same lock, lock throughput
- * increased by a factor of 5 or more using the backoff algorithm.
- *
- * Some platforms may provide their own platform specific delay code,
- * using plat_lock_delay(backoff). If it is available, plat_lock_delay
- * is executed instead of the default delay code.
+ * a preselected maximum.
*/
-#pragma weak plat_lock_delay
-
#include <sys/param.h>
#include <sys/time.h>
#include <sys/cpuvar.h>
@@ -236,9 +221,8 @@
#include <sys/cpu.h>
#include <sys/stack.h>
#include <sys/archsystm.h>
-
-#define BACKOFF_BASE 50
-#define BACKOFF_CAP 1600
+#include <sys/machsystm.h>
+#include <sys/x_call.h>
/*
* The sobj_ops vector exports a set of functions needed when a thread
@@ -268,6 +252,89 @@ mutex_panic(char *msg, mutex_impl_t *lp)
msg, lp, MUTEX_OWNER(&panic_mutex), curthread);
}
+/* "tunables" for per-platform backoff constants. */
+uint_t mutex_backoff_cap = 0;
+ushort_t mutex_backoff_base = MUTEX_BACKOFF_BASE;
+ushort_t mutex_cap_factor = MUTEX_CAP_FACTOR;
+uchar_t mutex_backoff_shift = MUTEX_BACKOFF_SHIFT;
+
+void
+mutex_sync(void)
+{
+ MUTEX_SYNC();
+}
+
+/* calculate the backoff interval */
+static uint_t
+default_lock_backoff(uint_t backoff)
+{
+ uint_t cap; /* backoff cap calculated */
+
+ if (backoff == 0) {
+ backoff = mutex_backoff_base;
+ /* first call just sets the base */
+ return (backoff);
+ }
+
+ /* set cap */
+ if (mutex_backoff_cap == 0) {
+ /*
+ * For a contended lock, in the worst case a load + cas may
+ * be queued at the controller for each contending CPU.
+ * Therefore, to avoid queueing, the accesses for all CPUS must
+ * be spread out in time over an interval of (ncpu *
+ * cap-factor). Maximum backoff is set to this value, and
+ * actual backoff is a random number from 0 to the current max.
+ */
+ cap = ncpus_online * mutex_cap_factor;
+ } else {
+ cap = mutex_backoff_cap;
+ }
+
+ /* calculate new backoff value */
+ backoff <<= mutex_backoff_shift; /* increase backoff */
+ if (backoff > cap) {
+ if (cap < mutex_backoff_base)
+ backoff = mutex_backoff_base;
+ else
+ backoff = cap;
+ }
+
+ return (backoff);
+}
+
+/*
+ * default delay function for mutexes.
+ */
+static void
+default_lock_delay(uint_t backoff)
+{
+ ulong_t rnd; /* random factor */
+ uint_t cur_backoff; /* calculated backoff */
+ uint_t backctr;
+
+ /*
+ * Modify backoff by a random amount to avoid lockstep, and to
+ * make it likely that some thread gets a small backoff and
+ * re-checks quickly.
+ */
+ rnd = (((long)curthread >> PTR24_LSB) ^ (long)MUTEX_GETTICK());
+ cur_backoff = (uint_t)(rnd % (backoff - mutex_backoff_base + 1)) +
+ mutex_backoff_base;
+
+ /*
+ * Delay before trying
+ * to touch the mutex data structure.
+ */
+ for (backctr = cur_backoff; backctr; backctr--) {
+ MUTEX_DELAY();
+ }
+}
+
+uint_t (*mutex_lock_backoff)(uint_t) = default_lock_backoff;
+void (*mutex_lock_delay)(uint_t) = default_lock_delay;
+void (*mutex_delay)(void) = mutex_delay_default;
+
/*
* mutex_vector_enter() is called from the assembly mutex_enter() routine
* if the lock is held or is not of type MUTEX_ADAPTIVE.
@@ -276,15 +343,15 @@ void
mutex_vector_enter(mutex_impl_t *lp)
{
kthread_id_t owner;
+ kthread_id_t lastowner = MUTEX_NO_OWNER; /* track owner changes */
hrtime_t sleep_time = 0; /* how long we slept */
uint_t spin_count = 0; /* how many times we spun */
- cpu_t *cpup, *last_cpu;
- extern cpu_t *cpu_list;
+ cpu_t *cpup;
turnstile_t *ts;
volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
- int backoff; /* current backoff */
- int backctr; /* ctr for backoff */
+ uint_t backoff = 0; /* current backoff */
int sleep_count = 0;
+ int changecnt = 0; /* count of owner changes */
ASSERT_STACK_ALIGNED();
@@ -314,42 +381,31 @@ mutex_vector_enter(mutex_impl_t *lp)
CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);
- if (&plat_lock_delay) {
- backoff = 0;
- } else {
- backoff = BACKOFF_BASE;
- }
-
+ backoff = mutex_lock_backoff(0); /* set base backoff */
for (;;) {
-spin:
spin_count++;
- /*
- * Add an exponential backoff delay before trying again
- * to touch the mutex data structure.
- * the spin_count test and call to nulldev are to prevent
- * the compiler optimizer from eliminating the delay loop.
- */
- if (&plat_lock_delay) {
- plat_lock_delay(&backoff);
- } else {
- for (backctr = backoff; backctr; backctr--) {
- if (!spin_count) (void) nulldev();
- }; /* delay */
- backoff = backoff << 1; /* double it */
- if (backoff > BACKOFF_CAP) {
- backoff = BACKOFF_CAP;
- }
-
- SMT_PAUSE();
- }
+ mutex_lock_delay(backoff); /* backoff delay */
if (panicstr)
return;
if ((owner = MUTEX_OWNER(vlp)) == NULL) {
- if (mutex_adaptive_tryenter(lp))
+ if (mutex_adaptive_tryenter(lp)) {
break;
+ }
+ /* increase backoff only on failed attempt. */
+ backoff = mutex_lock_backoff(backoff);
+ changecnt++;
continue;
+ } else if (lastowner != owner) {
+ lastowner = owner;
+ backoff = mutex_lock_backoff(backoff);
+ changecnt++;
+ }
+
+ if (changecnt >= ncpus_online) {
+ backoff = mutex_lock_backoff(0);
+ changecnt = 0;
}
if (owner == curthread)
@@ -362,26 +418,9 @@ spin:
if (owner == MUTEX_NO_OWNER)
continue;
- /*
- * When searching the other CPUs, start with the one where
- * we last saw the owner thread. If owner is running, spin.
- *
- * We must disable preemption at this point to guarantee
- * that the list doesn't change while we traverse it
- * without the cpu_lock mutex. While preemption is
- * disabled, we must revalidate our cached cpu pointer.
- */
- kpreempt_disable();
- if (cpup->cpu_next == NULL)
- cpup = cpu_list;
- last_cpu = cpup; /* mark end of search */
- do {
- if (cpup->cpu_thread == owner) {
- kpreempt_enable();
- goto spin;
- }
- } while ((cpup = cpup->cpu_next) != last_cpu);
- kpreempt_enable();
+ if (mutex_owner_running(lp) != NULL) {
+ continue;
+ }
/*
* The owner appears not to be running, so block.
@@ -394,19 +433,11 @@ spin:
/*
* Recheck whether owner is running after waiters bit hits
* global visibility (above). If owner is running, spin.
- *
- * Since we are at ipl DISP_LEVEL, kernel preemption is
- * disabled, however we still need to revalidate our cached
- * cpu pointer to make sure the cpu hasn't been deleted.
*/
- if (cpup->cpu_next == NULL)
- last_cpu = cpup = cpu_list;
- do {
- if (cpup->cpu_thread == owner) {
- turnstile_exit(lp);
- goto spin;
- }
- } while ((cpup = cpup->cpu_next) != last_cpu);
+ if (mutex_owner_running(lp) != NULL) {
+ turnstile_exit(lp);
+ continue;
+ }
membar_consumer();
/*
@@ -418,6 +449,8 @@ spin:
&mutex_sobj_ops, NULL, NULL);
sleep_time += gethrtime();
sleep_count++;
+ /* reset backoff after turnstile */
+ backoff = mutex_lock_backoff(0);
} else {
turnstile_exit(lp);
}
@@ -436,9 +469,10 @@ spin:
/*
* We do not count a sleep as a spin.
*/
- if (spin_count > sleep_count)
+ if (spin_count > sleep_count) {
LOCKSTAT_RECORD(LS_MUTEX_ENTER_SPIN, lp,
spin_count - sleep_count);
+ }
LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
}
@@ -585,8 +619,8 @@ void
lock_set_spin(lock_t *lp)
{
int spin_count = 1;
- int backoff; /* current backoff */
- int backctr; /* ctr for backoff */
+ int loop_count = 0;
+ uint_t backoff = 0; /* current backoff */
if (panicstr)
return;
@@ -594,36 +628,19 @@ lock_set_spin(lock_t *lp)
if (ncpus == 1)
panic("lock_set: %p lock held and only one CPU", lp);
- if (&plat_lock_delay) {
- backoff = 0;
- } else {
- backoff = BACKOFF_BASE;
- }
-
while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
if (panicstr)
return;
spin_count++;
- /*
- * Add an exponential backoff delay before trying again
- * to touch the mutex data structure.
- * the spin_count test and call to nulldev are to prevent
- * the compiler optimizer from eliminating the delay loop.
- */
- if (&plat_lock_delay) {
- plat_lock_delay(&backoff);
- } else {
- /* delay */
- for (backctr = backoff; backctr; backctr--) {
- if (!spin_count) (void) nulldev();
- }
+ loop_count++;
- backoff = backoff << 1; /* double it */
- if (backoff > BACKOFF_CAP) {
- backoff = BACKOFF_CAP;
- }
- SMT_PAUSE();
+ if (ncpus_online == loop_count) {
+ backoff = mutex_lock_backoff(0);
+ loop_count = 0;
+ } else {
+ backoff = mutex_lock_backoff(backoff);
}
+ mutex_lock_delay(backoff);
}
if (spin_count) {
@@ -637,8 +654,8 @@ void
lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
{
int spin_count = 1;
- int backoff; /* current backoff */
- int backctr; /* ctr for backoff */
+ int loop_count = 0;
+ uint_t backoff = 0; /* current backoff */
if (panicstr)
return;
@@ -648,38 +665,23 @@ lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
ASSERT(new_pil > LOCK_LEVEL);
- if (&plat_lock_delay) {
- backoff = 0;
- } else {
- backoff = BACKOFF_BASE;
- }
do {
splx(old_pil);
while (LOCK_HELD(lp)) {
+ spin_count++;
+ loop_count++;
+
if (panicstr) {
*old_pil_addr = (ushort_t)splr(new_pil);
return;
}
- spin_count++;
- /*
- * Add an exponential backoff delay before trying again
- * to touch the mutex data structure.
- * spin_count test and call to nulldev are to prevent
- * compiler optimizer from eliminating the delay loop.
- */
- if (&plat_lock_delay) {
- plat_lock_delay(&backoff);
+ if (ncpus_online == loop_count) {
+ backoff = mutex_lock_backoff(0);
+ loop_count = 0;
} else {
- for (backctr = backoff; backctr; backctr--) {
- if (!spin_count) (void) nulldev();
- }
- backoff = backoff << 1; /* double it */
- if (backoff > BACKOFF_CAP) {
- backoff = BACKOFF_CAP;
- }
-
- SMT_PAUSE();
+ backoff = mutex_lock_backoff(backoff);
}
+ mutex_lock_delay(backoff);
}
old_pil = splr(new_pil);
} while (!lock_spin_try(lp));
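
To make the new backoff arithmetic concrete, here is a standalone user-level model (an illustration, not kernel code; the constants are the x86 values from mutex_impl.h below, with ncpus_online fixed at 64):

	#include <stdio.h>

	#define	MUTEX_BACKOFF_BASE	1	/* x86 values, see below */
	#define	MUTEX_BACKOFF_SHIFT	2
	#define	MUTEX_CAP_FACTOR	64

	int
	main(void)
	{
		unsigned int ncpus_online = 64;
		unsigned int cap = ncpus_online * MUTEX_CAP_FACTOR;	/* 4096 */
		unsigned int backoff = MUTEX_BACKOFF_BASE;
		int i;

		/* prints: 1 4 16 64 256 1024 4096 4096 */
		for (i = 0; i < 8; i++) {
			printf("%u ", backoff);
			backoff <<= MUTEX_BACKOFF_SHIFT;
			if (backoff > cap)
				backoff = cap;
		}
		(void) printf("\n");
		return (0);
	}

The delay taken on each failed attempt is then a random count of MUTEX_DELAY() iterations between mutex_backoff_base and the current backoff (see default_lock_delay() above), which keeps contending CPUs out of lockstep.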
diff --git a/usr/src/uts/common/sys/mutex.h b/usr/src/uts/common/sys/mutex.h
index 60e81e88f8..53d1e28e15 100644
--- a/usr/src/uts/common/sys/mutex.h
+++ b/usr/src/uts/common/sys/mutex.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -83,7 +83,16 @@ extern int mutex_tryenter(kmutex_t *);
extern void mutex_exit(kmutex_t *);
extern int mutex_owned(kmutex_t *);
extern struct _kthread *mutex_owner(kmutex_t *);
-extern void plat_lock_delay(int *);
+
+extern ushort_t mutex_backoff_base;
+extern uint_t mutex_backoff_cap;
+extern ushort_t mutex_cap_factor;
+extern uchar_t mutex_backoff_shift;
+extern void (*mutex_lock_delay)(uint_t);
+extern uint_t (*mutex_lock_backoff)(uint_t);
+extern void (*mutex_delay)(void);
+extern void mutex_delay_default(void);
+extern void mutex_sync(void);
#endif /* _KERNEL */
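
Because the delay and backoff hooks are exported as writable tunables and function pointers, a platform can retune or replace them at startup. A composite sketch assembled from the platform changes later in this diff:

	void
	startup_platform(void)
	{
		mutex_backoff_base = 2;			/* starcat values */
		mutex_cap_factor = 64;
		mutex_lock_delay = plat_lock_delay;	/* OPL's own delay */
	}

Niagara instead swaps the inner delay primitive in cpu_init_private(): mutex_delay = cpu_mutex_delay (niagara) or mutex_delay = rdccr_delay (niagara2).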
diff --git a/usr/src/uts/i86pc/os/intr.c b/usr/src/uts/i86pc/os/intr.c
index f68b96acd1..12d4304973 100644
--- a/usr/src/uts/i86pc/os/intr.c
+++ b/usr/src/uts/i86pc/os/intr.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -999,6 +999,8 @@ sys_rtt_common(struct regs *rp)
kthread_t *tp;
extern void mutex_exit_critical_start();
extern long mutex_exit_critical_size;
+ extern void mutex_owner_running_critical_start();
+ extern long mutex_owner_running_critical_size;
loop:
@@ -1076,6 +1078,19 @@ loop:
mutex_exit_critical_size) {
rp->r_pc = (greg_t)mutex_exit_critical_start;
}
+
+ /*
+ * If we interrupted the mutex_owner_running() critical region we
+ * must reset the PC back to the beginning to prevent dereferencing
+ * of a freed thread pointer. See the comments in mutex_owner_running
+ * for details.
+ */
+ if ((uintptr_t)rp->r_pc -
+ (uintptr_t)mutex_owner_running_critical_start <
+ mutex_owner_running_critical_size) {
+ rp->r_pc = (greg_t)mutex_owner_running_critical_start;
+ }
+
return (0);
}
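
Note that the single unsigned comparison covers both bounds: if r_pc is below mutex_owner_running_critical_start, the subtraction wraps to a huge unsigned value and the test fails, so only a PC within [start, start + size) gets rewound. The mutex_exit() fixup just above uses the same trick.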
diff --git a/usr/src/uts/intel/ia32/ml/lock_prim.s b/usr/src/uts/intel/ia32/ml/lock_prim.s
index 8dc51e3eeb..884ca02de8 100644
--- a/usr/src/uts/intel/ia32/ml/lock_prim.s
+++ b/usr/src/uts/intel/ia32/ml/lock_prim.s
@@ -30,11 +30,11 @@
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <vm/page.h>
-#include <sys/mutex_impl.h>
#else /* __lint */
#include "assym.h"
#endif /* __lint */
+#include <sys/mutex_impl.h>
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
@@ -665,6 +665,34 @@ mutex_exit(kmutex_t *lp)
ret
SET_SIZE(mutex_adaptive_tryenter)
+ .globl mutex_owner_running_critical_start
+
+ ENTRY(mutex_owner_running)
+mutex_owner_running_critical_start:
+ movq (%rdi), %r11 /* get owner field */
+ andq $MUTEX_THREAD, %r11 /* remove waiters bit */
+ cmpq $0, %r11 /* if free, skip */
+ je 1f /* go return 0 */
+ movq T_CPU(%r11), %r8 /* get owner->t_cpu */
+ movq CPU_THREAD(%r8), %r9 /* get t_cpu->cpu_thread */
+.mutex_owner_running_critical_end:
+ cmpq %r11, %r9 /* owner == running thread? */
+ je 2f /* yes, go return cpu */
+1:
+ xorq %rax, %rax /* return 0 */
+ ret
+2:
+ movq %r8, %rax /* return cpu */
+ ret
+ SET_SIZE(mutex_owner_running)
+
+ .globl mutex_owner_running_critical_size
+ .type mutex_owner_running_critical_size, @object
+ .align CPTRSIZE
+mutex_owner_running_critical_size:
+ .quad .mutex_owner_running_critical_end - mutex_owner_running_critical_start
+ SET_SIZE(mutex_owner_running_critical_size)
+
.globl mutex_exit_critical_start
ENTRY(mutex_exit)
@@ -806,7 +834,36 @@ mutex_exit_critical_size:
ret
SET_SIZE(mutex_adaptive_tryenter)
- .globl mutex_exit_critical_size
+ .globl mutex_owner_running_critical_start
+
+ ENTRY(mutex_owner_running)
+mutex_owner_running_critical_start:
+ movl 4(%esp), %eax /* get owner field */
+ movl (%eax), %eax
+ andl $MUTEX_THREAD, %eax /* remove waiters bit */
+ cmpl $0, %eax /* if free, skip */
+ je 1f /* go return 0 */
+ movl T_CPU(%eax), %ecx /* get owner->t_cpu */
+ movl CPU_THREAD(%ecx), %edx /* get t_cpu->cpu_thread */
+.mutex_owner_running_critical_end:
+ cmpl %eax, %edx /* owner == running thread? */
+ je 2f /* yes, go return cpu */
+1:
+ xorl %eax, %eax /* return 0 */
+ ret
+2:
+ movl %ecx, %eax /* return cpu */
+ ret
+
+ SET_SIZE(mutex_owner_running)
+
+ .globl mutex_owner_running_critical_size
+ .type mutex_owner_running_critical_size, @object
+ .align CPTRSIZE
+mutex_owner_running_critical_size:
+ .long .mutex_owner_running_critical_end - mutex_owner_running_critical_start
+ SET_SIZE(mutex_owner_running_critical_size)
+
.globl mutex_exit_critical_start
ENTRY(mutex_exit)
@@ -1398,3 +1455,43 @@ thread_onproc(kthread_id_t t, cpu_t *cp)
#endif /* !__amd64 */
#endif /* __lint */
+
+/*
+ * mutex_delay_default(void)
+ * Spins for approx a few hundred processor cycles and returns to caller.
+ */
+
+#if defined(lint) || defined(__lint)
+
+void
+mutex_delay_default(void)
+{}
+
+#else /* __lint */
+
+#if defined(__amd64)
+
+ ENTRY(mutex_delay_default)
+ movq $92,%r11
+0: decq %r11
+ jg 0b
+ ret
+ SET_SIZE(mutex_delay_default)
+
+#else
+
+ ENTRY(mutex_delay_default)
+ push %ebp
+ movl %esp,%ebp
+ andl $-16,%esp
+ push %ebx
+ movl $93,%ebx
+0: decl %ebx
+ jg 0b
+ pop %ebx
+ leave
+ ret
+ SET_SIZE(mutex_delay_default)
+
+#endif /* !__amd64 */
+#endif /* __lint */
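
The 92/93 iteration counts are calibration only: each pass is a decrement plus a taken branch, so the loop burns on the order of the 'few hundred processor cycles' the comment promises. The 32-bit variant saves and restores %ebx and aligns the stack, hence the extra prologue.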
diff --git a/usr/src/uts/intel/sys/mutex_impl.h b/usr/src/uts/intel/sys/mutex_impl.h
index bcab84a979..c8cff15c2a 100644
--- a/usr/src/uts/intel/sys/mutex_impl.h
+++ b/usr/src/uts/intel/sys/mutex_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -98,8 +97,31 @@ typedef union mutex_impl {
#define MUTEX_DESTROY(lp) \
(lp)->m_owner = ((uintptr_t)curthread | MUTEX_DEAD)
+/* mutex backoff delay macro and constants */
+#define MUTEX_BACKOFF_BASE 1
+#define MUTEX_BACKOFF_SHIFT 2
+#define MUTEX_CAP_FACTOR 64
+#define MUTEX_DELAY() { \
+ mutex_delay(); \
+ SMT_PAUSE(); \
+ }
+
+/* low overhead clock read */
+#define MUTEX_GETTICK() tsc_read()
+extern void null_xcall(void);
+#define MUTEX_SYNC() { \
+ cpuset_t set; \
+ CPUSET_ALL(set); \
+ xc_call(0, 0, 0, X_CALL_HIPRI, set, \
+ (xc_func_t)null_xcall); \
+ }
extern int mutex_adaptive_tryenter(mutex_impl_t *);
+extern void *mutex_owner_running(mutex_impl_t *);
+
+#else /* _ASM */
+
+#define MUTEX_THREAD -0x8
#endif /* _ASM */
diff --git a/usr/src/uts/sparc/v9/ml/lock_prim.s b/usr/src/uts/sparc/v9/ml/lock_prim.s
index 064e11dc32..4a58bacd01 100644
--- a/usr/src/uts/sparc/v9/ml/lock_prim.s
+++ b/usr/src/uts/sparc/v9/ml/lock_prim.s
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -377,6 +376,11 @@ void
mutex_exit(kmutex_t *lp)
{}
+/* ARGSUSED */
+void *
+mutex_owner_running(mutex_impl_t *lp)
+{ return (NULL); }
+
#else
.align 32
ENTRY(mutex_enter)
@@ -419,8 +423,12 @@ mutex_exit(kmutex_t *lp)
mov %g0, %o0
SET_SIZE(mutex_adaptive_tryenter)
+ ! these need to be together and cache aligned for performance.
+ .align 64
.global mutex_exit_critical_size
.global mutex_exit_critical_start
+ .global mutex_owner_running_critical_size
+ .global mutex_owner_running_critical_start
mutex_exit_critical_size = .mutex_exit_critical_end - mutex_exit_critical_start
@@ -441,6 +449,30 @@ mutex_exit_critical_start: ! If we are interrupted, restart here
nop
SET_SIZE(mutex_exit)
+mutex_owner_running_critical_size = .mutex_owner_running_critical_end - mutex_owner_running_critical_start
+
+ .align 32
+
+ ENTRY(mutex_owner_running)
+mutex_owner_running_critical_start: ! If interrupted restart here
+ ldn [%o0], %o1 ! get the owner field
+ and %o1, MUTEX_THREAD, %o1 ! remove the waiters bit if any
+ brz,pn %o1, 1f ! if no owner, go return 0
+ nop
+ ldn [%o1+T_CPU], %o2 ! get owner->t_cpu
+ ldn [%o2+CPU_THREAD], %o3 ! get owner->t_cpu->cpu_thread
+.mutex_owner_running_critical_end: ! for pil_interrupt() hook
+ cmp %o1, %o3 ! owner == running thread?
+ be,a,pt %xcc, 2f ! yes, go return cpu
+ nop
+1:
+ retl
+ mov %g0, %o0 ! return 0 (owner not running)
+2:
+ retl
+ mov %o2, %o0 ! owner running, return cpu
+ SET_SIZE(mutex_owner_running)
+
#endif /* lint */
/*
@@ -729,3 +761,60 @@ thread_onproc(kthread_id_t t, cpu_t *cp)
SET_SIZE(thread_onproc)
#endif /* lint */
+
+/* delay function used in some mutex code - just do 3 nop cas ops */
+#if defined(lint)
+
+/* ARGSUSED */
+void
+cas_delay(void *addr)
+{}
+#else /* lint */
+ ENTRY(cas_delay)
+ casx [%o0], %g0, %g0
+ casx [%o0], %g0, %g0
+ retl
+ casx [%o0], %g0, %g0
+ SET_SIZE(cas_delay)
+#endif /* lint */
+
+#if defined(lint)
+
+/*
+ * alternative delay function for some niagara processors. The rd
+ * instruction uses fewer resources than casx on those cpus.
+ */
+/* ARGSUSED */
+void
+rdccr_delay(void)
+{}
+#else /* lint */
+ ENTRY(rdccr_delay)
+ rd %ccr, %g0
+ rd %ccr, %g0
+ retl
+ rd %ccr, %g0
+ SET_SIZE(rdccr_delay)
+#endif /* lint */
+
+/*
+ * mutex_delay_default(void)
+ * Spins for approx a few hundred processor cycles and returns to caller.
+ */
+#if defined(lint)
+
+void
+mutex_delay_default(void)
+{}
+
+#else /* lint */
+
+ ENTRY(mutex_delay_default)
+ mov 72,%o0
+1: brgz %o0, 1b
+ dec %o0
+ retl
+ nop
+ SET_SIZE(mutex_delay_default)
+
+#endif /* lint */
diff --git a/usr/src/uts/sparc/v9/sys/mutex_impl.h b/usr/src/uts/sparc/v9/sys/mutex_impl.h
index d1236685fe..70b717eecf 100644
--- a/usr/src/uts/sparc/v9/sys/mutex_impl.h
+++ b/usr/src/uts/sparc/v9/sys/mutex_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 1991-1998,2003 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -38,6 +37,7 @@
extern "C" {
#endif
+#define MUTEX_THREAD (-0x8)
#ifndef _ASM
/*
@@ -96,7 +96,23 @@ typedef union mutex_impl {
#define MUTEX_DESTROY(lp) \
(lp)->m_owner = ((uintptr_t)curthread | MUTEX_DEAD)
+#define MUTEX_BACKOFF_BASE 1
+#define MUTEX_BACKOFF_SHIFT 1
+#define MUTEX_CAP_FACTOR 8
+#define MUTEX_DELAY() { \
+ mutex_delay(); \
+ }
+
+/* low-overhead clock read */
+extern u_longlong_t gettick(void);
+#define MUTEX_GETTICK() gettick()
+extern void null_xcall(void);
+#define MUTEX_SYNC() xc_all((xcfunc_t *)null_xcall, 0, 0)
+
+extern void cas_delay(void *);
+extern void rdccr_delay(void);
extern int mutex_adaptive_tryenter(mutex_impl_t *);
+extern void *mutex_owner_running(mutex_impl_t *);
#endif /* _ASM */
diff --git a/usr/src/uts/sun4u/ml/mach_locore.s b/usr/src/uts/sun4u/ml/mach_locore.s
index 9393dd2f7a..4447483f26 100644
--- a/usr/src/uts/sun4u/ml/mach_locore.s
+++ b/usr/src/uts/sun4u/ml/mach_locore.s
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -802,6 +802,23 @@ rtt_ctx_end:
movg %xcc, %o0, %l6 ! if current is lower, drop old pil
1:
!
+ ! If we interrupted the mutex_owner_running() critical region we
+ ! must reset the PC and nPC back to the beginning to prevent
+ ! dereferencing of a freed thread pointer. See the comments in
+ ! mutex_owner_running() for details.
+ !
+ ldn [%l7 + PC_OFF], %l0
+ set mutex_owner_running_critical_start, %l1
+ sub %l0, %l1, %l0
+ cmp %l0, mutex_owner_running_critical_size
+ bgeu,pt %xcc, 2f
+ mov THREAD_REG, %l0
+ stn %l1, [%l7 + PC_OFF] ! restart mutex_owner_running()
+ add %l1, 4, %l1
+ ba,pt %xcc, common_rtt
+ stn %l1, [%l7 + nPC_OFF]
+
+2:
+ !
! If we interrupted the mutex_exit() critical region we must reset
! the PC and nPC back to the beginning to prevent missed wakeups.
! See the comments in mutex_exit() for details.
diff --git a/usr/src/uts/sun4u/opl/os/opl.c b/usr/src/uts/sun4u/opl/os/opl.c
index f33b231117..071f00c142 100644
--- a/usr/src/uts/sun4u/opl/os/opl.c
+++ b/usr/src/uts/sun4u/opl/os/opl.c
@@ -102,19 +102,17 @@ static void pass2xscf_thread();
* Note FF/DC out-of-order instruction engine takes only a
* single cycle to execute each spin loop
* for comparison, Panther takes 6 cycles for same loop
- * 1500 approx nsec for OPL sleep instruction
- * if spin count = OPL_BOFF_SLEEP*OPL_BOFF_SPIN then
- * spin time should be equal to OPL_BOFF_TM nsecs
- * Listed values tuned for 2.15GHz to 2.4GHz systems
+ * OPL_BOFF_SPIN = base spin loop, roughly one memory reference time
+ * OPL_BOFF_TM = approx nsec for OPL sleep instruction (1600 for OPL-C)
+ * OPL_BOFF_SLEEP = approx number of SPIN iterations to equal one sleep
+ * OPL_BOFF_MAX_SCALE = scaling factor for max backoff based on active cpus
+ * Listed values tuned for 2.15GHz to 2.64GHz systems
* Value may change for future systems
*/
-#define OPL_BOFF_SPIN 720
-#define OPL_BOFF_BASE 1
-#define OPL_BOFF_SLEEP 5
-#define OPL_BOFF_CAP1 20
-#define OPL_BOFF_CAP2 60
-#define OPL_BOFF_MAX (40 * OPL_BOFF_SLEEP)
-#define OPL_BOFF_TM 1500
+#define OPL_BOFF_SPIN 7
+#define OPL_BOFF_SLEEP 4
+#define OPL_BOFF_TM 1600
+#define OPL_BOFF_MAX_SCALE 8
#define OPL_CLOCK_TICK_THRESHOLD 128
#define OPL_CLOCK_TICK_NCPUS 64
@@ -946,6 +944,9 @@ plat_startup_memlist(caddr_t alloc_base)
return (tmp_alloc_base);
}
+/* need to forward declare this */
+static void plat_lock_delay(uint_t);
+
void
startup_platform(void)
{
@@ -953,6 +954,8 @@ startup_platform(void)
clock_tick_threshold = OPL_CLOCK_TICK_THRESHOLD;
if (clock_tick_ncpus == 0)
clock_tick_ncpus = OPL_CLOCK_TICK_NCPUS;
+ mutex_lock_delay = plat_lock_delay;
+ mutex_cap_factor = OPL_BOFF_MAX_SCALE;
}
void
@@ -997,13 +1000,12 @@ plat_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
}
void
-plat_lock_delay(int *backoff)
+plat_lock_delay(uint_t backoff)
{
int i;
- int cnt;
- int flag;
+ uint_t cnt, remcnt;
int ctr;
- hrtime_t delay_start;
+ hrtime_t delay_start, rem_delay;
/*
* Platform specific lock delay code for OPL
*
@@ -1012,32 +1014,26 @@ plat_lock_delay(int *backoff)
* but is too large a granularity for the initial backoff.
*/
- if (*backoff == 0) *backoff = OPL_BOFF_BASE;
-
- flag = !*backoff;
-
- if (*backoff < OPL_BOFF_CAP1) {
+ if (backoff < 100) {
/*
* If desired backoff is long enough,
* use sleep for most of it
*/
- for (cnt = *backoff; cnt >= OPL_BOFF_SLEEP;
+ for (cnt = backoff;
+ cnt >= OPL_BOFF_SLEEP;
cnt -= OPL_BOFF_SLEEP) {
cpu_smt_pause();
}
/*
* spin for small remainder of backoff
- *
- * fake call to nulldev included to prevent
- * compiler from optimizing out the spin loop
*/
for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) {
- if (flag) (void) nulldev();
+ mutex_delay_default();
}
} else {
- /* backoff is very large. Fill it by sleeping */
+ /* backoff is large. Fill it by sleeping */
delay_start = gethrtime();
- cnt = *backoff/OPL_BOFF_SLEEP;
+ cnt = backoff / OPL_BOFF_SLEEP;
/*
* use sleep instructions for delay
*/
@@ -1050,40 +1046,19 @@ plat_lock_delay(int *backoff)
* then the sleep ends immediately with a minimum time of
* 42 clocks. We check gethrtime to ensure we have
* waited long enough. And we include both a short
- * spin loop and a sleep for any final delay time.
+ * spin loop and a sleep for repeated delay times.
*/
- while ((gethrtime() - delay_start) < cnt * OPL_BOFF_TM) {
- cpu_smt_pause();
- for (ctr = OPL_BOFF_SPIN; ctr; ctr--) {
- if (flag) (void) nulldev();
+ rem_delay = gethrtime() - delay_start;
+ while (rem_delay < cnt * OPL_BOFF_TM) {
+ remcnt = cnt - (rem_delay / OPL_BOFF_TM);
+ for (i = 0; i < remcnt; i++) {
+ cpu_smt_pause();
+ for (ctr = OPL_BOFF_SPIN; ctr; ctr--) {
+ mutex_delay_default();
+ }
}
- }
- }
-
- /*
- * We adjust the backoff in three linear stages
- * The initial stage has small increases as this phase is
- * usually handle locks with light contention. We don't want
- * to have a long backoff on a lock that is available.
- *
- * In the second stage, we are in transition, unsure whether
- * the lock is under heavy contention. As the failures to
- * obtain the lock increase, we back off further.
- *
- * For the final stage, we are in a heavily contended or
- * long held long so we want to reduce the number of tries.
- */
- if (*backoff < OPL_BOFF_CAP1) {
- *backoff += 1;
- } else {
- if (*backoff < OPL_BOFF_CAP2) {
- *backoff += OPL_BOFF_SLEEP;
- } else {
- *backoff += 2 * OPL_BOFF_SLEEP;
- }
- if (*backoff > OPL_BOFF_MAX) {
- *backoff = OPL_BOFF_MAX;
+ rem_delay = gethrtime() - delay_start;
}
}
}
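
A worked example with the constants above: a backoff of 10 takes the first branch, issuing two cpu_smt_pause() sleeps (cnt: 10 -> 6 -> 2) and then spinning the remainder for 2 * OPL_BOFF_SPIN = 14 mutex_delay_default() calls; a backoff of 400 takes the sleep branch with cnt = 400 / OPL_BOFF_SLEEP = 100, targeting 100 * OPL_BOFF_TM = 160 usec of delay, with the gethrtime() loop topping it up if the sleep instructions return early.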
diff --git a/usr/src/uts/sun4u/serengeti/os/serengeti.c b/usr/src/uts/sun4u/serengeti/os/serengeti.c
index 847384abee..fae7be521a 100644
--- a/usr/src/uts/sun4u/serengeti/os/serengeti.c
+++ b/usr/src/uts/sun4u/serengeti/os/serengeti.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -1297,6 +1297,9 @@ cpu_sgn_update(ushort_t sig, uchar_t state, uchar_t sub_state, int cpuid)
void
startup_platform(void)
{
+ /* set per-platform constants for mutex backoff */
+ mutex_backoff_base = 1;
+ mutex_cap_factor = 32;
}
/*
diff --git a/usr/src/uts/sun4u/starcat/os/starcat.c b/usr/src/uts/sun4u/starcat/os/starcat.c
index c00976dfc9..1a90d6adb5 100644
--- a/usr/src/uts/sun4u/starcat/os/starcat.c
+++ b/usr/src/uts/sun4u/starcat/os/starcat.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -1271,6 +1271,9 @@ starcat_dr_name(char *name)
void
startup_platform(void)
{
+ /* set per-platform constants for mutex backoff */
+ mutex_backoff_base = 2;
+ mutex_cap_factor = 64;
}
/*
diff --git a/usr/src/uts/sun4v/cpu/niagara.c b/usr/src/uts/sun4v/cpu/niagara.c
index 3e797806fd..339c5373da 100644
--- a/usr/src/uts/sun4v/cpu/niagara.c
+++ b/usr/src/uts/sun4v/cpu/niagara.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -58,6 +58,7 @@
#include <sys/niagararegs.h>
#include <sys/trapstat.h>
#include <sys/hsvc.h>
+#include <sys/mutex_impl.h>
#define NI_MMU_PAGESIZE_MASK ((1 << TTE8K) | (1 << TTE64K) | (1 << TTE4M) \
| (1 << TTE256M))
@@ -199,6 +200,24 @@ cpu_map_exec_units(struct cpu *cp)
cp->cpu_m.cpu_mpipe = 0;
}
+void
+cpu_mutex_delay(void)
+{
+ /*
+ * Dummy is the thread-private target of the cas. If multiple strands
+ * have the same kernel call stack, dummy could fall at the same VA and
+ * hence the same L2 cache bank. To avoid this, create multiple dummy
+ * words spread across several cache lines.
+ */
+ struct {
+ long val;
+ long pad[7];
+ } dummy[4];
+
+ long *ptr = &(dummy[CPU->cpu_seqid & 0x03].val);
+ cas_delay(ptr);
+}
+
static int niagara_cpucnt;
void
@@ -212,6 +231,8 @@ cpu_init_private(struct cpu *cp)
if ((niagara_cpucnt++ == 0) && (niagara_hsvc_available == B_TRUE))
niagara_kstat_init();
+
+ mutex_delay = cpu_mutex_delay;
}
/*ARGSUSED*/
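
Each dummy entry is one long plus seven pad words, i.e. 64 bytes, so with 64-byte L2 cache lines (as on Niagara) the four entries land on four distinct lines, and indexing by CPU->cpu_seqid & 0x03 spreads concurrent strands across them.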
diff --git a/usr/src/uts/sun4v/cpu/niagara2.c b/usr/src/uts/sun4v/cpu/niagara2.c
index 18bacaa79f..17dd536659 100644
--- a/usr/src/uts/sun4v/cpu/niagara2.c
+++ b/usr/src/uts/sun4v/cpu/niagara2.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -59,6 +59,7 @@
#include <sys/niagara2regs.h>
#include <sys/hsvc.h>
#include <sys/trapstat.h>
+#include <sys/mutex_impl.h>
uint_t root_phys_addr_lo_mask = 0xffffffffU;
#if defined(NIAGARA2_IMPL)
@@ -226,6 +227,8 @@ cpu_init_private(struct cpu *cp)
if ((cpucnt++ == 0) && (cpu_hsvc_available == B_TRUE))
(void) niagara_kstat_init();
+
+ mutex_delay = rdccr_delay;
}
/*ARGSUSED*/
diff --git a/usr/src/uts/sun4v/ml/mach_locore.s b/usr/src/uts/sun4v/ml/mach_locore.s
index 8e391ed480..0e970cd33c 100644
--- a/usr/src/uts/sun4v/ml/mach_locore.s
+++ b/usr/src/uts/sun4v/ml/mach_locore.s
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -958,6 +958,23 @@ rtt_fill_end:
movg %xcc, %o0, %l6 ! if current is lower, drop old pil
1:
!
+ ! If we interrupted the mutex_owner_running() critical region we
+ ! must reset the PC and nPC back to the beginning to prevent
+ ! dereferencing of a freed thread pointer. See the comments in
+ ! mutex_owner_running() for details.
+ !
+ ldn [%l7 + PC_OFF], %l0
+ set mutex_owner_running_critical_start, %l1
+ sub %l0, %l1, %l0
+ cmp %l0, mutex_owner_running_critical_size
+ bgeu,pt %xcc, 2f
+ mov THREAD_REG, %l0
+ stn %l1, [%l7 + PC_OFF] ! restart mutex_owner_running()
+ add %l1, 4, %l1
+ ba,pt %xcc, common_rtt
+ stn %l1, [%l7 + nPC_OFF]
+
+2:
+ !
! If we interrupted the mutex_exit() critical region we must reset
! the PC and nPC back to the beginning to prevent missed wakeups.
! See the comments in mutex_exit() for details.
diff --git a/usr/src/uts/sun4v/os/fillsysinfo.c b/usr/src/uts/sun4v/os/fillsysinfo.c
index 90390d5401..3cc50b8c63 100644
--- a/usr/src/uts/sun4v/os/fillsysinfo.c
+++ b/usr/src/uts/sun4v/os/fillsysinfo.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -74,10 +74,13 @@ static int get_l2_cache_info(md_t *, mde_cookie_t, uint64_t *, uint64_t *,
static void get_q_sizes(md_t *, mde_cookie_t);
static void get_va_bits(md_t *, mde_cookie_t);
static size_t get_ra_limit(md_t *);
+static int get_l2_cache_node_count(md_t *);
uint64_t system_clock_freq;
uint_t niommu_tsbs = 0;
+static int n_l2_caches = 0;
+
/* prevent compilation with VAC defined */
#ifdef VAC
#error "The sun4v architecture does not support VAC"
@@ -402,6 +405,9 @@ cpu_setup_common(char **cpu_module_isa_set)
for (i = 0; i < nocpus; i++)
fill_cpu(mdp, cpulist[i]);
+ /* set up the L2 cache count. */
+ n_l2_caches = get_l2_cache_node_count(mdp);
+
setup_chip_mappings(mdp);
setup_exec_unit_mappings(mdp);
@@ -792,6 +798,37 @@ get_va_bits(md_t *mdp, mde_cookie_t cpu_node_cookie)
va_bits = DEFAULT_VA_ADDRESS_SPACE_BITS;
}
+int
+l2_cache_node_count(void)
+{
+ return (n_l2_caches);
+}
+
+/*
+ * count the number of l2 caches.
+ */
+int
+get_l2_cache_node_count(md_t *mdp)
+{
+ int i;
+ mde_cookie_t *cachenodes;
+ uint64_t level;
+ int n_cachenodes = md_alloc_scan_dag(mdp, md_root_node(mdp),
+ "cache", "fwd", &cachenodes);
+ int l2_caches = 0;
+
+ for (i = 0; i < n_cachenodes; i++) {
+ if (md_get_prop_val(mdp, cachenodes[i], "level", &level) != 0) {
+ level = 0;
+ }
+ if (level == 2) {
+ l2_caches++;
+ }
+ }
+ md_free_scan_dag(mdp, &cachenodes);
+ return (l2_caches);
+}
+
/*
* This routine returns the L2 cache information such as -- associativity,
* size and linesize.
diff --git a/usr/src/uts/sun4v/os/mach_startup.c b/usr/src/uts/sun4v/os/mach_startup.c
index 333212b4f5..7446aff433 100644
--- a/usr/src/uts/sun4v/os/mach_startup.c
+++ b/usr/src/uts/sun4v/os/mach_startup.c
@@ -39,6 +39,8 @@
#include <sys/traptrace.h>
#include <sys/modctl.h>
#include <sys/ldoms.h>
+#include <sys/cpu_module.h>
+#include <sys/mutex_impl.h>
#include <vm/vm_dep.h>
#ifdef TRAPTRACE
@@ -306,6 +308,14 @@ startup_platform(void)
clock_tick_threshold = SUN4V_CLOCK_TICK_THRESHOLD;
if (clock_tick_ncpus == 0)
clock_tick_ncpus = SUN4V_CLOCK_TICK_NCPUS;
+ /* set per-platform constants for mutex_backoff */
+ mutex_backoff_base = 1;
+ mutex_cap_factor = 4;
+ if (l2_cache_node_count() > 1) {
+ /* VF for example */
+ mutex_backoff_base = 2;
+ mutex_cap_factor = 16;
+ }
}
/*
diff --git a/usr/src/uts/sun4v/sys/cpu_module.h b/usr/src/uts/sun4v/sys/cpu_module.h
index b7254aba6c..6ebb761d6d 100644
--- a/usr/src/uts/sun4v/sys/cpu_module.h
+++ b/usr/src/uts/sun4v/sys/cpu_module.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -160,6 +160,8 @@ void cpu_trapstat_data(void *buf, uint_t pgszs);
| (1 << TTE4M))
void cpu_setup_common(char **);
+int l2_cache_node_count(void);
+
void fill_cpu(md_t *, mde_cookie_t);
int setup_cpu_common(int);
int cleanup_cpu_common(int);