Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/disp/thread.c | 78
-rw-r--r-- | usr/src/uts/common/os/cpu.c | 8
-rw-r--r-- | usr/src/uts/common/os/lwp.c | 23
-rw-r--r-- | usr/src/uts/common/os/mutex.c | 282
-rw-r--r-- | usr/src/uts/common/sys/mutex.h | 13
-rw-r--r-- | usr/src/uts/i86pc/os/intr.c | 17
-rw-r--r-- | usr/src/uts/intel/ia32/ml/lock_prim.s | 101
-rw-r--r-- | usr/src/uts/intel/sys/mutex_impl.h | 30
-rw-r--r-- | usr/src/uts/sparc/v9/ml/lock_prim.s | 97
-rw-r--r-- | usr/src/uts/sparc/v9/sys/mutex_impl.h | 24
-rw-r--r-- | usr/src/uts/sun4u/ml/mach_locore.s | 19
-rw-r--r-- | usr/src/uts/sun4u/opl/os/opl.c | 91
-rw-r--r-- | usr/src/uts/sun4u/serengeti/os/serengeti.c | 5
-rw-r--r-- | usr/src/uts/sun4u/starcat/os/starcat.c | 5
-rw-r--r-- | usr/src/uts/sun4v/cpu/niagara.c | 23
-rw-r--r-- | usr/src/uts/sun4v/cpu/niagara2.c | 5
-rw-r--r-- | usr/src/uts/sun4v/ml/mach_locore.s | 19
-rw-r--r-- | usr/src/uts/sun4v/os/fillsysinfo.c | 39
-rw-r--r-- | usr/src/uts/sun4v/os/mach_startup.c | 10
-rw-r--r-- | usr/src/uts/sun4v/sys/cpu_module.h | 4
20 files changed, 660 insertions, 233 deletions
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c index ee2d80834d..928b594602 100644 --- a/usr/src/uts/common/disp/thread.c +++ b/usr/src/uts/common/disp/thread.c @@ -842,6 +842,21 @@ thread_zone_destroy(zoneid_t zoneid, void *unused) mutex_exit(&reaplock); /* + * Guard against race condition in mutex_owner_running: + * thread=owner(mutex) + * <interrupt> + * thread exits mutex + * thread exits + * thread reaped + * thread struct freed + * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE. + * A cross call to all cpus will cause the interrupt handler + * to reset the PC if it is in mutex_owner_running, refreshing + * stale thread pointers. + */ + mutex_sync(); /* sync with mutex code */ + + /* * Reap threads */ thread_reap_list(t); @@ -874,6 +889,12 @@ thread_reaper() cv_wait(&reaper_cv, &reaplock); CALLB_CPR_SAFE_END(&cprinfo, &reaplock); } + /* + * mutex_sync() needs to be called when reaping, but + * not too often. We limit the reaping rate to once + * per second. reaplimit is the maximum rate at which threads + * can be freed; it does not affect thread destruction/creation. + */ t = thread_deathrow; l = lwp_deathrow; thread_deathrow = NULL; @@ -883,6 +904,20 @@ thread_reaper() mutex_exit(&reaplock); /* + * Guard against race condition in mutex_owner_running: + * thread=owner(mutex) + * <interrupt> + * thread exits mutex + * thread exits + * thread reaped + * thread struct freed + * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE. + * A cross call to all cpus will cause the interrupt handler + * to reset the PC if it is in mutex_owner_running, refreshing + * stale thread pointers. + */ + mutex_sync(); /* sync with mutex code */ + /* * Reap threads */ thread_reap_list(t); @@ -891,13 +926,32 @@ thread_reaper() * Reap lwps */ thread_reap_list(l); + delay(hz); } } /* + * This is called by lwp_create(), etc., to put an lwp_deathrow thread onto + * thread_deathrow. The thread's state has already been changed to TS_FREE + * to indicate that it is reapable; the caller already holds the reaplock, + * and the thread's lwp resources have already been freed. + */ +void +reapq_move_lq_to_tq(kthread_t *t) +{ + ASSERT(t->t_state == TS_FREE); + ASSERT(MUTEX_HELD(&reaplock)); + t->t_forw = thread_deathrow; + thread_deathrow = t; + thread_reapcnt++; + if (lwp_reapcnt + thread_reapcnt > reaplimit) + cv_signal(&reaper_cv); /* wake the reaper */ +} + +/* * This is called by resume() to put a zombie thread onto deathrow. * The thread's state is changed to TS_FREE to indicate that is reapable. - * This is called from the idle thread so it must not block (just spin). + * This is called from the idle thread so it must not block - just spin. */ void reapq_add(kthread_t *t) @@ -1118,6 +1172,28 @@ freectx(kthread_t *t, int isexec) } /* + * freectx_ctx is called from lwp_create() when an lwp is reused from + * lwp_deathrow and its thread structure is added to thread_deathrow. + * The thread structure to which this ctx was attached may already have been + * freed by the thread reaper, so free_op implementations shouldn't rely + * on the thread structure to which this ctx was attached still being around. + */ +void +freectx_ctx(struct ctxop *ctx) +{ + struct ctxop *nctx; + + ASSERT(ctx != NULL); + + do { + nctx = ctx->next; + if (ctx->free_op != NULL) + (ctx->free_op)(ctx->arg, 0); + kmem_free(ctx, sizeof (struct ctxop)); + } while ((ctx = nctx) != NULL); +} + +/* * Set the thread running; arrange for it to be swapped in if necessary.
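An aside on the mutex_sync() calls added above: the ordering matters more than the mechanism. A CPU spinning in mutex_owner_running() may be holding a stale owner pointer; the cross call forces every CPU through its interrupt handler, which restarts mutex_owner_running() from the top, so no CPU can dereference a thread structure after the reaper frees it. The following is a user-space model of that ordering only, not kernel code; the _model names and the pared-down kthread type are invented for illustration.

    #include <stdlib.h>

    typedef struct kthread_model {
        struct kthread_model *t_forw;   /* deathrow linkage */
    } kthread_model_t;

    /*
     * Stand-in for mutex_sync(): the kernel version is a high-priority
     * cross call that every CPU must answer, and answering it restarts
     * any CPU caught inside the mutex_owner_running() critical region.
     */
    static void
    mutex_sync_model(void)
    {
    }

    static void
    thread_reap_list_model(kthread_model_t *t)
    {
        while (t != NULL) {
            kthread_model_t *next = t->t_forw;
            free(t);        /* struct must be unreachable by all CPUs */
            t = next;
        }
    }

    int
    main(void)
    {
        kthread_model_t *deathrow = calloc(1, sizeof (kthread_model_t));

        mutex_sync_model();     /* the barrier runs before any free */
        thread_reap_list_model(deathrow);
        return (0);
    }

The point of the sketch is the call order in main(): the barrier precedes the frees, which is exactly what both hunks above arrange.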
*/ void diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c index 13cf752b45..92286f7163 100644 --- a/usr/src/uts/common/os/cpu.c +++ b/usr/src/uts/common/os/cpu.c @@ -58,7 +58,7 @@ #include <sys/msacct.h> #include <sys/time.h> #include <sys/archsystm.h> -#if defined(__x86) +#if defined(__x86) || defined(__amd64) #include <sys/x86_archext.h> #endif @@ -728,6 +728,11 @@ weakbinding_start(void) weakbindingbarrier = 0; } +void +null_xcall(void) +{ +} + /* * This routine is called to place the CPUs in a safe place so that * one of them can be taken off line or placed on line. What we are @@ -2797,6 +2802,7 @@ cpu_destroy_bound_threads(cpu_t *cp) mutex_exit(&pidlock); + mutex_sync(); for (t = tlist; t != NULL; t = tnext) { tnext = t->t_next; thread_free(t); diff --git a/usr/src/uts/common/os/lwp.c b/usr/src/uts/common/os/lwp.c index a925f979a4..a9f1aa2588 100644 --- a/usr/src/uts/common/os/lwp.c +++ b/usr/src/uts/common/os/lwp.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -61,6 +61,8 @@ #include <sys/brand.h> void *segkp_lwp; /* cookie for pool of segkp resources */ +extern void reapq_move_lq_to_tq(kthread_t *); +extern void freectx_ctx(struct ctxop *); /* * Create a thread that appears to be stopped at sys_rtt. @@ -88,6 +90,7 @@ lwp_create(void (*proc)(), caddr_t arg, size_t len, proc_t *p, int i; int rctlfail = 0; boolean_t branded = 0; + struct ctxop *ctx = NULL; mutex_enter(&p->p_lock); mutex_enter(&p->p_zone->zone_nlwps_lock); @@ -136,14 +139,18 @@ lwp_create(void (*proc)(), caddr_t arg, size_t len, proc_t *p, lwp_reapcnt--; lwpdata = t->t_swap; lwp = t->t_lwp; - } - mutex_exit(&reaplock); - if (t) { + ctx = t->t_ctx; t->t_swap = NULL; - lwp_stk_fini(t->t_lwp); t->t_lwp = NULL; - t->t_forw = NULL; - thread_free(t); + t->t_ctx = NULL; + reapq_move_lq_to_tq(t); + } + mutex_exit(&reaplock); + if (lwp != NULL) { + lwp_stk_fini(lwp); + } + if (ctx != NULL) { + freectx_ctx(ctx); } } if (lwpdata == NULL && @@ -250,7 +257,7 @@ grow: ldp->ld_next = ldp + 1; new_hashsz = (new_dirsz + 2) / 2; new_hash = kmem_zalloc(new_hashsz * sizeof (lwpdir_t *), - KM_SLEEP); + KM_SLEEP); mutex_enter(&p->p_lock); if (p == curproc) diff --git a/usr/src/uts/common/os/mutex.c b/usr/src/uts/common/os/mutex.c index e935436bf6..ab6df83ad1 100644 --- a/usr/src/uts/common/os/mutex.c +++ b/usr/src/uts/common/os/mutex.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -102,7 +102,8 @@ * * set waiters bit * membar #StoreLoad (via membar_enter()) - * check CPU_THREAD for each CPU; abort if owner running + * check CPU_THREAD for owner's t_cpu + * continue if owner running * membar #LoadLoad (via membar_consumer()) * check owner and waiters bit; abort if either changed * block @@ -133,7 +134,9 @@ * * The only requirements of code outside the mutex implementation are * (1) mutex_exit() preemption fixup in interrupt handlers or trap return, - * and (2) a membar #StoreLoad after setting CPU_THREAD in resume(). + * (2) a membar #StoreLoad after setting CPU_THREAD in resume(), + * (3) mutex_owner_running() preemption fixup in interrupt handlers + * or trap returns. * Note: idle threads cannot grab adaptive locks (since they cannot block), * so the membar may be safely omitted when resuming an idle thread. 
* @@ -199,27 +202,9 @@ * much reduction in memory traffic, but reduces the potential idle time. * The theory of the exponential delay code is to start with a short * delay loop and double the waiting time on each iteration, up to - * a preselected maximum. The BACKOFF_BASE provides the equivalent - * of 2 to 3 memory references delay for US-III+ and US-IV architectures. - * The BACKOFF_CAP is the equivalent of 50 to 100 memory references of - * time (less than 12 microseconds for a 1000 MHz system). - * - * To determine appropriate BACKOFF_BASE and BACKOFF_CAP values, - * studies on US-III+ and US-IV systems using 1 to 66 threads were - * done. A range of possible values were studied. - * Performance differences below 10 threads were not large. For - * systems with more threads, substantial increases in total lock - * throughput was observed with the given values. For cases where - * more than 20 threads were waiting on the same lock, lock throughput - * increased by a factor of 5 or more using the backoff algorithm. - * - * Some platforms may provide their own platform specific delay code, - * using plat_lock_delay(backoff). If it is available, plat_lock_delay - * is executed instead of the default delay code. + * a preselected maximum. */ -#pragma weak plat_lock_delay - #include <sys/param.h> #include <sys/time.h> #include <sys/cpuvar.h> @@ -236,9 +221,8 @@ #include <sys/cpu.h> #include <sys/stack.h> #include <sys/archsystm.h> - -#define BACKOFF_BASE 50 -#define BACKOFF_CAP 1600 +#include <sys/machsystm.h> +#include <sys/x_call.h> /* * The sobj_ops vector exports a set of functions needed when a thread @@ -268,6 +252,89 @@ mutex_panic(char *msg, mutex_impl_t *lp) msg, lp, MUTEX_OWNER(&panic_mutex), curthread); } +/* "tunables" for per-platform backoff constants. */ +uint_t mutex_backoff_cap = 0; +ushort_t mutex_backoff_base = MUTEX_BACKOFF_BASE; +ushort_t mutex_cap_factor = MUTEX_CAP_FACTOR; +uchar_t mutex_backoff_shift = MUTEX_BACKOFF_SHIFT; + +void +mutex_sync(void) +{ + MUTEX_SYNC(); +} + +/* calculate the backoff interval */ +static uint_t +default_lock_backoff(uint_t backoff) +{ + uint_t cap; /* backoff cap calculated */ + + if (backoff == 0) { + backoff = mutex_backoff_base; + /* first call just sets the base */ + return (backoff); + } + + /* set cap */ + if (mutex_backoff_cap == 0) { + /* + * For a contended lock, in the worst case a load + cas may + * be queued at the controller for each contending CPU. + * Therefore, to avoid queueing, the accesses for all CPUs must + * be spread out in time over an interval of (ncpu * + * cap-factor). Maximum backoff is set to this value, and + * actual backoff is a random number from 0 to the current max. + */ + cap = ncpus_online * mutex_cap_factor; + } else { + cap = mutex_backoff_cap; + } + + /* calculate new backoff value */ + backoff <<= mutex_backoff_shift; /* increase backoff */ + if (backoff > cap) { + if (cap < mutex_backoff_base) + backoff = mutex_backoff_base; + else + backoff = cap; + }+ + return (backoff); +}
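To see the shape of the delay curve default_lock_backoff() produces, here is a minimal user-space sketch assuming the Intel constants (base 1, shift 2, cap factor 64) and a made-up online-CPU count of 8; everything suffixed _MODEL or _model is local to the example.

    #include <stdio.h>

    #define NCPUS_ONLINE_MODEL  8           /* assumed CPU count */

    static unsigned int mutex_backoff_base = 1;
    static unsigned int mutex_backoff_shift = 2;
    static unsigned int mutex_cap_factor = 64;
    static unsigned int mutex_backoff_cap = 0;  /* 0: derive from ncpus */

    static unsigned int
    lock_backoff_model(unsigned int backoff)
    {
        unsigned int cap;

        if (backoff == 0)
            return (mutex_backoff_base);    /* first failure: base */

        cap = (mutex_backoff_cap != 0) ? mutex_backoff_cap :
            NCPUS_ONLINE_MODEL * mutex_cap_factor;

        backoff <<= mutex_backoff_shift;    /* exponential growth */
        if (backoff > cap)
            backoff = (cap < mutex_backoff_base) ?
                mutex_backoff_base : cap;
        return (backoff);
    }

    int
    main(void)
    {
        unsigned int i, b = 0;

        for (i = 0; i < 7; i++) {
            b = lock_backoff_model(b);
            printf("attempt %u: backoff %u\n", i + 1, b);
        }
        return (0);
    }

With eight CPUs the cap is 512, so successive failed attempts yield 1, 4, 16, 64, 256 and then stay pinned at 512; scaling the cap by ncpus_online is what keeps the worst-case queue at the memory controller bounded.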
+ +/* + * default delay function for mutexes. + */ +static void +default_lock_delay(uint_t backoff) +{ + ulong_t rnd; /* random factor */ + uint_t cur_backoff; /* calculated backoff */ + uint_t backctr; + + /* + * Modify backoff by a random amount to avoid lockstep, and to + * make it probable that some thread gets a small backoff, and + * re-checks quickly. + */ + rnd = (((long)curthread >> PTR24_LSB) ^ (long)MUTEX_GETTICK()); + cur_backoff = (uint_t)(rnd % (backoff - mutex_backoff_base + 1)) + + mutex_backoff_base; + + /* + * Delay before trying + * to touch the mutex data structure. + */ + for (backctr = cur_backoff; backctr; backctr--) { + MUTEX_DELAY(); + } +} + +uint_t (*mutex_lock_backoff)(uint_t) = default_lock_backoff; +void (*mutex_lock_delay)(uint_t) = default_lock_delay; +void (*mutex_delay)(void) = mutex_delay_default; + /* * mutex_vector_enter() is called from the assembly mutex_enter() routine * if the lock is held or is not of type MUTEX_ADAPTIVE. @@ -276,15 +343,15 @@ void mutex_vector_enter(mutex_impl_t *lp) { kthread_id_t owner; + kthread_id_t lastowner = MUTEX_NO_OWNER; /* track owner changes */ hrtime_t sleep_time = 0; /* how long we slept */ uint_t spin_count = 0; /* how many times we spun */ - cpu_t *cpup, *last_cpu; - extern cpu_t *cpu_list; + cpu_t *cpup; turnstile_t *ts; volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp; - int backoff; /* current backoff */ - int backctr; /* ctr for backoff */ + uint_t backoff = 0; /* current backoff */ int sleep_count = 0; + int changecnt = 0; /* count of owner changes */ ASSERT_STACK_ALIGNED(); @@ -314,42 +381,31 @@ mutex_vector_enter(mutex_impl_t *lp) CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1); - if (&plat_lock_delay) { - backoff = 0; - } else { - backoff = BACKOFF_BASE; - } - + backoff = mutex_lock_backoff(0); /* set base backoff */ for (;;) { -spin: spin_count++; - /* - * Add an exponential backoff delay before trying again - * to touch the mutex data structure. - * the spin_count test and call to nulldev are to prevent - * the compiler optimizer from eliminating the delay loop. - */ - if (&plat_lock_delay) { - plat_lock_delay(&backoff); - } else { - for (backctr = backoff; backctr; backctr--) { - if (!spin_count) (void) nulldev(); - }; /* delay */ - backoff = backoff << 1; /* double it */ - if (backoff > BACKOFF_CAP) { - backoff = BACKOFF_CAP; - } - - SMT_PAUSE(); - } + mutex_lock_delay(backoff); /* backoff delay */ if (panicstr) return; if ((owner = MUTEX_OWNER(vlp)) == NULL) { - if (mutex_adaptive_tryenter(lp)) + if (mutex_adaptive_tryenter(lp)) { break; + } + /* increase backoff only on failed attempt. */ + backoff = mutex_lock_backoff(backoff); + changecnt++; continue; + } else if (lastowner != owner) { + lastowner = owner; + backoff = mutex_lock_backoff(backoff); + changecnt++; + } + + if (changecnt >= ncpus_online) { + backoff = mutex_lock_backoff(0); + changecnt = 0; } if (owner == curthread) @@ -362,26 +418,9 @@ spin: if (owner == MUTEX_NO_OWNER) continue; - /* - * When searching the other CPUs, start with the one where - * we last saw the owner thread. If owner is running, spin. - * - * We must disable preemption at this point to guarantee - * that the list doesn't change while we traverse it - * without the cpu_lock mutex. While preemption is - * disabled, we must revalidate our cached cpu pointer.
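The jitter in default_lock_delay() picks a uniform wait in [mutex_backoff_base, backoff], so contending CPUs fall out of lockstep and some thread re-checks the lock early. A user-space approximation is sketched below; clock() stands in for MUTEX_GETTICK() and the thread-pointer entropy, and the busy loop stands in for MUTEX_DELAY().

    #include <stdio.h>
    #include <time.h>

    #define BACKOFF_BASE_MODEL  1

    static unsigned int
    pick_delay_model(unsigned int backoff)
    {
        unsigned long rnd = (unsigned long)clock();     /* tick stand-in */

        /* uniform in [BACKOFF_BASE_MODEL, backoff] */
        return ((unsigned int)(rnd % (backoff - BACKOFF_BASE_MODEL + 1)) +
            BACKOFF_BASE_MODEL);
    }

    int
    main(void)
    {
        volatile unsigned int spin;
        unsigned int cur = pick_delay_model(64);

        for (spin = cur; spin != 0; spin--)
            ;                   /* stand-in for MUTEX_DELAY() */
        printf("delayed %u iterations out of a max of 64\n", cur);
        return (0);
    }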
- */ - kpreempt_disable(); - if (cpup->cpu_next == NULL) - cpup = cpu_list; - last_cpu = cpup; /* mark end of search */ - do { - if (cpup->cpu_thread == owner) { - kpreempt_enable(); - goto spin; - } - } while ((cpup = cpup->cpu_next) != last_cpu); - kpreempt_enable(); + if (mutex_owner_running(lp) != NULL) { + continue; + } /* * The owner appears not to be running, so block. @@ -394,19 +433,11 @@ spin: /* * Recheck whether owner is running after waiters bit hits * global visibility (above). If owner is running, spin. - * - * Since we are at ipl DISP_LEVEL, kernel preemption is - * disabled, however we still need to revalidate our cached - * cpu pointer to make sure the cpu hasn't been deleted. */ - if (cpup->cpu_next == NULL) - last_cpu = cpup = cpu_list; - do { - if (cpup->cpu_thread == owner) { - turnstile_exit(lp); - goto spin; - } - } while ((cpup = cpup->cpu_next) != last_cpu); + if (mutex_owner_running(lp) != NULL) { + turnstile_exit(lp); + continue; + } membar_consumer(); /* @@ -418,6 +449,8 @@ spin: &mutex_sobj_ops, NULL, NULL); sleep_time += gethrtime(); sleep_count++; + /* reset backoff after turnstile */ + backoff = mutex_lock_backoff(0); } else { turnstile_exit(lp); } @@ -436,9 +469,10 @@ spin: /* * We do not count a sleep as a spin. */ - if (spin_count > sleep_count) + if (spin_count > sleep_count) { LOCKSTAT_RECORD(LS_MUTEX_ENTER_SPIN, lp, spin_count - sleep_count); + } LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp); } @@ -585,8 +619,8 @@ void lock_set_spin(lock_t *lp) { int spin_count = 1; - int backoff; /* current backoff */ - int backctr; /* ctr for backoff */ + int loop_count = 0; + uint_t backoff = 0; /* current backoff */ if (panicstr) return; @@ -594,36 +628,19 @@ lock_set_spin(lock_t *lp) if (ncpus == 1) panic("lock_set: %p lock held and only one CPU", lp); - if (&plat_lock_delay) { - backoff = 0; - } else { - backoff = BACKOFF_BASE; - } - while (LOCK_HELD(lp) || !lock_spin_try(lp)) { if (panicstr) return; spin_count++; - /* - * Add an exponential backoff delay before trying again - * to touch the mutex data structure. - * the spin_count test and call to nulldev are to prevent - * the compiler optimizer from eliminating the delay loop. - */ - if (&plat_lock_delay) { - plat_lock_delay(&backoff); - } else { - /* delay */ - for (backctr = backoff; backctr; backctr--) { - if (!spin_count) (void) nulldev(); - } + loop_count++; - backoff = backoff << 1; /* double it */ - if (backoff > BACKOFF_CAP) { - backoff = BACKOFF_CAP; - } - SMT_PAUSE(); + if (ncpus_online == loop_count) { + backoff = mutex_lock_backoff(0); + loop_count = 0; + } else { + backoff = mutex_lock_backoff(backoff); } + mutex_lock_delay(backoff); } if (spin_count) { @@ -637,8 +654,8 @@ void lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil) { int spin_count = 1; - int backoff; /* current backoff */ - int backctr; /* ctr for backoff */ + int loop_count = 0; + uint_t backoff = 0; /* current backoff */ if (panicstr) return; @@ -648,38 +665,23 @@ lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil) ASSERT(new_pil > LOCK_LEVEL); - if (&plat_lock_delay) { - backoff = 0; - } else { - backoff = BACKOFF_BASE; - } do { splx(old_pil); while (LOCK_HELD(lp)) { + spin_count++; + loop_count++; + if (panicstr) { *old_pil_addr = (ushort_t)splr(new_pil); return; } - spin_count++; - /* - * Add an exponential backoff delay before trying again - * to touch the mutex data structure. 
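The two deleted cpu_list walks are what mutex_owner_running() replaces: instead of scanning every CPU looking for the owner, it follows owner->t_cpu->cpu_thread and compares, a constant-time check. A C rendering of that computation under pared-down types follows; the real routine must live in assembly so the trap-return fixup can restart it, and this sketch assumes 8-byte-aligned thread structures (as the kernel guarantees).

    #include <stdint.h>
    #include <stddef.h>

    typedef struct cpu_model cpu_model_t;
    typedef struct kthread_model {
        cpu_model_t *t_cpu;             /* CPU the thread last ran on */
    } kthread_model_t;
    struct cpu_model {
        kthread_model_t *cpu_thread;    /* thread running there now */
    };

    #define MUTEX_THREAD_MODEL (~(uintptr_t)0x7)    /* strip waiters bit */

    static cpu_model_t *
    owner_running_model(uintptr_t m_owner)
    {
        kthread_model_t *owner;
        cpu_model_t *cp;

        owner = (kthread_model_t *)(m_owner & MUTEX_THREAD_MODEL);
        if (owner == NULL)
            return (NULL);              /* lock is free */
        cp = owner->t_cpu;              /* the load the fixup must guard */
        return (cp->cpu_thread == owner ? cp : NULL);
    }

    int
    main(void)
    {
        cpu_model_t c;
        kthread_model_t t = { &c };

        c.cpu_thread = &t;              /* owner on CPU: returns &c */
        return (owner_running_model((uintptr_t)&t) == &c ? 0 : 1);
    }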
- * spin_count test and call to nulldev are to prevent - * compiler optimizer from eliminating the delay loop. - */ - if (&plat_lock_delay) { - plat_lock_delay(&backoff); + if (ncpus_online == loop_count) { + backoff = mutex_lock_backoff(0); + loop_count = 0; } else { - for (backctr = backoff; backctr; backctr--) { - if (!spin_count) (void) nulldev(); - } - backoff = backoff << 1; /* double it */ - if (backoff > BACKOFF_CAP) { - backoff = BACKOFF_CAP; - } - - SMT_PAUSE(); + backoff = mutex_lock_backoff(backoff); } + mutex_lock_delay(backoff); } old_pil = splr(new_pil); } while (!lock_spin_try(lp)); diff --git a/usr/src/uts/common/sys/mutex.h b/usr/src/uts/common/sys/mutex.h index 60e81e88f8..53d1e28e15 100644 --- a/usr/src/uts/common/sys/mutex.h +++ b/usr/src/uts/common/sys/mutex.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -83,7 +83,16 @@ extern int mutex_tryenter(kmutex_t *); extern void mutex_exit(kmutex_t *); extern int mutex_owned(kmutex_t *); extern struct _kthread *mutex_owner(kmutex_t *); -extern void plat_lock_delay(int *); + +extern ushort_t mutex_backoff_base; +extern uint_t mutex_backoff_cap; +extern ushort_t mutex_cap_factor; +extern uchar_t mutex_backoff_shift; +extern void (*mutex_lock_delay)(uint_t); +extern uint_t (*mutex_lock_backoff)(uint_t); +extern void (*mutex_delay)(void); +extern void mutex_delay_default(void); +extern void mutex_sync(void); #endif /* _KERNEL */ diff --git a/usr/src/uts/i86pc/os/intr.c b/usr/src/uts/i86pc/os/intr.c index f68b96acd1..12d4304973 100644 --- a/usr/src/uts/i86pc/os/intr.c +++ b/usr/src/uts/i86pc/os/intr.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -999,6 +999,8 @@ sys_rtt_common(struct regs *rp) kthread_t *tp; extern void mutex_exit_critical_start(); extern long mutex_exit_critical_size; + extern void mutex_owner_running_critical_start(); + extern long mutex_owner_running_critical_size; loop: @@ -1076,6 +1078,19 @@ loop: mutex_exit_critical_size) { rp->r_pc = (greg_t)mutex_exit_critical_start; } + + /* + * If we interrupted the mutex_owner_running() critical region we + * must reset the PC back to the beginning to prevent dereferencing + * of a freed thread pointer. See the comments in mutex_owner_running + * for details. 
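One detail of the added sys_rtt_common() test above: a single unsigned comparison covers both ends of the critical window, because a PC below the start wraps around to a huge value and fails the test. That arithmetic can be checked in isolation:

    #include <stdint.h>
    #include <assert.h>

    static int
    in_critical(uintptr_t pc, uintptr_t start, uintptr_t size)
    {
        return (pc - start < size); /* below start: wraps, never < size */
    }

    int
    main(void)
    {
        assert(in_critical(0x1008, 0x1000, 0x20));
        assert(!in_critical(0x0ff8, 0x1000, 0x20)); /* below the window */
        assert(!in_critical(0x1020, 0x1000, 0x20)); /* one past the end */
        return (0);
    }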
+ */ + if ((uintptr_t)rp->r_pc - + (uintptr_t)mutex_owner_running_critical_start < + mutex_owner_running_critical_size) { + rp->r_pc = (greg_t)mutex_owner_running_critical_start; + } + return (0); } diff --git a/usr/src/uts/intel/ia32/ml/lock_prim.s b/usr/src/uts/intel/ia32/ml/lock_prim.s index 8dc51e3eeb..884ca02de8 100644 --- a/usr/src/uts/intel/ia32/ml/lock_prim.s +++ b/usr/src/uts/intel/ia32/ml/lock_prim.s @@ -30,11 +30,11 @@ #include <sys/thread.h> #include <sys/cpuvar.h> #include <vm/page.h> -#include <sys/mutex_impl.h> #else /* __lint */ #include "assym.h" #endif /* __lint */ +#include <sys/mutex_impl.h> #include <sys/asm_linkage.h> #include <sys/asm_misc.h> #include <sys/regset.h> @@ -665,6 +665,34 @@ mutex_exit(kmutex_t *lp) ret SET_SIZE(mutex_adaptive_tryenter) + .globl mutex_owner_running_critical_start + + ENTRY(mutex_owner_running) +mutex_owner_running_critical_start: + movq (%rdi), %r11 /* get owner field */ + andq $MUTEX_THREAD, %r11 /* remove waiters bit */ + cmpq $0, %r11 /* if free, skip */ + je 1f /* go return 0 */ + movq T_CPU(%r11), %r8 /* get owner->t_cpu */ + movq CPU_THREAD(%r8), %r9 /* get t_cpu->cpu_thread */ +.mutex_owner_running_critical_end: + cmpq %r11, %r9 /* owner == running thread? */ + je 2f /* yes, go return cpu */ +1: + xorq %rax, %rax /* return 0 */ + ret +2: + movq %r8, %rax /* return cpu */ + ret + SET_SIZE(mutex_owner_running) + + .globl mutex_owner_running_critical_size + .type mutex_owner_running_critical_size, @object + .align CPTRSIZE +mutex_owner_running_critical_size: + .quad .mutex_owner_running_critical_end - mutex_owner_running_critical_start + SET_SIZE(mutex_owner_running_critical_size) + .globl mutex_exit_critical_start ENTRY(mutex_exit) @@ -806,7 +834,36 @@ mutex_exit_critical_size: ret SET_SIZE(mutex_adaptive_tryenter) - .globl mutex_exit_critical_size + .globl mutex_owner_running_critical_start + + ENTRY(mutex_owner_running) +mutex_owner_running_critical_start: + movl 4(%esp), %eax /* get owner field */ + movl (%eax), %eax + andl $MUTEX_THREAD, %eax /* remove waiters bit */ + cmpl $0, %eax /* if free, skip */ + je 1f /* go return 0 */ + movl T_CPU(%eax), %ecx /* get owner->t_cpu */ + movl CPU_THREAD(%ecx), %edx /* get t_cpu->cpu_thread */ +.mutex_owner_running_critical_end: + cmpl %eax, %edx /* owner == running thread? */ + je 2f /* yes, go return cpu */ +1: + xorl %eax, %eax /* return 0 */ + ret +2: + movl %ecx, %eax /* return cpu */ + ret + + SET_SIZE(mutex_owner_running) + + .globl mutex_owner_running_critical_size + .type mutex_owner_running_critical_size, @object + .align CPTRSIZE +mutex_owner_running_critical_size: + .long .mutex_owner_running_critical_end - mutex_owner_running_critical_start + SET_SIZE(mutex_owner_running_critical_size) + .globl mutex_exit_critical_start ENTRY(mutex_exit) @@ -1398,3 +1455,43 @@ thread_onproc(kthread_id_t t, cpu_t *cp) #endif /* !__amd64 */ #endif /* __lint */ + +/* + * mutex_delay_default(void) + * Spins for approx a few hundred processor cycles and returns to caller. 
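The andq/andl against MUTEX_THREAD in both variants above relies on kthread structures being at least 8-byte aligned, which frees the low bits of the owner word for flags such as the waiters bit. A small decode of an owner word; the pointer value here is fabricated for the example, and the _MODEL macros are local to it:

    #include <stdio.h>
    #include <stdint.h>

    #define MUTEX_THREAD_MODEL  ((uintptr_t)-0x8)   /* mask low 3 bits */
    #define MUTEX_WAITERS_MODEL 0x1

    int
    main(void)
    {
        /* made-up 8-byte-aligned "thread pointer" with waiters set */
        uintptr_t m_owner = (uintptr_t)0x12345678 | MUTEX_WAITERS_MODEL;

        printf("owner %p, waiters %d\n",
            (void *)(m_owner & MUTEX_THREAD_MODEL),
            (int)(m_owner & MUTEX_WAITERS_MODEL));
        return (0);
    }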
+ */ + +#if defined(lint) || defined(__lint) + +void +mutex_delay_default(void) +{} + +#else /* __lint */ + +#if defined(__amd64) + + ENTRY(mutex_delay_default) + movq $92,%r11 +0: decq %r11 + jg 0b + ret + SET_SIZE(mutex_delay_default) + +#else + + ENTRY(mutex_delay_default) + push %ebp + movl %esp,%ebp + andl $-16,%esp + push %ebx + movl $93,%ebx +0: decl %ebx + jg 0b + pop %ebx + leave + ret + SET_SIZE(mutex_delay_default) + +#endif /* !__amd64 */ +#endif /* __lint */ diff --git a/usr/src/uts/intel/sys/mutex_impl.h b/usr/src/uts/intel/sys/mutex_impl.h index bcab84a979..c8cff15c2a 100644 --- a/usr/src/uts/intel/sys/mutex_impl.h +++ b/usr/src/uts/intel/sys/mutex_impl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -98,8 +97,31 @@ typedef union mutex_impl { #define MUTEX_DESTROY(lp) \ (lp)->m_owner = ((uintptr_t)curthread | MUTEX_DEAD) +/* mutex backoff delay macro and constants */ +#define MUTEX_BACKOFF_BASE 1 +#define MUTEX_BACKOFF_SHIFT 2 +#define MUTEX_CAP_FACTOR 64 +#define MUTEX_DELAY() { \ + mutex_delay(); \ + SMT_PAUSE(); \ + } + +/* low overhead clock read */ +#define MUTEX_GETTICK() tsc_read() +extern void null_xcall(void); +#define MUTEX_SYNC() { \ + cpuset_t set; \ + CPUSET_ALL(set); \ + xc_call(0, 0, 0, X_CALL_HIPRI, set, \ + (xc_func_t)null_xcall); \ + } extern int mutex_adaptive_tryenter(mutex_impl_t *); +extern void *mutex_owner_running(mutex_impl_t *); + +#else /* _ASM */ + +#define MUTEX_THREAD -0x8 #endif /* _ASM */ diff --git a/usr/src/uts/sparc/v9/ml/lock_prim.s b/usr/src/uts/sparc/v9/ml/lock_prim.s index 064e11dc32..4a58bacd01 100644 --- a/usr/src/uts/sparc/v9/ml/lock_prim.s +++ b/usr/src/uts/sparc/v9/ml/lock_prim.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -377,6 +376,11 @@ void mutex_exit(kmutex_t *lp) {} +/* ARGSUSED */ +void * +mutex_owner_running(mutex_impl_t *lp) +{ return (NULL); } + #else .align 32 ENTRY(mutex_enter) @@ -419,8 +423,12 @@ mutex_exit(kmutex_t *lp) mov %g0, %o0 SET_SIZE(mutex_adaptive_tryenter) + ! these need to be together and cache aligned for performance. 
+ .align 64 .global mutex_exit_critical_size .global mutex_exit_critical_start + .global mutex_owner_running_critical_size + .global mutex_owner_running_critical_start mutex_exit_critical_size = .mutex_exit_critical_end - mutex_exit_critical_start @@ -441,6 +449,30 @@ mutex_exit_critical_start: ! If we are interrupted, restart here nop SET_SIZE(mutex_exit) +mutex_owner_running_critical_size = .mutex_owner_running_critical_end - mutex_owner_running_critical_start + + .align 32 + + ENTRY(mutex_owner_running) +mutex_owner_running_critical_start: ! If interrupted restart here + ldn [%o0], %o1 ! get the owner field + and %o1, MUTEX_THREAD, %o1 ! remove the waiters bit if any + brz,pn %o1, 1f ! if no owner, drive on ... + nop + ldn [%o1+T_CPU], %o2 ! get owner->t_cpu + ldn [%o2+CPU_THREAD], %o3 ! get owner->t_cpu->cpu_thread +.mutex_owner_running_critical_end: ! for pil_interrupt() hook + cmp %o1, %o3 ! owner == running thread? + be,a,pt %xcc, 2f ! yes, go return cpu + nop +1: + retl + mov %g0, %o0 ! return 0 (owner not running) +2: + retl + mov %o2, %o0 ! owner running, return cpu + SET_SIZE(mutex_owner_running) + #endif /* lint */ /* @@ -729,3 +761,60 @@ thread_onproc(kthread_id_t t, cpu_t *cp) SET_SIZE(thread_onproc) #endif /* lint */ + +/* delay function used in some mutex code - just do 3 nop cas ops */ +#if defined(lint) + +/* ARGSUSED */ +void +cas_delay(void *addr) +{} +#else /* lint */ + ENTRY(cas_delay) + casx [%o0], %g0, %g0 + casx [%o0], %g0, %g0 + retl + casx [%o0], %g0, %g0 + SET_SIZE(cas_delay) +#endif /* lint */ + +#if defined(lint) + +/* + * alternative delay function for some niagara processors. The rd + * instruction uses fewer resources than casx on those cpus. + */ /* ARGSUSED */ +void +rdccr_delay(void) +{} +#else /* lint */ + ENTRY(rdccr_delay) + rd %ccr, %g0 + rd %ccr, %g0 + retl + rd %ccr, %g0 + SET_SIZE(rdccr_delay) +#endif /* lint */ + +/* * mutex_delay_default(void) * Spins for approx a few hundred processor cycles and returns to caller. */ #if defined(lint) void mutex_delay_default(void) {} #else /* lint */ + ENTRY(mutex_delay_default) + mov 72,%o0 1: brgz %o0, 1b + dec %o0 + retl + nop + SET_SIZE(mutex_delay_default) + #endif /* lint */ diff --git a/usr/src/uts/sparc/v9/sys/mutex_impl.h b/usr/src/uts/sparc/v9/sys/mutex_impl.h index d1236685fe..70b717eecf 100644 --- a/usr/src/uts/sparc/v9/sys/mutex_impl.h +++ b/usr/src/uts/sparc/v9/sys/mutex_impl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1991-1998,2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */
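cas_delay() and rdccr_delay() exist because the cheapest harmless instruction differs across Niagara generations; the kernel simply retargets the mutex_delay function pointer at CPU-init time (see the niagara.c and niagara2.c hunks further down). The dispatch pattern, modeled in C with stub delays and an assumed probe result:

    #include <stdio.h>

    static void
    cas_delay_model(void)
    {
        /* real version: three casx ops on a thread-private word */
    }

    static void
    rdccr_delay_model(void)
    {
        /* real version: three reads of %ccr */
    }

    static void (*mutex_delay_model)(void) = cas_delay_model;

    int
    main(void)
    {
        int is_niagara2 = 1;            /* assumed CPU probe result */

        if (is_niagara2)
            mutex_delay_model = rdccr_delay_model;
        mutex_delay_model();            /* call sites never change */
        return (0);
    }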
@@ -38,6 +37,7 @@ extern "C" { #endif +#define MUTEX_THREAD (-0x8) #ifndef _ASM /* @@ -96,7 +96,23 @@ typedef union mutex_impl { #define MUTEX_DESTROY(lp) \ (lp)->m_owner = ((uintptr_t)curthread | MUTEX_DEAD) +#define MUTEX_BACKOFF_BASE 1 +#define MUTEX_BACKOFF_SHIFT 1 +#define MUTEX_CAP_FACTOR 8 +#define MUTEX_DELAY() { \ + mutex_delay(); \ + } + +/* low-overhead clock read */ +extern u_longlong_t gettick(void); +#define MUTEX_GETTICK() gettick() +extern void null_xcall(void); +#define MUTEX_SYNC() xc_all((xcfunc_t *)null_xcall, 0, 0) + +extern void cas_delay(void *); +extern void rdccr_delay(void); extern int mutex_adaptive_tryenter(mutex_impl_t *); +extern void *mutex_owner_running(mutex_impl_t *); #endif /* _ASM */ diff --git a/usr/src/uts/sun4u/ml/mach_locore.s b/usr/src/uts/sun4u/ml/mach_locore.s index 9393dd2f7a..4447483f26 100644 --- a/usr/src/uts/sun4u/ml/mach_locore.s +++ b/usr/src/uts/sun4u/ml/mach_locore.s @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -802,6 +802,23 @@ rtt_ctx_end: movg %xcc, %o0, %l6 ! if current is lower, drop old pil 1: ! + ! If we interrupted the mutex_owner_running() critical region we + ! must reset the PC and nPC back to the beginning to prevent + ! dereferencing of a freed thread pointer. See the comments in + ! mutex_owner_running() for details. + ! + ldn [%l7 + PC_OFF], %l0 + set mutex_owner_running_critical_start, %l1 + sub %l0, %l1, %l0 + cmp %l0, mutex_owner_running_critical_size + bgeu,pt %xcc, 2f + mov THREAD_REG, %l0 + stn %l1, [%l7 + PC_OFF] ! restart mutex_owner_running() + add %l1, 4, %l1 + ba,pt %xcc, common_rtt + stn %l1, [%l7 + nPC_OFF] + +2: + ! ! If we interrupted the mutex_exit() critical region we must reset ! the PC and nPC back to the beginning to prevent missed wakeups. ! See the comments in mutex_exit() for details.
diff --git a/usr/src/uts/sun4u/opl/os/opl.c b/usr/src/uts/sun4u/opl/os/opl.c index f33b231117..071f00c142 100644 --- a/usr/src/uts/sun4u/opl/os/opl.c +++ b/usr/src/uts/sun4u/opl/os/opl.c @@ -102,19 +102,17 @@ static void pass2xscf_thread(); * Note FF/DC out-of-order instruction engine takes only a * single cycle to execute each spin loop * for comparison, Panther takes 6 cycles for same loop - * 1500 approx nsec for OPL sleep instruction - * if spin count = OPL_BOFF_SLEEP*OPL_BOFF_SPIN then - * spin time should be equal to OPL_BOFF_TM nsecs - * Listed values tuned for 2.15GHz to 2.4GHz systems + * OPL_BOFF_SPIN = base spin loop, roughly one memory reference time + * OPL_BOFF_TM = approx nsec for OPL sleep instruction (1600 for OPL-C) + * OPL_BOFF_SLEEP = approx number of SPIN iterations to equal one sleep + * OPL_BOFF_MAX_SCALE = scaling factor for max backoff based on active cpus + * Listed values tuned for 2.15GHz to 2.64GHz systems. * Value may change for future systems */ -#define OPL_BOFF_SPIN 720 -#define OPL_BOFF_BASE 1 -#define OPL_BOFF_SLEEP 5 -#define OPL_BOFF_CAP1 20 -#define OPL_BOFF_CAP2 60 -#define OPL_BOFF_MAX (40 * OPL_BOFF_SLEEP) -#define OPL_BOFF_TM 1500 +#define OPL_BOFF_SPIN 7 +#define OPL_BOFF_SLEEP 4 +#define OPL_BOFF_TM 1600 +#define OPL_BOFF_MAX_SCALE 8 #define OPL_CLOCK_TICK_THRESHOLD 128 #define OPL_CLOCK_TICK_NCPUS 64 @@ -946,6 +944,9 @@ plat_startup_memlist(caddr_t alloc_base) return (tmp_alloc_base); } +/* need to forward declare this */ +static void plat_lock_delay(uint_t); + void startup_platform(void) { @@ -953,6 +954,8 @@ clock_tick_threshold = OPL_CLOCK_TICK_THRESHOLD; if (clock_tick_ncpus == 0) clock_tick_ncpus = OPL_CLOCK_TICK_NCPUS; + mutex_lock_delay = plat_lock_delay; + mutex_cap_factor = OPL_BOFF_MAX_SCALE; } void @@ -997,13 +1000,12 @@ plat_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) } void -plat_lock_delay(int *backoff) +plat_lock_delay(uint_t backoff) { int i; - int cnt; - int flag; + uint_t cnt, remcnt; int ctr; - hrtime_t delay_start; + hrtime_t delay_start, rem_delay; /* * Platform specific lock delay code for OPL * * The sleep instruction is the preferred method of delay, * but is too large of granularity for the initial backoff. */ - if (*backoff == 0) *backoff = OPL_BOFF_BASE; - - flag = !*backoff; - - if (*backoff < OPL_BOFF_CAP1) { + if (backoff < 100) { /* * If desired backoff is long enough, * use sleep for most of it */ - for (cnt = *backoff; cnt >= OPL_BOFF_SLEEP; + for (cnt = backoff; + cnt >= OPL_BOFF_SLEEP; cnt -= OPL_BOFF_SLEEP) { cpu_smt_pause(); } /* * spin for small remainder of backoff - * - * fake call to nulldev included to prevent - * compiler from optimizing out the spin loop */ for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) { - if (flag) (void) nulldev(); + mutex_delay_default(); } } else { - /* backoff is very large. Fill it by sleeping */ + /* backoff is large. Fill it by sleeping */ delay_start = gethrtime(); - cnt = *backoff/OPL_BOFF_SLEEP; + cnt = backoff / OPL_BOFF_SLEEP; /* * use sleep instructions for delay */ @@ -1050,40 +1046,19 @@ * then the sleep ends immediately with a minimum time of * 42 clocks. We check gethrtime to insure we have * waited long enough. And we include both a short - * spin loop and a sleep for any final delay time. + * spin loop and a sleep for repeated delay times.
*/ - while ((gethrtime() - delay_start) < cnt * OPL_BOFF_TM) { - cpu_smt_pause(); - for (ctr = OPL_BOFF_SPIN; ctr; ctr--) { - if (flag) (void) nulldev(); + rem_delay = gethrtime() - delay_start; + while (rem_delay < cnt * OPL_BOFF_TM) { + remcnt = cnt - (rem_delay / OPL_BOFF_TM); + for (i = 0; i < remcnt; i++) { + cpu_smt_pause(); + for (ctr = OPL_BOFF_SPIN; ctr; ctr--) { + mutex_delay_default(); + } + } - } - } - - /* - * We adjust the backoff in three linear stages - * The initial stage has small increases as this phase is - * usually handle locks with light contention. We don't want - * to have a long backoff on a lock that is available. - * - * In the second stage, we are in transition, unsure whether - * the lock is under heavy contention. As the failures to - * obtain the lock increase, we back off further. - * - * For the final stage, we are in a heavily contended or - * long held long so we want to reduce the number of tries. - */ - if (*backoff < OPL_BOFF_CAP1) { - *backoff += 1; - } else { - if (*backoff < OPL_BOFF_CAP2) { - *backoff += OPL_BOFF_SLEEP; - } else { - *backoff += 2 * OPL_BOFF_SLEEP; - } - if (*backoff > OPL_BOFF_MAX) { - *backoff = OPL_BOFF_MAX; + rem_delay = gethrtime() - delay_start; } } } diff --git a/usr/src/uts/sun4u/serengeti/os/serengeti.c b/usr/src/uts/sun4u/serengeti/os/serengeti.c index 847384abee..fae7be521a 100644 --- a/usr/src/uts/sun4u/serengeti/os/serengeti.c +++ b/usr/src/uts/sun4u/serengeti/os/serengeti.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1297,6 +1297,9 @@ cpu_sgn_update(ushort_t sig, uchar_t state, uchar_t sub_state, int cpuid) void startup_platform(void) { + /* set per-platform constants for mutex backoff */ + mutex_backoff_base = 1; + mutex_cap_factor = 32; } /* diff --git a/usr/src/uts/sun4u/starcat/os/starcat.c b/usr/src/uts/sun4u/starcat/os/starcat.c index c00976dfc9..1a90d6adb5 100644 --- a/usr/src/uts/sun4u/starcat/os/starcat.c +++ b/usr/src/uts/sun4u/starcat/os/starcat.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1271,6 +1271,9 @@ starcat_dr_name(char *name) void startup_platform(void) { + /* set per-platform constants for mutex backoff */ + mutex_backoff_base = 2; + mutex_cap_factor = 64; } /* diff --git a/usr/src/uts/sun4v/cpu/niagara.c b/usr/src/uts/sun4v/cpu/niagara.c index 3e797806fd..339c5373da 100644 --- a/usr/src/uts/sun4v/cpu/niagara.c +++ b/usr/src/uts/sun4v/cpu/niagara.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -58,6 +58,7 @@ #include <sys/niagararegs.h> #include <sys/trapstat.h> #include <sys/hsvc.h> +#include <sys/mutex_impl.h> #define NI_MMU_PAGESIZE_MASK ((1 << TTE8K) | (1 << TTE64K) | (1 << TTE4M) \ | (1 << TTE256M)) @@ -199,6 +200,24 @@ cpu_map_exec_units(struct cpu *cp) cp->cpu_m.cpu_mpipe = 0; }
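Back to the OPL plat_lock_delay() above: it spends whole OPL_BOFF_SLEEP units in the roughly 1600 ns sleep instruction and burns the remainder in the spin loop. A model of that split using the patch's constants; the _MODEL and _model names are invented for the example.

    #include <stdio.h>

    #define OPL_BOFF_SPIN_MODEL  7
    #define OPL_BOFF_SLEEP_MODEL 4

    static void
    split_backoff_model(unsigned int backoff, unsigned int *sleeps,
        unsigned int *spins)
    {
        unsigned int cnt = backoff;

        *sleeps = 0;
        while (cnt >= OPL_BOFF_SLEEP_MODEL) {   /* one sleep per chunk */
            (*sleeps)++;
            cnt -= OPL_BOFF_SLEEP_MODEL;
        }
        *spins = cnt * OPL_BOFF_SPIN_MODEL;     /* spin the leftover */
    }

    int
    main(void)
    {
        unsigned int sleeps, spins;

        split_backoff_model(42, &sleeps, &spins);
        printf("backoff 42: %u sleeps, %u spin iterations\n",
            sleeps, spins);     /* 10 sleeps, remainder 2 -> 14 spins */
        return (0);
    }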
+void +cpu_mutex_delay(void) +{ + /* + * Dummy is the thread-private target of the cas. If multiple strands + * have the same kernel call stack, dummy could fall at the same VA and + * hence the same L2 cache bank. To avoid this, create multiple dummy + * words spread across several cache lines. + */ + struct { + long val; + long pad[7]; + } dummy[4]; + + long *ptr = &(dummy[CPU->cpu_seqid & 0x03].val); + cas_delay(ptr); +} + static int niagara_cpucnt; void @@ -212,6 +231,8 @@ cpu_init_private(struct cpu *cp) if ((niagara_cpucnt++ == 0) && (niagara_hsvc_available == B_TRUE)) niagara_kstat_init(); + + mutex_delay = cpu_mutex_delay; } /*ARGSUSED*/ diff --git a/usr/src/uts/sun4v/cpu/niagara2.c b/usr/src/uts/sun4v/cpu/niagara2.c index 18bacaa79f..17dd536659 100644 --- a/usr/src/uts/sun4v/cpu/niagara2.c +++ b/usr/src/uts/sun4v/cpu/niagara2.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -59,6 +59,7 @@ #include <sys/niagara2regs.h> #include <sys/hsvc.h> #include <sys/trapstat.h> +#include <sys/mutex_impl.h> uint_t root_phys_addr_lo_mask = 0xffffffffU; #if defined(NIAGARA2_IMPL) @@ -226,6 +227,8 @@ cpu_init_private(struct cpu *cp) if ((cpucnt++ == 0) && (cpu_hsvc_available == B_TRUE)) (void) niagara_kstat_init(); + + mutex_delay = rdccr_delay; } /*ARGSUSED*/ diff --git a/usr/src/uts/sun4v/ml/mach_locore.s b/usr/src/uts/sun4v/ml/mach_locore.s index 8e391ed480..0e970cd33c 100644 --- a/usr/src/uts/sun4v/ml/mach_locore.s +++ b/usr/src/uts/sun4v/ml/mach_locore.s @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -958,6 +958,23 @@ rtt_fill_end: movg %xcc, %o0, %l6 ! if current is lower, drop old pil 1: ! + ! If we interrupted the mutex_owner_running() critical region we + ! must reset the PC and nPC back to the beginning to prevent + ! dereferencing of a freed thread pointer. See the comments in + ! mutex_owner_running() for details. + ! + ldn [%l7 + PC_OFF], %l0 + set mutex_owner_running_critical_start, %l1 + sub %l0, %l1, %l0 + cmp %l0, mutex_owner_running_critical_size + bgeu,pt %xcc, 2f + mov THREAD_REG, %l0 + stn %l1, [%l7 + PC_OFF] ! restart mutex_owner_running() + add %l1, 4, %l1 + ba,pt %xcc, common_rtt + stn %l1, [%l7 + nPC_OFF] + +2: + ! ! If we interrupted the mutex_exit() critical region we must reset ! the PC and nPC back to the beginning to prevent missed wakeups. ! See the comments in mutex_exit() for details.
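A note on the dummy[] array in cpu_mutex_delay() above: one long plus seven pad longs is 64 bytes under LP64, so the four cas targets land on four distinct cache lines and strands hashing to different slots cannot collide on a single L2 bank. The layout assumption can be pinned down with a C11 static assertion; this sketch assumes an LP64 build (8-byte long), as in the 64-bit kernel.

    #include <assert.h>

    struct dummy_slot_model {
        long val;
        long pad[7];
    };

    /* one slot per 64-byte line; fails to compile on ILP32 builds */
    static_assert(sizeof (struct dummy_slot_model) == 64,
        "slot must span a full cache line");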
diff --git a/usr/src/uts/sun4v/os/fillsysinfo.c b/usr/src/uts/sun4v/os/fillsysinfo.c index 90390d5401..3cc50b8c63 100644 --- a/usr/src/uts/sun4v/os/fillsysinfo.c +++ b/usr/src/uts/sun4v/os/fillsysinfo.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -74,10 +74,13 @@ static int get_l2_cache_info(md_t *, mde_cookie_t, uint64_t *, uint64_t *, static void get_q_sizes(md_t *, mde_cookie_t); static void get_va_bits(md_t *, mde_cookie_t); static size_t get_ra_limit(md_t *); +static int get_l2_cache_node_count(md_t *); uint64_t system_clock_freq; uint_t niommu_tsbs = 0; +static int n_l2_caches = 0; + /* prevent compilation with VAC defined */ #ifdef VAC #error "The sun4v architecture does not support VAC" @@ -402,6 +405,9 @@ cpu_setup_common(char **cpu_module_isa_set) for (i = 0; i < nocpus; i++) fill_cpu(mdp, cpulist[i]); + /* setup l2 cache count. */ + n_l2_caches = get_l2_cache_node_count(mdp); + setup_chip_mappings(mdp); setup_exec_unit_mappings(mdp); @@ -792,6 +798,37 @@ get_va_bits(md_t *mdp, mde_cookie_t cpu_node_cookie) va_bits = DEFAULT_VA_ADDRESS_SPACE_BITS; } +int +l2_cache_node_count(void) +{ + return (n_l2_caches); +} + +/* + * count the number of l2 caches. + */ +static int +get_l2_cache_node_count(md_t *mdp) +{ + int i; + mde_cookie_t *cachenodes; + uint64_t level; + int n_cachenodes = md_alloc_scan_dag(mdp, md_root_node(mdp), + "cache", "fwd", &cachenodes); + int l2_caches = 0; + + for (i = 0; i < n_cachenodes; i++) { + if (md_get_prop_val(mdp, cachenodes[i], "level", &level) != 0) { + level = 0; + } + if (level == 2) { + l2_caches++; + } + } + md_free_scan_dag(mdp, &cachenodes); + return (l2_caches); +} + /* * This routine returns the L2 cache information such as -- associativity, * size and linesize. diff --git a/usr/src/uts/sun4v/os/mach_startup.c b/usr/src/uts/sun4v/os/mach_startup.c index 333212b4f5..7446aff433 100644 --- a/usr/src/uts/sun4v/os/mach_startup.c +++ b/usr/src/uts/sun4v/os/mach_startup.c @@ -39,6 +39,8 @@ #include <sys/traptrace.h> #include <sys/modctl.h> #include <sys/ldoms.h> +#include <sys/cpu_module.h> +#include <sys/mutex_impl.h> #include <vm/vm_dep.h> #ifdef TRAPTRACE @@ -306,6 +308,14 @@ startup_platform(void) clock_tick_threshold = SUN4V_CLOCK_TICK_THRESHOLD; if (clock_tick_ncpus == 0) clock_tick_ncpus = SUN4V_CLOCK_TICK_NCPUS; + /* set per-platform constants for mutex_backoff */ + mutex_backoff_base = 1; + mutex_cap_factor = 4; + if (l2_cache_node_count() > 1) { + /* VF for example */ + mutex_backoff_base = 2; + mutex_cap_factor = 16; + } } /* diff --git a/usr/src/uts/sun4v/sys/cpu_module.h b/usr/src/uts/sun4v/sys/cpu_module.h index b7254aba6c..6ebb761d6d 100644 --- a/usr/src/uts/sun4v/sys/cpu_module.h +++ b/usr/src/uts/sun4v/sys/cpu_module.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -160,6 +160,8 @@ void cpu_trapstat_data(void *buf, uint_t pgszs); | (1 << TTE4M)) void cpu_setup_common(char **); +int l2_cache_node_count(void); + void fill_cpu(md_t *, mde_cookie_t); int setup_cpu_common(int); int cleanup_cpu_common(int);
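The sun4v startup_platform() tuning above composes the new pieces: a machine-description scan counts L2 caches, and more than one (Victoria Falls, for example) implies more memory-side queueing, so both tunables grow. A sketch of that decision in isolation, with the cache count supplied as an assumed input and _model names local to the example:

    #include <stdio.h>

    static unsigned int mutex_backoff_base_model = 1;
    static unsigned int mutex_cap_factor_model = 4;

    static void
    tune_model(int n_l2_caches)
    {
        if (n_l2_caches > 1) {      /* e.g. Victoria Falls */
            mutex_backoff_base_model = 2;
            mutex_cap_factor_model = 16;
        }
    }

    int
    main(void)
    {
        tune_model(2);              /* assumed two L2 caches */
        printf("base %u, cap factor %u\n",
            mutex_backoff_base_model, mutex_cap_factor_model);
        return (0);
    }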