author      pm145316 <none@none>  2007-03-28 15:01:23 -0700
committer   pm145316 <none@none>  2007-03-28 15:01:23 -0700
commit      e603b7d4a53c0b52084ca06218e6eed01ae7d6f1 (patch)
tree        780405a07ef169a898812fb3b792b4e9397405e9 /usr/src
parent      e86227432bc577be1ae10ffa1471a3c8831e4b72 (diff)
download    illumos-gate-e603b7d4a53c0b52084ca06218e6eed01ae7d6f1.tar.gz
6486343 poor mutex performance on large OPL machines
Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/uts/common/os/mutex.c    86
-rw-r--r--  usr/src/uts/common/sys/mutex.h   10
-rw-r--r--  usr/src/uts/sun4u/opl/os/opl.c  113
3 files changed, 177 insertions, 32 deletions
diff --git a/usr/src/uts/common/os/mutex.c b/usr/src/uts/common/os/mutex.c
index 5559bf52dc..0b95f1f326 100644
--- a/usr/src/uts/common/os/mutex.c
+++ b/usr/src/uts/common/os/mutex.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -212,8 +212,14 @@
* throughput was observed with the given values. For cases where
* more than 20 threads were waiting on the same lock, lock throughput
* increased by a factor of 5 or more using the backoff algorithm.
+ *
+ * Some platforms may provide their own platform-specific delay code,
+ * using plat_lock_delay(backoff). If it is available, plat_lock_delay
+ * is executed instead of the default delay code.
*/
+#pragma weak plat_lock_delay
+
#include <sys/param.h>
#include <sys/time.h>
#include <sys/cpuvar.h>
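
The if (&plat_lock_delay) tests introduced by this patch rely on the #pragma weak declaration above: a weak reference that no object file defines resolves to address 0, so taking the function's address doubles as a runtime presence check. Below is a minimal user-level sketch of the same hook pattern, assuming an ELF toolchain such as gcc; plat_delay, default_delay, and lock_delay are hypothetical names, not part of the patch.

/*
 * plat_delay() is a weak reference; &plat_delay is 0 unless some
 * other object file supplies a strong definition.
 */
#pragma weak plat_delay
extern void plat_delay(int *);

static volatile int sink;

/* Generic fallback: spin, then double the delay (capped at 16 here). */
static void
default_delay(int *backoff)
{
	int i;

	for (i = *backoff; i; i--)
		sink = i;		/* volatile store keeps the loop alive */
	if ((*backoff <<= 1) > 16)
		*backoff = 16;
}

void
lock_delay(int *backoff)
{
	if (&plat_delay)
		plat_delay(backoff);	/* platform-supplied policy */
	else
		default_delay(backoff);	/* generic policy */
}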
@@ -307,7 +313,11 @@ mutex_vector_enter(mutex_impl_t *lp)
 	CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);
-	backoff = BACKOFF_BASE;
+	if (&plat_lock_delay) {
+		backoff = 0;
+	} else {
+		backoff = BACKOFF_BASE;
+	}
 	for (;;) {
 spin:
@@ -318,15 +328,19 @@ spin:
 		 * the spin_count test and call to nulldev are to prevent
 		 * the compiler optimizer from eliminating the delay loop.
 		 */
-		for (backctr = backoff; backctr; backctr--) {
-			if (!spin_count) (void) nulldev();
-		}; /* delay */
-		backoff = backoff << 1; /* double it */
-		if (backoff > BACKOFF_CAP) {
-			backoff = BACKOFF_CAP;
-		}
+		if (&plat_lock_delay) {
+			plat_lock_delay(&backoff);
+		} else {
+			for (backctr = backoff; backctr; backctr--) {
+				if (!spin_count) (void) nulldev();
+			}; /* delay */
+			backoff = backoff << 1; /* double it */
+			if (backoff > BACKOFF_CAP) {
+				backoff = BACKOFF_CAP;
+			}
-		SMT_PAUSE();
+			SMT_PAUSE();
+		}
 		if (panicstr)
 			return;
@@ -579,7 +593,12 @@ lock_set_spin(lock_t *lp)
 	if (ncpus == 1)
 		panic("lock_set: %p lock held and only one CPU", lp);
-	backoff = BACKOFF_BASE;
+	if (&plat_lock_delay) {
+		backoff = 0;
+	} else {
+		backoff = BACKOFF_BASE;
+	}
+
 	while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
 		if (panicstr)
 			return;
@@ -590,15 +609,20 @@ lock_set_spin(lock_t *lp)
 		 * the spin_count test and call to nulldev are to prevent
 		 * the compiler optimizer from eliminating the delay loop.
 		 */
-		for (backctr = backoff; backctr; backctr--) { /* delay */
-			if (!spin_count) (void) nulldev();
-		}
+		if (&plat_lock_delay) {
+			plat_lock_delay(&backoff);
+		} else {
+			/* delay */
+			for (backctr = backoff; backctr; backctr--) {
+				if (!spin_count) (void) nulldev();
+			}
-		backoff = backoff << 1; /* double it */
-		if (backoff > BACKOFF_CAP) {
-			backoff = BACKOFF_CAP;
+			backoff = backoff << 1; /* double it */
+			if (backoff > BACKOFF_CAP) {
+				backoff = BACKOFF_CAP;
+			}
+			SMT_PAUSE();
 		}
-		SMT_PAUSE();
 	}
if (spin_count) {
@@ -623,7 +647,11 @@ lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
 	ASSERT(new_pil > LOCK_LEVEL);
-	backoff = BACKOFF_BASE;
+	if (&plat_lock_delay) {
+		backoff = 0;
+	} else {
+		backoff = BACKOFF_BASE;
+	}
 	do {
 		splx(old_pil);
 		while (LOCK_HELD(lp)) {
@@ -638,15 +666,19 @@ lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
 			 * spin_count test and call to nulldev are to prevent
 			 * compiler optimizer from eliminating the delay loop.
 			 */
-			for (backctr = backoff; backctr; backctr--) {
-				if (!spin_count) (void) nulldev();
+			if (&plat_lock_delay) {
+				plat_lock_delay(&backoff);
+			} else {
+				for (backctr = backoff; backctr; backctr--) {
+					if (!spin_count) (void) nulldev();
+				}
+				backoff = backoff << 1; /* double it */
+				if (backoff > BACKOFF_CAP) {
+					backoff = BACKOFF_CAP;
+				}
+
+				SMT_PAUSE();
 			}
-			backoff = backoff << 1; /* double it */
-			if (backoff > BACKOFF_CAP) {
-				backoff = BACKOFF_CAP;
-			}
-
-			SMT_PAUSE();
 		}
 		old_pil = splr(new_pil);
 	} while (!lock_spin_try(lp));
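
The default path that the patch leaves in place in all three spin loops above is a bounded exponential backoff: spin for backoff iterations, then double the delay, but never beyond BACKOFF_CAP, so waiters on a contended lock probe it less and less often. A self-contained sketch of that policy, with MY_BASE and MY_CAP as illustrative stand-ins for BACKOFF_BASE and BACKOFF_CAP (the real values are defined in mutex.c):

#define	MY_BASE	50		/* assumed initial spin count */
#define	MY_CAP	1600		/* assumed upper bound */

static volatile int sink;

static void
spin_backoff(int *backoff)
{
	int backctr;

	/* The volatile store plays the role of the nulldev() call. */
	for (backctr = *backoff; backctr; backctr--)
		sink = backctr;

	*backoff = *backoff << 1;	/* double it */
	if (*backoff > MY_CAP)
		*backoff = MY_CAP;	/* cap the delay */
}

A caller would set *backoff = MY_BASE before the first attempt and invoke spin_backoff() after each failed trylock; starting small keeps lightly contended acquisitions fast, while the cap bounds the worst-case wait between probes.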
diff --git a/usr/src/uts/common/sys/mutex.h b/usr/src/uts/common/sys/mutex.h
index a754467ad5..60e81e88f8 100644
--- a/usr/src/uts/common/sys/mutex.h
+++ b/usr/src/uts/common/sys/mutex.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,8 +19,8 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 1991-1998 by Sun Microsystems, Inc.
- * All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
*/
#ifndef _SYS_MUTEX_H
@@ -84,6 +83,7 @@ extern int mutex_tryenter(kmutex_t *);
extern void mutex_exit(kmutex_t *);
extern int mutex_owned(kmutex_t *);
extern struct _kthread *mutex_owner(kmutex_t *);
+extern void plat_lock_delay(int *);
#endif /* _KERNEL */
diff --git a/usr/src/uts/sun4u/opl/os/opl.c b/usr/src/uts/sun4u/opl/os/opl.c
index c8d8931447..0a544bf629 100644
--- a/usr/src/uts/sun4u/opl/os/opl.c
+++ b/usr/src/uts/sun4u/opl/os/opl.c
@@ -46,6 +46,8 @@
#include <sys/lgrp.h>
#include <sys/memnode.h>
#include <sys/sysmacros.h>
+#include <sys/time.h>
+#include <sys/cpu.h>
#include <vm/vm_dep.h>
int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
@@ -88,6 +90,24 @@ static opl_model_info_t *opl_cur_model = NULL;
static struct memlist *opl_memlist_per_board(struct memlist *ml);
+/*
+ * Note: the FF/DC out-of-order instruction engine takes only a
+ * single cycle to execute each iteration of the spin loop;
+ * for comparison, Panther takes 6 cycles for the same loop.
+ * The OPL sleep instruction takes approximately 1500 nsec.
+ * If the spin count = OPL_BOFF_SLEEP * OPL_BOFF_SPIN, then the
+ * spin time should equal OPL_BOFF_TM nsecs.
+ * The listed values are tuned for 2.15GHz to 2.4GHz systems and
+ * may change for future systems.
+ */
+#define	OPL_BOFF_SPIN	720
+#define	OPL_BOFF_BASE	1
+#define	OPL_BOFF_SLEEP	5
+#define	OPL_BOFF_CAP1	20
+#define	OPL_BOFF_CAP2	60
+#define	OPL_BOFF_MAX	(40 * OPL_BOFF_SLEEP)
+#define	OPL_BOFF_TM	1500
+
int
set_platform_max_ncpus(void)
{
@@ -998,3 +1018,96 @@ plat_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
 	}
 	return (opl_get_mem_addr(unum, sid, offset, addrp));
 }
+
+void
+plat_lock_delay(int *backoff)
+{
+	int i;
+	int cnt;
+	int flag;
+	int ctr;
+	hrtime_t delay_start;
+	/*
+	 * Platform-specific lock delay code for OPL.
+	 *
+	 * We use staged linear increases in the delay.
+	 * The sleep instruction is the preferred method of delay,
+	 * but its granularity is too coarse for the initial backoff.
+	 */
+
+	if (*backoff == 0) *backoff = OPL_BOFF_BASE;
+
+	flag = !*backoff;
+
+	if (*backoff < OPL_BOFF_CAP1) {
+		/*
+		 * If the desired backoff is long enough,
+		 * use sleep for most of it.
+		 */
+		for (cnt = *backoff;
+		    cnt >= OPL_BOFF_SLEEP;
+		    cnt -= OPL_BOFF_SLEEP) {
+			cpu_smt_pause();
+		}
+		/*
+		 * Spin for the small remainder of the backoff.
+		 *
+		 * The fake call to nulldev is included to prevent
+		 * the compiler from optimizing out the spin loop.
+		 */
+		for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) {
+			if (flag) (void) nulldev();
+		}
+	} else {
+		/* The backoff is very large; fill it by sleeping. */
+		delay_start = gethrtime();
+		cnt = *backoff / OPL_BOFF_SLEEP;
+		/*
+		 * Use sleep instructions for the delay.
+		 */
+		for (i = 0; i < cnt; i++) {
+			cpu_smt_pause();
+		}
+
+		/*
+		 * Note: if the other strand executes a sleep instruction,
+		 * then this sleep ends immediately with a minimum time of
+		 * 42 clocks. We check gethrtime to ensure we have
+		 * waited long enough. And we include both a short
+		 * spin loop and a sleep for any final delay time.
+		 */
+
+		while ((gethrtime() - delay_start) < cnt * OPL_BOFF_TM) {
+			cpu_smt_pause();
+			for (ctr = OPL_BOFF_SPIN; ctr; ctr--) {
+				if (flag) (void) nulldev();
+			}
+		}
+	}
+
+	/*
+	 * We adjust the backoff in three linear stages.
+	 * The initial stage has small increases, as this phase
+	 * usually handles locks with light contention. We don't want
+	 * to have a long backoff on a lock that is available.
+	 *
+	 * In the second stage, we are in transition, unsure whether
+	 * the lock is under heavy contention. As the failures to
+	 * obtain the lock increase, we back off further.
+	 *
+	 * For the final stage, we are in a heavily contended or
+	 * long-held lock, so we want to reduce the number of tries.
+	 */
+	if (*backoff < OPL_BOFF_CAP1) {
+		*backoff += 1;
+	} else {
+		if (*backoff < OPL_BOFF_CAP2) {
+			*backoff += OPL_BOFF_SLEEP;
+		} else {
+			*backoff += 2 * OPL_BOFF_SLEEP;
+		}
+		if (*backoff > OPL_BOFF_MAX) {
+			*backoff = OPL_BOFF_MAX;
+		}
+	}
+}
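
The constants and the staged policy fit together arithmetically: on a roughly 2.4GHz FF/DC core where each spin iteration costs one cycle, OPL_BOFF_SLEEP * OPL_BOFF_SPIN = 5 * 720 = 3600 cycles, or about 1500 nsec, which is OPL_BOFF_TM, the nominal cost of one sleep instruction; one sleep is therefore interchangeable with OPL_BOFF_SLEEP backoff units of spinning. The growth of *backoff across failed acquisitions can be traced with a small user-level sketch of the three-stage adjustment above (the main() harness is illustrative, not part of the patch):

#include <stdio.h>

#define	OPL_BOFF_BASE	1
#define	OPL_BOFF_SLEEP	5
#define	OPL_BOFF_CAP1	20
#define	OPL_BOFF_CAP2	60
#define	OPL_BOFF_MAX	(40 * OPL_BOFF_SLEEP)

int
main(void)
{
	int backoff = OPL_BOFF_BASE;
	int attempt;

	for (attempt = 1; backoff < OPL_BOFF_MAX; attempt++) {
		printf("attempt %3d: backoff %3d\n", attempt, backoff);
		if (backoff < OPL_BOFF_CAP1) {
			backoff += 1;		/* stage 1: light contention */
		} else {
			if (backoff < OPL_BOFF_CAP2)
				backoff += OPL_BOFF_SLEEP;	/* stage 2 */
			else
				backoff += 2 * OPL_BOFF_SLEEP;	/* stage 3 */
			if (backoff > OPL_BOFF_MAX)
				backoff = OPL_BOFF_MAX;
		}
	}
	printf("attempt %3d: backoff capped at %d\n", attempt, backoff);
	return (0);
}

The trace shows roughly twenty single-step attempts, then eight five-unit steps up to OPL_BOFF_CAP2, then ten-unit steps until the OPL_BOFF_MAX cap of 200 units; after that, every retry waits the maximum delay.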