| author | pm145316 <none@none> | 2007-03-28 15:01:23 -0700 |
|---|---|---|
| committer | pm145316 <none@none> | 2007-03-28 15:01:23 -0700 |
| commit | e603b7d4a53c0b52084ca06218e6eed01ae7d6f1 (patch) | |
| tree | 780405a07ef169a898812fb3b792b4e9397405e9 /usr/src | |
| parent | e86227432bc577be1ae10ffa1471a3c8831e4b72 (diff) | |
| download | illumos-gate-e603b7d4a53c0b52084ca06218e6eed01ae7d6f1.tar.gz | |
6486343 poor mutex performance on large OPL machines
Diffstat (limited to 'usr/src')
| -rw-r--r-- | usr/src/uts/common/os/mutex.c | 86 |
| -rw-r--r-- | usr/src/uts/common/sys/mutex.h | 10 |
| -rw-r--r-- | usr/src/uts/sun4u/opl/os/opl.c | 113 |
3 files changed, 177 insertions, 32 deletions
diff --git a/usr/src/uts/common/os/mutex.c b/usr/src/uts/common/os/mutex.c
index 5559bf52dc..0b95f1f326 100644
--- a/usr/src/uts/common/os/mutex.c
+++ b/usr/src/uts/common/os/mutex.c
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
@@ -212,8 +212,14 @@
  * throughput was observed with the given values. For cases where
  * more than 20 threads were waiting on the same lock, lock throughput
  * increased by a factor of 5 or more using the backoff algorithm.
+ *
+ * Some platforms may provide their own platform specific delay code,
+ * using plat_lock_delay(backoff).  If it is available, plat_lock_delay
+ * is executed instead of the default delay code.
  */
 
+#pragma weak plat_lock_delay
+
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/cpuvar.h>
@@ -307,7 +313,11 @@ mutex_vector_enter(mutex_impl_t *lp)
 
 	CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);
 
-	backoff = BACKOFF_BASE;
+	if (&plat_lock_delay) {
+		backoff = 0;
+	} else {
+		backoff = BACKOFF_BASE;
+	}
 	for (;;) {
 spin:
@@ -318,15 +328,19 @@ spin:
 		 * the spin_count test and call to nulldev are to prevent
 		 * the compiler optimizer from eliminating the delay loop.
 		 */
-		for (backctr = backoff; backctr; backctr--) {
-			if (!spin_count) (void) nulldev();
-		};	/* delay */
-		backoff = backoff << 1;			/* double it */
-		if (backoff > BACKOFF_CAP) {
-			backoff = BACKOFF_CAP;
-		}
+		if (&plat_lock_delay) {
+			plat_lock_delay(&backoff);
+		} else {
+			for (backctr = backoff; backctr; backctr--) {
+				if (!spin_count) (void) nulldev();
+			};	/* delay */
+			backoff = backoff << 1;		/* double it */
+			if (backoff > BACKOFF_CAP) {
+				backoff = BACKOFF_CAP;
+			}
 
-		SMT_PAUSE();
+			SMT_PAUSE();
+		}
 
 		if (panicstr)
 			return;
@@ -579,7 +593,12 @@ lock_set_spin(lock_t *lp)
 	if (ncpus == 1)
 		panic("lock_set: %p lock held and only one CPU", lp);
 
-	backoff = BACKOFF_BASE;
+	if (&plat_lock_delay) {
+		backoff = 0;
+	} else {
+		backoff = BACKOFF_BASE;
+	}
+
 	while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
 		if (panicstr)
 			return;
@@ -590,15 +609,20 @@ lock_set_spin(lock_t *lp)
 		 * the spin_count test and call to nulldev are to prevent
 		 * the compiler optimizer from eliminating the delay loop.
 		 */
-		for (backctr = backoff; backctr; backctr--) {	/* delay */
-			if (!spin_count) (void) nulldev();
-		}
+		if (&plat_lock_delay) {
+			plat_lock_delay(&backoff);
+		} else {
+			/* delay */
+			for (backctr = backoff; backctr; backctr--) {
+				if (!spin_count) (void) nulldev();
+			}
 
-		backoff = backoff << 1;		/* double it */
-		if (backoff > BACKOFF_CAP) {
-			backoff = BACKOFF_CAP;
+			backoff = backoff << 1;	/* double it */
+			if (backoff > BACKOFF_CAP) {
+				backoff = BACKOFF_CAP;
+			}
+			SMT_PAUSE();
 		}
-		SMT_PAUSE();
 	}
 
 	if (spin_count) {
@@ -623,7 +647,11 @@ lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
 
 	ASSERT(new_pil > LOCK_LEVEL);
 
-	backoff = BACKOFF_BASE;
+	if (&plat_lock_delay) {
+		backoff = 0;
+	} else {
+		backoff = BACKOFF_BASE;
+	}
 	do {
 		splx(old_pil);
 		while (LOCK_HELD(lp)) {
@@ -638,15 +666,19 @@ lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
 			 * spin_count test and call to nulldev are to prevent
 			 * compiler optimizer from eliminating the delay loop.
 			 */
-			for (backctr = backoff; backctr; backctr--) {
-				if (!spin_count) (void) nulldev();
+			if (&plat_lock_delay) {
+				plat_lock_delay(&backoff);
+			} else {
+				for (backctr = backoff; backctr; backctr--) {
+					if (!spin_count) (void) nulldev();
+				}
+				backoff = backoff << 1;	/* double it */
+				if (backoff > BACKOFF_CAP) {
+					backoff = BACKOFF_CAP;
+				}
+
+				SMT_PAUSE();
 			}
-			backoff = backoff << 1;	/* double it */
-			if (backoff > BACKOFF_CAP) {
-				backoff = BACKOFF_CAP;
-			}
-
-			SMT_PAUSE();
 		}
 		old_pil = splr(new_pil);
 	} while (!lock_spin_try(lp));
diff --git a/usr/src/uts/common/sys/mutex.h b/usr/src/uts/common/sys/mutex.h
index a754467ad5..60e81e88f8 100644
--- a/usr/src/uts/common/sys/mutex.h
+++ b/usr/src/uts/common/sys/mutex.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
@@ -20,8 +19,8 @@
  * CDDL HEADER END
  */
 /*
- * Copyright (c) 1991-1998 by Sun Microsystems, Inc.
- * All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
  */
 
 #ifndef _SYS_MUTEX_H
@@ -84,6 +83,7 @@ extern int mutex_tryenter(kmutex_t *);
 extern void mutex_exit(kmutex_t *);
 extern int mutex_owned(kmutex_t *);
 extern struct _kthread *mutex_owner(kmutex_t *);
+extern void plat_lock_delay(int *);
 
 #endif	/* _KERNEL */
diff --git a/usr/src/uts/sun4u/opl/os/opl.c b/usr/src/uts/sun4u/opl/os/opl.c
index c8d8931447..0a544bf629 100644
--- a/usr/src/uts/sun4u/opl/os/opl.c
+++ b/usr/src/uts/sun4u/opl/os/opl.c
@@ -46,6 +46,8 @@
 #include <sys/lgrp.h>
 #include <sys/memnode.h>
 #include <sys/sysmacros.h>
+#include <sys/time.h>
+#include <sys/cpu.h>
 #include <vm/vm_dep.h>
 
 int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
@@ -88,6 +90,24 @@ static opl_model_info_t *opl_cur_model = NULL;
 
 static struct memlist *opl_memlist_per_board(struct memlist *ml);
 
+/*
+ * Note FF/DC out-of-order instruction engine takes only a
+ * single cycle to execute each spin loop
+ * for comparison, Panther takes 6 cycles for same loop
+ * 1500 approx nsec for OPL sleep instruction
+ * if spin count = OPL_BOFF_SLEEP*OPL_BOFF_SPIN then
+ * spin time should be equal to OPL_BOFF_TM nsecs
+ * Listed values tuned for 2.15GHz to 2.4GHz systems
+ * Value may change for future systems
+ */
+#define	OPL_BOFF_SPIN 720
+#define	OPL_BOFF_BASE 1
+#define	OPL_BOFF_SLEEP 5
+#define	OPL_BOFF_CAP1 20
+#define	OPL_BOFF_CAP2 60
+#define	OPL_BOFF_MAX (40 * OPL_BOFF_SLEEP)
+#define	OPL_BOFF_TM 1500
+
 int
 set_platform_max_ncpus(void)
 {
@@ -998,3 +1018,96 @@ plat_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
 	}
 	return (opl_get_mem_addr(unum, sid, offset, addrp));
 }
+
+void
+plat_lock_delay(int *backoff)
+{
+	int i;
+	int cnt;
+	int flag;
+	int ctr;
+	hrtime_t delay_start;
+	/*
+	 * Platform specific lock delay code for OPL
+	 *
+	 * Using staged linear increases in the delay.
+	 * The sleep instruction is the preferred method of delay,
+	 * but is too large of granularity for the initial backoff.
+	 */
+
+	if (*backoff == 0) *backoff = OPL_BOFF_BASE;
+
+	flag = !*backoff;
+
+	if (*backoff < OPL_BOFF_CAP1) {
+		/*
+		 * If desired backoff is long enough,
+		 * use sleep for most of it
+		 */
+		for (cnt = *backoff;
+		    cnt >= OPL_BOFF_SLEEP;
+		    cnt -= OPL_BOFF_SLEEP) {
+			cpu_smt_pause();
+		}
+		/*
+		 * spin for small remainder of backoff
+		 *
+		 * fake call to nulldev included to prevent
+		 * compiler from optimizing out the spin loop
+		 */
+		for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) {
+			if (flag) (void) nulldev();
+		}
+	} else {
+		/* backoff is very large.  Fill it by sleeping */
+		delay_start = gethrtime();
+		cnt = *backoff/OPL_BOFF_SLEEP;
+		/*
+		 * use sleep instructions for delay
+		 */
+		for (i = 0; i < cnt; i++) {
+			cpu_smt_pause();
+		}
+
+		/*
+		 * Note: if the other strand executes a sleep instruction,
+		 * then the sleep ends immediately with a minimum time of
+		 * 42 clocks.  We check gethrtime to insure we have
+		 * waited long enough.  And we include both a short
+		 * spin loop and a sleep for any final delay time.
+		 */
+
+		while ((gethrtime() - delay_start) < cnt * OPL_BOFF_TM) {
+			cpu_smt_pause();
+			for (ctr = OPL_BOFF_SPIN; ctr; ctr--) {
+				if (flag) (void) nulldev();
+			}
+		}
+	}
+
+	/*
+	 * We adjust the backoff in three linear stages
+	 * The initial stage has small increases as this phase is
+	 * usually handle locks with light contention.  We don't want
+	 * to have a long backoff on a lock that is available.
+	 *
+	 * In the second stage, we are in transition, unsure whether
+	 * the lock is under heavy contention.  As the failures to
+	 * obtain the lock increase, we back off further.
+	 *
+	 * For the final stage, we are in a heavily contended or
+	 * long held long so we want to reduce the number of tries.
+	 */
+	if (*backoff < OPL_BOFF_CAP1) {
+		*backoff += 1;
+	} else {
+		if (*backoff < OPL_BOFF_CAP2) {
+			*backoff += OPL_BOFF_SLEEP;
+		} else {
+			*backoff += 2 * OPL_BOFF_SLEEP;
+		}
+		if (*backoff > OPL_BOFF_MAX) {
+			*backoff = OPL_BOFF_MAX;
+		}
+	}
+}
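
For readers unfamiliar with the dispatch trick the mutex.c hunks rely on: `#pragma weak plat_lock_delay` marks the symbol weak, so on platforms that never define it the address `&plat_lock_delay` resolves to NULL and the stock doubling backoff runs, while a platform that links in a definition (as opl.c now does) silently takes over the delay policy. The stand-alone sketch below is mine, not part of the commit: it is user-space C for a GCC/ELF toolchain, the `BACKOFF_*` values and the `one_backoff_step()` helper are invented for illustration, and a `volatile` counter stands in for the kernel's nulldev() trick.

```c
/*
 * User-space sketch of the weak-symbol dispatch idiom used in mutex.c.
 * plat_lock_delay() is deliberately left undefined here, so on a
 * GCC/ELF toolchain &plat_lock_delay resolves to NULL at run time and
 * the generic doubling backoff is used; linking in a definition would
 * switch every call site to the platform-specific path.
 */
#include <stdio.h>

#define	BACKOFF_BASE	50	/* illustrative values, not the kernel's */
#define	BACKOFF_CAP	1600

extern void plat_lock_delay(int *);
#pragma weak plat_lock_delay

static void
one_backoff_step(int *backoff)		/* hypothetical helper */
{
	volatile int ctr;

	if (&plat_lock_delay) {
		plat_lock_delay(backoff);	/* platform-supplied policy */
	} else {
		for (ctr = *backoff; ctr; ctr--)
			;			/* burn a little time */
		*backoff <<= 1;			/* double it */
		if (*backoff > BACKOFF_CAP)
			*backoff = BACKOFF_CAP;
	}
}

int
main(void)
{
	/* mirror the patch: start at 0 only when a platform hook exists */
	int backoff = (&plat_lock_delay) ? 0 : BACKOFF_BASE;
	int attempt;

	for (attempt = 1; attempt <= 8; attempt++) {
		one_backoff_step(&backoff);
		printf("attempt %d: backoff = %d\n", attempt, backoff);
	}
	return (0);
}
```

With nothing defining plat_lock_delay(), the generic branch runs and the printed backoff doubles until it pins at BACKOFF_CAP; defining the function in another object file flips every step to the platform path without recompiling this one.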
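
The closing block of the new plat_lock_delay() grows the backoff linearly in three stages rather than doubling it: by 1 below OPL_BOFF_CAP1, by OPL_BOFF_SLEEP up to OPL_BOFF_CAP2, then by 2 * OPL_BOFF_SLEEP, capped at OPL_BOFF_MAX. The sketch below models only that adjustment policy so the ramp can be printed and inspected; the `opl_backoff_step()` wrapper and the driver loop are invented for the example, and the sleep/spin delay itself is omitted.

```c
/*
 * Stand-alone model of the three-stage backoff growth from the OPL
 * plat_lock_delay() in this patch.  Only the adjustment policy is
 * reproduced; the actual sleep/spin delay is left out.
 */
#include <stdio.h>

#define	OPL_BOFF_BASE	1
#define	OPL_BOFF_SLEEP	5
#define	OPL_BOFF_CAP1	20
#define	OPL_BOFF_CAP2	60
#define	OPL_BOFF_MAX	(40 * OPL_BOFF_SLEEP)

static void
opl_backoff_step(int *backoff)		/* wrapper name is illustrative */
{
	if (*backoff == 0)
		*backoff = OPL_BOFF_BASE;

	if (*backoff < OPL_BOFF_CAP1) {
		*backoff += 1;			/* stage 1: light contention */
	} else {
		if (*backoff < OPL_BOFF_CAP2)
			*backoff += OPL_BOFF_SLEEP;	/* stage 2 */
		else
			*backoff += 2 * OPL_BOFF_SLEEP;	/* stage 3 */
		if (*backoff > OPL_BOFF_MAX)
			*backoff = OPL_BOFF_MAX;	/* hard ceiling */
	}
}

int
main(void)
{
	int backoff = 0;	/* mutex.c starts at 0 when the hook exists */
	int attempt;

	for (attempt = 1; attempt <= 40; attempt++) {
		opl_backoff_step(&backoff);
		printf("attempt %2d: backoff = %d\n", attempt, backoff);
	}
	return (0);
}
```

Running it, the budget climbs 2, 3, ... 20, then 25, 30, ... 60, then 70, 80, ... and pins at 200 (OPL_BOFF_MAX) after roughly forty failed attempts, a much gentler ramp than the generic path's doubling.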
