summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
author pm145316 <none@none> 2008-05-14 08:14:54 -0700
committer pm145316 <none@none> 2008-05-14 08:14:54 -0700
commit cc85acda94d03f2a4939f6bc285d24effc27f179 (patch)
tree 7a199867074fe3cebb0ba2d44c08ac19637fe555 /usr/src
parent 602ca9ea8f9ce0933f0944601cc5d230e91a950d (diff)
download illumos-joyent-cc85acda94d03f2a4939f6bc285d24effc27f179.tar.gz
6625734 Multithreaded apps on M-series are not balanced across lgroups
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/uts/sun4u/opl/os/opl.c 47
1 file changed, 38 insertions, 9 deletions
diff --git a/usr/src/uts/sun4u/opl/os/opl.c b/usr/src/uts/sun4u/opl/os/opl.c
index b76fbae513..28df9c1cc3 100644
--- a/usr/src/uts/sun4u/opl/os/opl.c
+++ b/usr/src/uts/sun4u/opl/os/opl.c
@@ -562,22 +562,51 @@ plat_lgrp_init(void)
{
extern uint32_t lgrp_expand_proc_thresh;
extern uint32_t lgrp_expand_proc_diff;
+ const uint_t m = LGRP_LOADAVG_THREAD_MAX;
/*
* Set tuneables for the OPL architecture
*
- * lgrp_expand_proc_thresh is the minimum load on the lgroups
- * this process is currently running on before considering
- * expanding threads to another lgroup.
+ * lgrp_expand_proc_thresh is the threshold load on the set of
+ * lgroups a process is currently using before considering
+ * adding another lgroup to the set. For Oly-C and Jupiter
+ * systems, there are four sockets per lgroup. Setting
+ * lgrp_expand_proc_thresh to add lgroups when the load reaches
+ * four threads will spread the load when it exceeds one thread
+ * per socket, optimizing memory bandwidth and L2 cache space.
*
- * lgrp_expand_proc_diff determines how much less the remote lgroup
- * must be loaded before expanding to it.
+ * lgrp_expand_proc_diff determines how much less another lgroup
+ * must be loaded before shifting the start location of a thread
+ * to it.
*
- * Since remote latencies can be costly, attempt to keep 3 threads
- * within the same lgroup before expanding to the next lgroup.
+ * lgrp_loadavg_tolerance is the threshold where two lgroups are
+ * considered to have different loads. It is set to be less than
+ * 1% so that even a small residual load will be considered different
+ * from no residual load.
+ *
+ * We note loadavg values are not precise.
+ * Every 1/10 of a second loadavg values are reduced by 5%.
+ * This adjustment can come in the middle of the lgroup selection
+ * process, and for larger parallel apps with many threads can
+ * frequently occur between the start of the second thread
+ * placement and the finish of the last thread placement.
+ * We must also be careful not to use too small a threshold
+ * since the cumulative decay for 1 second idle time is 40%.
+ * That is, the residual load from completed threads will still
+ * be 60% one second after the proc goes idle or 8% after 5 seconds.
+ *
+ * To allow for lag time in loadavg calculations, we set:
+ * remote thresh (lgrp_expand_proc_thresh) = 3.75 * LGRP_LOADAVG_THREAD_MAX
+ * local thresh (lgrp_expand_proc_diff) = 0.75 * LGRP_LOADAVG_THREAD_MAX
+ * tolerance (lgrp_loadavg_tolerance) = 0.0078 * LGRP_LOADAVG_THREAD_MAX
+ *
+ * The load placement algorithms consider LGRP_LOADAVG_THREAD_MAX
+ * as the equivalent of a load of 1. To make the code more compact,
+ * we set m = LGRP_LOADAVG_THREAD_MAX.
*/
- lgrp_expand_proc_thresh = LGRP_LOADAVG_THREAD_MAX * 3;
- lgrp_expand_proc_diff = LGRP_LOADAVG_THREAD_MAX;
+ lgrp_expand_proc_thresh = (m * 3) + (m >> 1) + (m >> 2);
+ lgrp_expand_proc_diff = (m >> 1) + (m >> 2);
+ lgrp_loadavg_tolerance = (m >> 7);
}
/*