summaryrefslogtreecommitdiff
path: root/qa/pconf/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'qa/pconf/cpu')
-rw-r--r--qa/pconf/cpu/GNUmakefile17
-rw-r--r--qa/pconf/cpu/aggregate_util34
-rw-r--r--qa/pconf/cpu/excess_fpe43
-rw-r--r--qa/pconf/cpu/load_average29
-rw-r--r--qa/pconf/cpu/single_util36
-rw-r--r--qa/pconf/cpu/system_calls29
-rw-r--r--qa/pconf/cpu/system_mode45
7 files changed, 233 insertions, 0 deletions
diff --git a/qa/pconf/cpu/GNUmakefile b/qa/pconf/cpu/GNUmakefile
new file mode 100644
index 0000000..3588b68
--- /dev/null
+++ b/qa/pconf/cpu/GNUmakefile
@@ -0,0 +1,17 @@
+#!gmake
+
+TOPDIR = ../../..
+include $(TOPDIR)/src/include/builddefs
+
+TESTDIR = $(PCP_VAR_DIR)/testsuite/pconf/cpu
+CONFIGS = aggregate_util excess_fpe load_average \
+ single_util system_calls system_mode
+LSRCFILES = $(CONFIGS)
+
+default default_pcp setup:
+
+install install_pcp:
+ $(INSTALL) -m 755 -d $(TESTDIR)
+ $(INSTALL) -m 644 $(CONFIGS) $(TESTDIR)
+
+include $(BUILDRULES)
diff --git a/qa/pconf/cpu/aggregate_util b/qa/pconf/cpu/aggregate_util
new file mode 100644
index 0000000..504264b
--- /dev/null
+++ b/qa/pconf/cpu/aggregate_util
@@ -0,0 +1,34 @@
+#pmieconf-rules 1
+#
+
+rule cpu.aggregate_util
+ default = "$rule$"
+ predicate =
+"some_host (
+ kernel.all.cpu.user $hosts$ +
+ kernel.all.cpu.sys $hosts$ +
+ kernel.all.cpu.intr $hosts$
+ > hinv.ncpu $hosts$ * $threshold$ / 100
+)"
+ enabled = no
+ version = 1
+ help =
+"The average processor utilization over all CPUs exceeded
+threshold percent during the last sample interval.";
+
+string rule
+ default = "aggregate processor saturation"
+ modify = no
+ display = no;
+
+percent threshold
+ default = 90
+ help =
+"Threshold percentage for CPU saturation, in the range 0 (idle)
+to 100 (completely busy), independent of the number of CPUs.";
+
+string action_expand
+ default = %h
+ display = no
+ modify = no;
+
diff --git a/qa/pconf/cpu/excess_fpe b/qa/pconf/cpu/excess_fpe
new file mode 100644
index 0000000..fe166eb
--- /dev/null
+++ b/qa/pconf/cpu/excess_fpe
@@ -0,0 +1,43 @@
+#pmieconf-rules 1
+#
+
+rule cpu.excess_fpe
+ default = "$rule$"
+ predicate =
+"some_host (
+ some_inst (
+ kernel.percpu.cpu.sys $hosts$ > $systime_util$ / 100 &&
+ kernel.percpu.syscall $hosts$ < $syscall_rate$
+ )
+)"
+ enabled = no
+ version = 1
+ help =
+"This predicate attempts to detect processes generating very large
+numbers of floating point exceptions (FPEs). Characteristic of this
+situation is heavy system time coupled with low system call rates
+(exceptions are delivered through the kernel to the process, taking
+some system time, but no system call is serviced on the application's
+behalf).";
+
+string rule
+ default = "possible high floating point exception rate"
+ modify = no
+ display = no;
+
+percent systime_util
+ default = 50
+ help =
+"Threshold percentage for kernel CPU utilization, in the range 0
+(idle) to 100 (completely busy).";
+
+double syscall_rate
+ default = 100
+ help =
+"Threshold system call rate at which something is deemed amiss.";
+
+string action_expand
+ default = %i@%h
+ display = no
+ modify = no;
+
diff --git a/qa/pconf/cpu/load_average b/qa/pconf/cpu/load_average
new file mode 100644
index 0000000..e5fa9bc
--- /dev/null
+++ b/qa/pconf/cpu/load_average
@@ -0,0 +1,29 @@
+#pmieconf-rules 1
+#
+
+rule cpu.load_average
+ default = "$rule$"
+ predicate =
+"some_host (
+ kernel.all.load $hosts$ #'1 minute'
+ > hinv.ncpu $hosts$ * $threshold$
+)"
+ enabled = yes
+ version = 1
+ help =
+"The current 1-minute load average is higher than threshold times the
+number of CPUs.
+The load average measures the number of processes that are running,
+runnable or soon to be runnable (i.e. in short term sleep).";
+
+string rule
+ default = "high 1-minute load average"
+ modify = no
+ display = no;
+
+double threshold
+ default = 1.5
+ help =
+"The threshold multiplier for load per CPU, typically in the range
+0.5 (very light load) to 4.0 (very heavy load).";
+
diff --git a/qa/pconf/cpu/single_util b/qa/pconf/cpu/single_util
new file mode 100644
index 0000000..3f5dc81
--- /dev/null
+++ b/qa/pconf/cpu/single_util
@@ -0,0 +1,36 @@
+#pmieconf-rules 1
+#
+
+rule cpu.single_util
+ default = "$rule$"
+ predicate =
+"some_host (
+ some_inst (
+ kernel.percpu.cpu.user $hosts$ +
+ kernel.percpu.cpu.sys $hosts$ +
+ kernel.percpu.cpu.intr $hosts$
+ > $threshold$ / 100
+ )
+)"
+ enabled = no
+ version = 1
+ help =
+"The average processor utilization for at least one CPU exceeded
+threshold percent during the last sample interval.";
+
+string rule
+ default = "single processor saturation"
+ modify = no
+ display = no;
+
+percent threshold
+ default = 95
+ help =
+"Threshold percentage for CPU saturation, in the range 0 (idle)
+to 100 (completely busy).";
+
+string action_expand
+ default = %i@%h
+ display = no
+ modify = no;
+
diff --git a/qa/pconf/cpu/system_calls b/qa/pconf/cpu/system_calls
new file mode 100644
index 0000000..c2244e1
--- /dev/null
+++ b/qa/pconf/cpu/system_calls
@@ -0,0 +1,29 @@
+#pmieconf-rules 1
+#
+
+rule cpu.system_calls
+ default = "$rule$"
+ predicate =
+"some_host (
+ ( kernel.all.syscall $hosts$ / hinv.ncpu $hosts$ )
+ > $threshold$ count/sec
+)"
+ enabled = no
+ version = 1
+ help =
+"Average number of system calls per CPU per second exceeded threshold
+over the past sample interval.";
+
+string rule
+ default = "high average system call rate per CPU"
+ modify = no
+ display = no;
+
+double threshold
+ default = 2500
+ help =
+"The threshold of system calls per second per CPU. The appropriate
+value is a function of the processor type and the workload, but in
+the range 500 (a few, expensive system calls) to 5000 (many,
+lightweight system calls) would be typical.";
+
diff --git a/qa/pconf/cpu/system_mode b/qa/pconf/cpu/system_mode
new file mode 100644
index 0000000..bdecce7
--- /dev/null
+++ b/qa/pconf/cpu/system_mode
@@ -0,0 +1,45 @@
+#pmieconf-rules 1
+#
+
+rule cpu.system_mode
+ default = "$rule$"
+ predicate =
+"some_host (
+ ( kernel.all.cpu.user $hosts$
+ + kernel.all.cpu.sys $hosts$
+ > hinv.ncpu $hosts$ * $busy$ / 100 ) &&
+ ( kernel.all.cpu.sys $hosts$ /
+ ( kernel.all.cpu.user $hosts$
+ + kernel.all.cpu.sys $hosts$ )
+ > $threshold$ / 100 )
+)"
+ enabled = yes
+ version = 1
+ help =
+"The average processor utilization over all CPUs was at least busy
+percent during the last sample interval, and the ratio of system time
+to busy time exceeded threshold percent.";
+
+string rule
+ default = "busy executing in system mode"
+ modify = no
+ display = no;
+
+percent busy
+ default = 75
+ help =
+"Busy percentage for average CPU utilization, in the range 0 (idle)
+to 100 (completely busy), independent of the number of CPUs.";
+
+percent threshold
+ default = 80
+ help =
+"Threshold percentage for system time as a fraction of the non-idle
+CPU time, in the range 0 (no system time) to 100 (all system time),
+independent of the number of CPUs.";
+
+string action_expand
+ default = "%h"
+ display = no
+ modify = no;
+