diff options
Diffstat (limited to 'qa/pconf/cpu')
-rw-r--r-- | qa/pconf/cpu/GNUmakefile | 17 | ||||
-rw-r--r-- | qa/pconf/cpu/aggregate_util | 34 | ||||
-rw-r--r-- | qa/pconf/cpu/excess_fpe | 43 | ||||
-rw-r--r-- | qa/pconf/cpu/load_average | 29 | ||||
-rw-r--r-- | qa/pconf/cpu/single_util | 36 | ||||
-rw-r--r-- | qa/pconf/cpu/system_calls | 29 | ||||
-rw-r--r-- | qa/pconf/cpu/system_mode | 45 |
7 files changed, 233 insertions, 0 deletions
diff --git a/qa/pconf/cpu/GNUmakefile b/qa/pconf/cpu/GNUmakefile new file mode 100644 index 0000000..3588b68 --- /dev/null +++ b/qa/pconf/cpu/GNUmakefile @@ -0,0 +1,17 @@ +#!gmake + +TOPDIR = ../../.. +include $(TOPDIR)/src/include/builddefs + +TESTDIR = $(PCP_VAR_DIR)/testsuite/pconf/cpu +CONFIGS = aggregate_util excess_fpe load_average \ + single_util system_calls system_mode +LSRCFILES = $(CONFIGS) + +default default_pcp setup: + +install install_pcp: + $(INSTALL) -m 755 -d $(TESTDIR) + $(INSTALL) -m 644 $(CONFIGS) $(TESTDIR) + +include $(BUILDRULES) diff --git a/qa/pconf/cpu/aggregate_util b/qa/pconf/cpu/aggregate_util new file mode 100644 index 0000000..504264b --- /dev/null +++ b/qa/pconf/cpu/aggregate_util @@ -0,0 +1,34 @@ +#pmieconf-rules 1 +# + +rule cpu.aggregate_util + default = "$rule$" + predicate = +"some_host ( + kernel.all.cpu.user $hosts$ + + kernel.all.cpu.sys $hosts$ + + kernel.all.cpu.intr $hosts$ + > hinv.ncpu $hosts$ * $threshold$ / 100 +)" + enabled = no + version = 1 + help = +"The average processor utilization over all CPUs exceeded +threshold percent during the last sample interval."; + +string rule + default = "aggregate processor saturation" + modify = no + display = no; + +percent threshold + default = 90 + help = +"Threshold percentage for CPU saturation, in the range 0 (idle) +to 100 (completely busy), independent of the number of CPUs."; + +string action_expand + default = %h + display = no + modify = no; + diff --git a/qa/pconf/cpu/excess_fpe b/qa/pconf/cpu/excess_fpe new file mode 100644 index 0000000..fe166eb --- /dev/null +++ b/qa/pconf/cpu/excess_fpe @@ -0,0 +1,43 @@ +#pmieconf-rules 1 +# + +rule cpu.excess_fpe + default = "$rule$" + predicate = +"some_host ( + some_inst ( + kernel.percpu.cpu.sys $hosts$ > $systime_util$ / 100 && + kernel.percpu.syscall $hosts$ < $syscall_rate$ + ) +)" + enabled = no + version = 1 + help = +"This predicate attempts to detect processes generating very large +numbers of floating point exceptions 
(FPEs). Characteristic of this +situation is heavy system time coupled with low system call rates +(exceptions are delivered through the kernel to the process, taking +some system time, but no system call is serviced on the application's +behalf)."; + +string rule + default = "possible high floating point exception rate" + modify = no + display = no; + +percent systime_util + default = 50 + help = +"Threshold percentage for kernel CPU utilization, in the range 0 +(idle) to 100 (completely busy)"; + +double syscall_rate + default = 100 + help = +"Threshold system call rate at which something is deemed amiss."; + +string action_expand + default = %i@%h + display = no + modify = no; + diff --git a/qa/pconf/cpu/load_average b/qa/pconf/cpu/load_average new file mode 100644 index 0000000..e5fa9bc --- /dev/null +++ b/qa/pconf/cpu/load_average @@ -0,0 +1,29 @@ +#pmieconf-rules 1 +# + +rule cpu.load_average + default = "$rule$" + predicate = +"some_host ( + kernel.all.load $hosts$ #'1 minute' + > hinv.ncpu $hosts$ * $threshold$ +)" + enabled = yes + version = 1 + help = +"The current 1-minute load average is higher than threshold times the +number of CPUs. +The load average measures the number of processes that are running, +runnable or soon to be runnable (i.e. 
in short term sleep)."; + +string rule + default = "high 1-minute load average" + modify = no + display = no; + +double threshold + default = 1.5 + help = +"The threshold multiplier for load per CPU, typically in the range +0.5 (very light load) to 4.0 (very heavy load)."; + diff --git a/qa/pconf/cpu/single_util b/qa/pconf/cpu/single_util new file mode 100644 index 0000000..3f5dc81 --- /dev/null +++ b/qa/pconf/cpu/single_util @@ -0,0 +1,36 @@ +#pmieconf-rules 1 +# + +rule cpu.single_util + default = "$rule$" + predicate = +"some_host ( + some_inst ( + kernel.percpu.cpu.user $hosts$ + + kernel.percpu.cpu.sys $hosts$ + + kernel.percpu.cpu.intr $hosts$ + > $threshold$ / 100 + ) +)" + enabled = no + version = 1 + help = +"The average processor utilization for at least one CPU exceeded +threshold percent during the last sample interval."; + +string rule + default = "single processor saturation" + modify = no + display = no; + +percent threshold + default = 95 + help = +"Threshold percentage for CPU saturation, in the range 0 (idle) +to 100 (completely busy)"; + +string action_expand + default = %i@%h + display = no + modify = no; + diff --git a/qa/pconf/cpu/system_calls b/qa/pconf/cpu/system_calls new file mode 100644 index 0000000..c2244e1 --- /dev/null +++ b/qa/pconf/cpu/system_calls @@ -0,0 +1,29 @@ +#pmieconf-rules 1 +# + +rule cpu.system_calls + default = "$rule$" + predicate = +"some_host ( + ( kernel.all.syscall $hosts$ / hinv.ncpu $hosts$ ) + > $threshold$ count/sec +)" + enabled = no + version = 1 + help = +"Average number of system calls per CPU per second exceeded threshold +over the past sample interval."; + +string rule + default = "high average system call rate per CPU" + modify = no + display = no; + +double threshold + default = 2500 + help = +"The threshold of system calls per second per CPU. 
The appropriate +value is a function of the processor type and the workload, but in +the range 500 (a few, expensive system calls) to 5000 (many, +lightweight system calls) would be typical."; + diff --git a/qa/pconf/cpu/system_mode b/qa/pconf/cpu/system_mode new file mode 100644 index 0000000..bdecce7 --- /dev/null +++ b/qa/pconf/cpu/system_mode @@ -0,0 +1,45 @@ +#pmieconf-rules 1 +# + +rule cpu.system_mode + default = "$rule$" + predicate = +"some_host ( + ( kernel.all.cpu.user $hosts$ + + kernel.all.cpu.sys $hosts$ + > hinv.ncpu $hosts$ * $busy$ / 100 ) && + ( kernel.all.cpu.sys $hosts$ / + ( kernel.all.cpu.user $hosts$ + + kernel.all.cpu.sys $hosts$ ) + > $threshold$ / 100 ) +)" + enabled = yes + version = 1 + help = +"The average processor utilization over all CPUs was at least busy +percent during the last sample interval, and the ratio of system time +to busy time exceeded threshold percent."; + +string rule + default = "busy executing in system mode" + modify = no + display = no; + +percent busy + default = 75 + help = +"Busy percentage for average CPU utilization, in the range 0 (idle) +to 100 (completely busy), independent of the number of CPUs."; + +percent threshold + default = 80 + help = +"Threshold percentage for system time as a fraction of the non-idle +CPU time, in the range 0 (no system time) to 100 (all system time), +independent of the number of CPUs."; + +string action_expand + default = "%h" + display = no + modify = no; + |