diff options
Diffstat (limited to 'src/pmieconf/cpu')
-rw-r--r-- | src/pmieconf/cpu/context_switch | 59 | ||||
-rw-r--r-- | src/pmieconf/cpu/excess_fpe | 73 | ||||
-rw-r--r-- | src/pmieconf/cpu/load_average | 73 | ||||
-rw-r--r-- | src/pmieconf/cpu/localdefs | 49 | ||||
-rw-r--r-- | src/pmieconf/cpu/low_util | 69 | ||||
-rw-r--r-- | src/pmieconf/cpu/syscall | 69 | ||||
-rw-r--r-- | src/pmieconf/cpu/system | 73 | ||||
-rw-r--r-- | src/pmieconf/cpu/util | 62 |
8 files changed, 527 insertions, 0 deletions
diff --git a/src/pmieconf/cpu/context_switch b/src/pmieconf/cpu/context_switch new file mode 100644 index 0000000..fc2b8ac --- /dev/null +++ b/src/pmieconf/cpu/context_switch @@ -0,0 +1,59 @@ +#pmieconf-rules 1 +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) +# + +rule cpu.context_switch + summary = "$rule$" + predicate = +"some_host ( + kernel.all.pswitch $hosts$ + > hinv.ncpu $hosts$ * $threshold$ count/sec +)" + enabled = yes + version = 1 + help = +"Average number of context switches per CPU per second exceeded +threshold over the past sample interval."; + +string rule + default = "High aggregate context switch rate" + modify = no + display = no; + +double threshold + default = 4000 + help = +"The threshold number of process context switches per second."; + +string action_expand + default = %vctxsw/s@%h + display = no + modify = no; + +string email_expand + default = "host: %h aggregate context switches: %v/sec" + display = no + modify = no; + + +# Configuration info specific to non-PCP tools follows... +# + +# for SGI Embedded Support Partner integration: +string esp_type + default = "0x20005C" + display = no + modify = no; + +# for EnlightenDSM integration: +string enln_test + default = cpu.context_switch + display = no + modify = no; +string enln_units + default = ctxsw/s + display = no + modify = no; + +# +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) diff --git a/src/pmieconf/cpu/excess_fpe b/src/pmieconf/cpu/excess_fpe new file mode 100644 index 0000000..90b62dc --- /dev/null +++ b/src/pmieconf/cpu/excess_fpe @@ -0,0 +1,73 @@ +#pmieconf-rules 1 +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) +# + +rule cpu.excess_fpe + summary = "$rule$" + enumerate = hosts + predicate = +"some_host ( + some_inst ( + ( 100 * kernel.percpu.cpu.sys $hosts$ > $systime_util$ ) + && kernel.percpu.syscall $hosts$ < $syscall_rate$ + ) +)" + enabled = no + version = 1 + help = +"This predicate attempts to detect processes generating very large +numbers of floating point exceptions (FPEs). Characteristic of +this situation is heavy system time coupled with low system call +rates (exceptions are delivered through the kernel to the process, +taking some system time, but no system call is serviced on the +application's behalf)."; + +string rule + default = "Possible high floating point exception rate" + modify = no + display = no; + +percent systime_util + default = 50 + help = +"Threshold percentage for kernel CPU utilization, in the range 0 +(idle) to 100 (completely busy)"; + +double syscall_rate + default = 100 + help = +"Threshold system call rate (calls per second) below which something +is deemed amiss."; + +string action_expand + default = %v%sys[%i]@%h + display = no + modify = no; + +string email_expand + default = "host: %h CPU: %i system mode: %v% and low syscall rate" + display = no + modify = no; + + +# Configuration info specific to non-PCP tools follows... +# + +# for SGI Embedded Support Partner integration: +string esp_type + default = "0x200041" + display = no + modify = no; + +# for EnlightenDSM integration: +string enln_test + default = cpu.excess_fpe + display = no + modify = no; +string enln_units + default = %sys[%i] + display = no + modify = no; + +# +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) diff --git a/src/pmieconf/cpu/load_average b/src/pmieconf/cpu/load_average new file mode 100644 index 0000000..95b4c5d --- /dev/null +++ b/src/pmieconf/cpu/load_average @@ -0,0 +1,73 @@ +#pmieconf-rules 1 +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) +# + +rule cpu.load_average + summary = "$rule$" + predicate = +"some_host ( + // threshold scales with the number of CPUs (works better for + // large systems) and there is an absolute lower bound, + // especially for small systems + kernel.all.load $hosts$ #'1 minute' > hinv.ncpu $hosts$ * $per_cpu_load$ + && kernel.all.load $hosts$ #'1 minute' > $min_load$ +)" + enabled = yes + version = 1 + help = +"The current 1-minute load average is higher than the larger of +min_load and ( per_cpu_load times the number of CPUs ). +The load average measures the number of processes that are running, +runnable or soon to be runnable (i.e. in short term sleep)."; + +string rule + default = "High 1-minute load average" + modify = no + display = no; + +double per_cpu_load + default = 3 + help = +"The multiplier per CPU for the minimum load to make the rule true, +when expressed as a function of the number of CPUs. Typically in +the range 1.0 (very light load) to 8.0 (very heavy load )."; + +double min_load + default = 4 + help = +"The minimum load average before the rule is true. Most useful for +single-processor systems or where the desired threshold is +absolute, rather than a function of the number of CPUs."; + +string action_expand + default = %vload@%h + display = no + modify = no; + +string email_expand + default = "host: %h load average: %v" + display = no + modify = no; + + +# Configuration info specific to non-PCP tools follows... +# + +# for SGI Embedded Support Partner integration: +string esp_type + default = "0x200042" + display = no + modify = no; + +# for EnlightenDSM integration: +string enln_test + default = cpu.load_average + display = no + modify = no; +string enln_units + default = load + display = no + modify = no; + +# +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) diff --git a/src/pmieconf/cpu/localdefs b/src/pmieconf/cpu/localdefs new file mode 100644 index 0000000..dbe7c39 --- /dev/null +++ b/src/pmieconf/cpu/localdefs @@ -0,0 +1,49 @@ +ALL_RULES = util syscall context_switch system load_average \ + excess_fpe low_util + +LOCAL_RULES = $(ALL_RULES) + +# Metrics missing from Linux +# +# rule: excess_fpe +# kernel.percpu.syscall -12357 Unknown metric name +# +# rule: syscall +# kernel.all.syscall -12357 Unknown metric name +# +ifeq ($(TARGET_OS), linux) +LOCAL_RULES = util context_switch system load_average low_util +endif + +# Metrics missing from Mac OS X +# +# rule: util +# kernel.all.cpu.intr -12357 Unknown metric name +# +# rule: syscall +# kernel.all.syscall -12357 Unknown metric name +# +# rule: context_switch +# kernel.all.pswitch -12357 Unknown metric name +# +# rule: excess_fpe +# kernel.percpu.syscall -12357 Unknown metric name +# +# rule: low_util +# kernel.all.cpu.intr -12357 Unknown metric name +# +ifeq ($(TARGET_OS), darwin) +LOCAL_RULES = system load_average +endif + +# Metrics missing from Solaris +# +# rule: low_util +# kernel.all.cpu.intr -12357 Unknown metric name +# +# rule: util +# kernel.all.cpu.intr -12357 Unknown metric name +# +ifeq ($(TARGET_OS), solaris) +LOCAL_RULES = syscall context_switch system load_average excess_fpe +endif diff --git a/src/pmieconf/cpu/low_util b/src/pmieconf/cpu/low_util new file mode 100644 index 0000000..de6c0da --- /dev/null +++ b/src/pmieconf/cpu/low_util @@ -0,0 +1,69 @@ +#pmieconf-rules 1 +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) +# + +rule cpu.low_util + summary = "$rule$" + predicate = +"some_host ( + // kernel.all.cpu metrics count up to 1 second of CPU time + // per second per CPU + 100 * ( kernel.all.cpu.user $hosts$ + + kernel.all.cpu.sys $hosts$ + + kernel.all.cpu.intr $hosts$ ) / hinv.ncpu $hosts$ + < $threshold$ +)" + enabled = no + version = 1 + help = +"The average processor utilization over all CPUs was below threshold +percent during the last sample interval. +This rule is effectively the opposite of cpu.util and is disabled by +default - it is only useful in specialized environments where, for +example, processing is batch oriented and low processor utilization +is indicative of poor use of system resources. In such a situation +the cpu.low_util rule should be enabled, and cpu.util disabled."; + +string rule + default = "Low average processor utilization" + modify = no + display = no; + +percent threshold + default = 25 + help = +"Lower bound percentage for CPU utilization, in the range 0 (idle) +to 100 (completely busy), independent of the number of CPUs."; + +string action_expand + default = %v%util@%h + display = no + modify = no; + +string email_expand + default = "host: %h average CPU utilization: %v%" + display = no + modify = no; + + +# Configuration info specific to non-PCP tools follows... +# + +# for SGI Embedded Support Partner integration: +string esp_type + default = "0x20005E" + display = no + modify = no; + +# for EnlightenDSM integration: +string enln_test + default = cpu.low_util + display = no + modify = no; +string enln_units + default = %util + display = no + modify = no; + +# +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) diff --git a/src/pmieconf/cpu/syscall b/src/pmieconf/cpu/syscall new file mode 100644 index 0000000..4dcf074 --- /dev/null +++ b/src/pmieconf/cpu/syscall @@ -0,0 +1,69 @@ +#pmieconf-rules 1 +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) +# + +rule cpu.syscall + summary = "$rule$" + predicate = +"some_host ( + kernel.all.syscall $hosts$ + > hinv.ncpu $hosts$ * $threshold$ count/sec +)" + enabled = yes + version = 1 + help = +"Average number of system calls per CPU per second exceeded +threshold over the past sample interval."; + +string rule + default = "High aggregate system call rate" + modify = no + display = no; + +double threshold + default = 10000 + help = +"The threshold of system calls per second per CPU. The appropriate +value here is a function of the processor type and the workload, but +here are some indicative figures of sustained system call rates for a +single process: + getpid() - 380000 syscalls/sec + lseek() to start of file - 280000 syscalls/sec + gettimeofday() - 200000 syscalls/sec + read() at end of file - 83000 syscalls/sec + file creat() and close() - 65000 syscalls/sec + socket(), connect() and close() - 7000 syscalls/sec +(generated using an otherwise idle system with 180MHz R10000 processors)."; + +string action_expand + default = %vscall/s@%h + display = no + modify = no; + +string email_expand + default = "host: %h aggregate syscalls/sec: %v" + display = no + modify = no; + + +# Configuration info specific to non-PCP tools follows... +# + +# for SGI Embedded Support Partner integration: +string esp_type + default = "0x200043" + display = no + modify = no; + +# for EnlightenDSM integration: +string enln_test + default = cpu.syscall + display = no + modify = no; +string enln_units + default = scall/s + display = no + modify = no; + +# +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) diff --git a/src/pmieconf/cpu/system b/src/pmieconf/cpu/system new file mode 100644 index 0000000..04d61a5 --- /dev/null +++ b/src/pmieconf/cpu/system @@ -0,0 +1,73 @@ +#pmieconf-rules 1 +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) +# + +rule cpu.system + summary = "$rule$" + predicate = +"some_host ( + // first term is always true, but provides %v for actions ... + ( 100 * kernel.all.cpu.sys $hosts$ / hinv.ncpu $hosts$ ) > 0 + && 100 * ( kernel.all.cpu.user $hosts$ + kernel.all.cpu.sys $hosts$ ) + > $busy$ * hinv.ncpu $hosts$ + && 100 * kernel.all.cpu.sys $hosts$ / + ( kernel.all.cpu.user $hosts$ + kernel.all.cpu.sys $hosts$ ) + > $threshold$ +)" + enabled = yes + version = 1 + help = +"Over the last sample interval, the average utilization per CPU was +busy percent or more, and the ratio of system time to busy time +exceeded threshold percent."; + +string rule + default = "Busy executing in system mode" + modify = no + display = no; + +percent busy + default = 70 + help = +"Busy percentage for average CPU utilization, in the range 0 (idle) +to 100 (completely busy), independent of the number of CPUs."; + +percent threshold + default = 75 + help = +"Threshold percentage for system time as a fraction of the non-idle +CPU time, in the range 0 (no system time) to 100 (all system time), +independent of the number of CPUs."; + +string action_expand + default = %v%sys@%h + display = no + modify = no; + +string email_expand + default = "host: %h system mode: %v%" + display = no + modify = no; + + +# Configuration info specific to non-PCP tools follows... +# + +# for SGI Embedded Support Partner integration: +string esp_type + default = "0x200044" + display = no + modify = no; + +# for EnlightenDSM integration: +string enln_test + default = cpu.system + display = no + modify = no; +string enln_units + default = %sys + display = no + modify = no; + +# +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) diff --git a/src/pmieconf/cpu/util b/src/pmieconf/cpu/util new file mode 100644 index 0000000..be21b5c --- /dev/null +++ b/src/pmieconf/cpu/util @@ -0,0 +1,62 @@ +#pmieconf-rules 1 +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) +# + +rule cpu.util + summary = "$rule$" + predicate = +"some_host ( + 100 * ( kernel.all.cpu.user $hosts$ + + kernel.all.cpu.sys $hosts$ + + kernel.all.cpu.intr $hosts$ ) / hinv.ncpu $hosts$ + > $threshold$ +)" + enabled = yes + version = 1 + help = +"The average processor utilization over all CPUs exceeded threshold +percent during the last sample interval."; + +string rule + default = "High average processor utilization" + modify = no + display = no; + +percent threshold + default = 90 + help = +"Threshold percentage for CPU saturation, in the range 0 (idle) +to 100 (completely busy), independent of the number of CPUs."; + +string action_expand + default = %v%util@%h + display = no + modify = no; + +string email_expand + default = "host: %h average CPU utilization: %v%" + display = no + modify = no; + + +# Configuration info specific to non-PCP tools follows... +# + +# for SGI Embedded Support Partner integration: +string esp_type + default = "0x200045" + display = no + modify = no; + +# for EnlightenDSM integration: +string enln_test + default = cpu.util + display = no + modify = no; +string enln_units + default = %util + display = no + modify = no; + +# +# --- DO NOT MODIFY THIS FILE --- see pmieconf(4) |