summary refs log tree commit diff
path: root/src/pmieconf/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'src/pmieconf/cpu')
-rw-r--r-- src/pmieconf/cpu/context_switch 59
-rw-r--r-- src/pmieconf/cpu/excess_fpe 73
-rw-r--r-- src/pmieconf/cpu/load_average 73
-rw-r--r-- src/pmieconf/cpu/localdefs 49
-rw-r--r-- src/pmieconf/cpu/low_util 69
-rw-r--r-- src/pmieconf/cpu/syscall 69
-rw-r--r-- src/pmieconf/cpu/system 73
-rw-r--r-- src/pmieconf/cpu/util 62
8 files changed, 527 insertions, 0 deletions
diff --git a/src/pmieconf/cpu/context_switch b/src/pmieconf/cpu/context_switch
new file mode 100644
index 0000000..fc2b8ac
--- /dev/null
+++ b/src/pmieconf/cpu/context_switch
@@ -0,0 +1,59 @@
+#pmieconf-rules 1
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
+#
+
+rule cpu.context_switch
+ summary = "$rule$"
+ predicate =
+"some_host (
+ kernel.all.pswitch $hosts$
+ > hinv.ncpu $hosts$ * $threshold$ count/sec
+)"
+ enabled = yes
+ version = 1
+ help =
+"Average number of context switches per CPU per second exceeded
+threshold over the past sample interval.";
+
+string rule
+ default = "High aggregate context switch rate"
+ modify = no
+ display = no;
+
+double threshold
+ default = 4000
+ help =
+"The threshold number of process context switches per second per CPU.";
+
+string action_expand
+ default = %vctxsw/s@%h
+ display = no
+ modify = no;
+
+string email_expand
+ default = "host: %h aggregate context switches: %v/sec"
+ display = no
+ modify = no;
+
+
+# Configuration info specific to non-PCP tools follows...
+#
+
+# for SGI Embedded Support Partner integration:
+string esp_type
+ default = "0x20005C"
+ display = no
+ modify = no;
+
+# for EnlightenDSM integration:
+string enln_test
+ default = cpu.context_switch
+ display = no
+ modify = no;
+string enln_units
+ default = ctxsw/s
+ display = no
+ modify = no;
+
+#
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
diff --git a/src/pmieconf/cpu/excess_fpe b/src/pmieconf/cpu/excess_fpe
new file mode 100644
index 0000000..90b62dc
--- /dev/null
+++ b/src/pmieconf/cpu/excess_fpe
@@ -0,0 +1,73 @@
+#pmieconf-rules 1
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
+#
+
+rule cpu.excess_fpe
+ summary = "$rule$"
+ enumerate = hosts
+ predicate =
+"some_host (
+ some_inst (
+ ( 100 * kernel.percpu.cpu.sys $hosts$ > $systime_util$ )
+ && kernel.percpu.syscall $hosts$ < $syscall_rate$
+ )
+)"
+ enabled = no
+ version = 1
+ help =
+"This predicate attempts to detect processes generating very large
+numbers of floating point exceptions (FPEs). Characteristic of
+this situation is heavy system time coupled with low system call
+rates (exceptions are delivered through the kernel to the process,
+taking some system time, but no system call is serviced on the
+application's behalf).";
+
+string rule
+ default = "Possible high floating point exception rate"
+ modify = no
+ display = no;
+
+percent systime_util
+ default = 50
+ help =
+"Threshold percentage for kernel CPU utilization, in the range 0
+(idle) to 100 (completely busy).";
+
+double syscall_rate
+ default = 100
+ help =
+"Threshold system call rate (calls per second) below which something
+is deemed amiss.";
+
+string action_expand
+ default = %v%sys[%i]@%h
+ display = no
+ modify = no;
+
+string email_expand
+ default = "host: %h CPU: %i system mode: %v% and low syscall rate"
+ display = no
+ modify = no;
+
+
+# Configuration info specific to non-PCP tools follows...
+#
+
+# for SGI Embedded Support Partner integration:
+string esp_type
+ default = "0x200041"
+ display = no
+ modify = no;
+
+# for EnlightenDSM integration:
+string enln_test
+ default = cpu.excess_fpe
+ display = no
+ modify = no;
+string enln_units
+ default = %sys[%i]
+ display = no
+ modify = no;
+
+#
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
diff --git a/src/pmieconf/cpu/load_average b/src/pmieconf/cpu/load_average
new file mode 100644
index 0000000..95b4c5d
--- /dev/null
+++ b/src/pmieconf/cpu/load_average
@@ -0,0 +1,73 @@
+#pmieconf-rules 1
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
+#
+
+rule cpu.load_average
+ summary = "$rule$"
+ predicate =
+"some_host (
+ // threshold scales with the number of CPUs (works better for
+ // large systems) and there is an absolute lower bound,
+ // especially for small systems
+ kernel.all.load $hosts$ #'1 minute' > hinv.ncpu $hosts$ * $per_cpu_load$
+ && kernel.all.load $hosts$ #'1 minute' > $min_load$
+)"
+ enabled = yes
+ version = 1
+ help =
+"The current 1-minute load average is higher than the larger of
+min_load and ( per_cpu_load times the number of CPUs ).
+The load average measures the number of processes that are running,
+runnable or soon to be runnable (i.e. in short term sleep).";
+
+string rule
+ default = "High 1-minute load average"
+ modify = no
+ display = no;
+
+double per_cpu_load
+ default = 3
+ help =
+"The multiplier per CPU for the minimum load to make the rule true,
+when expressed as a function of the number of CPUs. Typically in
+the range 1.0 (very light load) to 8.0 (very heavy load).";
+
+double min_load
+ default = 4
+ help =
+"The minimum load average before the rule is true. Most useful for
+single-processor systems or where the desired threshold is
+absolute, rather than a function of the number of CPUs.";
+
+string action_expand
+ default = %vload@%h
+ display = no
+ modify = no;
+
+string email_expand
+ default = "host: %h load average: %v"
+ display = no
+ modify = no;
+
+
+# Configuration info specific to non-PCP tools follows...
+#
+
+# for SGI Embedded Support Partner integration:
+string esp_type
+ default = "0x200042"
+ display = no
+ modify = no;
+
+# for EnlightenDSM integration:
+string enln_test
+ default = cpu.load_average
+ display = no
+ modify = no;
+string enln_units
+ default = load
+ display = no
+ modify = no;
+
+#
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
diff --git a/src/pmieconf/cpu/localdefs b/src/pmieconf/cpu/localdefs
new file mode 100644
index 0000000..dbe7c39
--- /dev/null
+++ b/src/pmieconf/cpu/localdefs
@@ -0,0 +1,49 @@
+ALL_RULES = util syscall context_switch system load_average \
+ excess_fpe low_util
+
+LOCAL_RULES = $(ALL_RULES)
+
+# Metrics missing from Linux
+#
+# rule: excess_fpe
+# kernel.percpu.syscall -12357 Unknown metric name
+#
+# rule: syscall
+# kernel.all.syscall -12357 Unknown metric name
+#
+ifeq ($(TARGET_OS), linux)
+LOCAL_RULES = util context_switch system load_average low_util
+endif
+
+# Metrics missing from Mac OS X
+#
+# rule: util
+# kernel.all.cpu.intr -12357 Unknown metric name
+#
+# rule: syscall
+# kernel.all.syscall -12357 Unknown metric name
+#
+# rule: context_switch
+# kernel.all.pswitch -12357 Unknown metric name
+#
+# rule: excess_fpe
+# kernel.percpu.syscall -12357 Unknown metric name
+#
+# rule: low_util
+# kernel.all.cpu.intr -12357 Unknown metric name
+#
+ifeq ($(TARGET_OS), darwin)
+LOCAL_RULES = system load_average
+endif
+
+# Metrics missing from Solaris
+#
+# rule: low_util
+# kernel.all.cpu.intr -12357 Unknown metric name
+#
+# rule: util
+# kernel.all.cpu.intr -12357 Unknown metric name
+#
+ifeq ($(TARGET_OS), solaris)
+LOCAL_RULES = syscall context_switch system load_average excess_fpe
+endif
diff --git a/src/pmieconf/cpu/low_util b/src/pmieconf/cpu/low_util
new file mode 100644
index 0000000..de6c0da
--- /dev/null
+++ b/src/pmieconf/cpu/low_util
@@ -0,0 +1,69 @@
+#pmieconf-rules 1
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
+#
+
+rule cpu.low_util
+ summary = "$rule$"
+ predicate =
+"some_host (
+ // kernel.all.cpu metrics count up to 1 second of CPU time
+ // per second per CPU
+ 100 * ( kernel.all.cpu.user $hosts$ +
+ kernel.all.cpu.sys $hosts$ +
+ kernel.all.cpu.intr $hosts$ ) / hinv.ncpu $hosts$
+ < $threshold$
+)"
+ enabled = no
+ version = 1
+ help =
+"The average processor utilization over all CPUs was below threshold
+percent during the last sample interval.
+This rule is effectively the opposite of cpu.util and is disabled by
+default - it is only useful in specialized environments where, for
+example, processing is batch oriented and low processor utilization
+is indicative of poor use of system resources. In such a situation
+the cpu.low_util rule should be enabled, and cpu.util disabled.";
+
+string rule
+ default = "Low average processor utilization"
+ modify = no
+ display = no;
+
+percent threshold
+ default = 25
+ help =
+"Lower bound percentage for CPU utilization, in the range 0 (idle)
+to 100 (completely busy), independent of the number of CPUs.";
+
+string action_expand
+ default = %v%util@%h
+ display = no
+ modify = no;
+
+string email_expand
+ default = "host: %h average CPU utilization: %v%"
+ display = no
+ modify = no;
+
+
+# Configuration info specific to non-PCP tools follows...
+#
+
+# for SGI Embedded Support Partner integration:
+string esp_type
+ default = "0x20005E"
+ display = no
+ modify = no;
+
+# for EnlightenDSM integration:
+string enln_test
+ default = cpu.low_util
+ display = no
+ modify = no;
+string enln_units
+ default = %util
+ display = no
+ modify = no;
+
+#
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
diff --git a/src/pmieconf/cpu/syscall b/src/pmieconf/cpu/syscall
new file mode 100644
index 0000000..4dcf074
--- /dev/null
+++ b/src/pmieconf/cpu/syscall
@@ -0,0 +1,69 @@
+#pmieconf-rules 1
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
+#
+
+rule cpu.syscall
+ summary = "$rule$"
+ predicate =
+"some_host (
+ kernel.all.syscall $hosts$
+ > hinv.ncpu $hosts$ * $threshold$ count/sec
+)"
+ enabled = yes
+ version = 1
+ help =
+"Average number of system calls per CPU per second exceeded
+threshold over the past sample interval.";
+
+string rule
+ default = "High aggregate system call rate"
+ modify = no
+ display = no;
+
+double threshold
+ default = 10000
+ help =
+"The threshold of system calls per second per CPU. The appropriate
+value here is a function of the processor type and the workload, but
+here are some indicative figures of sustained system call rates for a
+single process:
+ getpid() - 380000 syscalls/sec
+ lseek() to start of file - 280000 syscalls/sec
+ gettimeofday() - 200000 syscalls/sec
+ read() at end of file - 83000 syscalls/sec
+ file creat() and close() - 65000 syscalls/sec
+ socket(), connect() and close() - 7000 syscalls/sec
+(generated using an otherwise idle system with 180MHz R10000 processors).";
+
+string action_expand
+ default = %vscall/s@%h
+ display = no
+ modify = no;
+
+string email_expand
+ default = "host: %h aggregate syscalls/sec: %v"
+ display = no
+ modify = no;
+
+
+# Configuration info specific to non-PCP tools follows...
+#
+
+# for SGI Embedded Support Partner integration:
+string esp_type
+ default = "0x200043"
+ display = no
+ modify = no;
+
+# for EnlightenDSM integration:
+string enln_test
+ default = cpu.syscall
+ display = no
+ modify = no;
+string enln_units
+ default = scall/s
+ display = no
+ modify = no;
+
+#
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
diff --git a/src/pmieconf/cpu/system b/src/pmieconf/cpu/system
new file mode 100644
index 0000000..04d61a5
--- /dev/null
+++ b/src/pmieconf/cpu/system
@@ -0,0 +1,73 @@
+#pmieconf-rules 1
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
+#
+
+rule cpu.system
+ summary = "$rule$"
+ predicate =
+"some_host (
+ // first term is always true, but provides %v for actions ...
+ ( 100 * kernel.all.cpu.sys $hosts$ / hinv.ncpu $hosts$ ) > 0
+ && 100 * ( kernel.all.cpu.user $hosts$ + kernel.all.cpu.sys $hosts$ )
+ > $busy$ * hinv.ncpu $hosts$
+ && 100 * kernel.all.cpu.sys $hosts$ /
+ ( kernel.all.cpu.user $hosts$ + kernel.all.cpu.sys $hosts$ )
+ > $threshold$
+)"
+ enabled = yes
+ version = 1
+ help =
+"Over the last sample interval, the average utilization per CPU was
+busy percent or more, and the ratio of system time to busy time
+exceeded threshold percent.";
+
+string rule
+ default = "Busy executing in system mode"
+ modify = no
+ display = no;
+
+percent busy
+ default = 70
+ help =
+"Busy percentage for average CPU utilization, in the range 0 (idle)
+to 100 (completely busy), independent of the number of CPUs.";
+
+percent threshold
+ default = 75
+ help =
+"Threshold percentage for system time as a fraction of the non-idle
+CPU time, in the range 0 (no system time) to 100 (all system time),
+independent of the number of CPUs.";
+
+string action_expand
+ default = %v%sys@%h
+ display = no
+ modify = no;
+
+string email_expand
+ default = "host: %h system mode: %v%"
+ display = no
+ modify = no;
+
+
+# Configuration info specific to non-PCP tools follows...
+#
+
+# for SGI Embedded Support Partner integration:
+string esp_type
+ default = "0x200044"
+ display = no
+ modify = no;
+
+# for EnlightenDSM integration:
+string enln_test
+ default = cpu.system
+ display = no
+ modify = no;
+string enln_units
+ default = %sys
+ display = no
+ modify = no;
+
+#
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
diff --git a/src/pmieconf/cpu/util b/src/pmieconf/cpu/util
new file mode 100644
index 0000000..be21b5c
--- /dev/null
+++ b/src/pmieconf/cpu/util
@@ -0,0 +1,62 @@
+#pmieconf-rules 1
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)
+#
+
+rule cpu.util
+ summary = "$rule$"
+ predicate =
+"some_host (
+ 100 * ( kernel.all.cpu.user $hosts$ +
+ kernel.all.cpu.sys $hosts$ +
+ kernel.all.cpu.intr $hosts$ ) / hinv.ncpu $hosts$
+ > $threshold$
+)"
+ enabled = yes
+ version = 1
+ help =
+"The average processor utilization over all CPUs exceeded threshold
+percent during the last sample interval.";
+
+string rule
+ default = "High average processor utilization"
+ modify = no
+ display = no;
+
+percent threshold
+ default = 90
+ help =
+"Threshold percentage for CPU saturation, in the range 0 (idle)
+to 100 (completely busy), independent of the number of CPUs.";
+
+string action_expand
+ default = %v%util@%h
+ display = no
+ modify = no;
+
+string email_expand
+ default = "host: %h average CPU utilization: %v%"
+ display = no
+ modify = no;
+
+
+# Configuration info specific to non-PCP tools follows...
+#
+
+# for SGI Embedded Support Partner integration:
+string esp_type
+ default = "0x200045"
+ display = no
+ modify = no;
+
+# for EnlightenDSM integration:
+string enln_test
+ default = cpu.util
+ display = no
+ modify = no;
+string enln_units
+ default = %util
+ display = no
+ modify = no;
+
+#
+# --- DO NOT MODIFY THIS FILE --- see pmieconf(4)