# path: root/src/pmdas/rsyslog/pmdarsyslog.pl
# blob: e3972f914f1f04b015cba5a95714a670e52f9cdc (plain)
#
# Copyright (c) 2012-2013 Red Hat.
# Copyright (c) 2011 Aconex.  All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#

use strict;
use warnings;
use PCP::PMDA;

# Handle onto the PMDA framework, created for the agent named 'rsyslog'
# with the numeric identifier 107.
my $pmda = PCP::PMDA->new('rsyslog', 107);

# Path of the rsyslog statistics file beneath the PCP log directory.
my $statsfile = pmda_config('PCP_LOG_DIR') . '/rsyslog/stats';

# Latest elasticsearch counters parsed from the statistics stream.
my ($es_connfail, $es_submits, $es_failed, $es_success) = (0,0,0,0);

# Latest imuxsock (unix domain socket input) counters parsed from the stream.
my ($ux_submitted, $ux_discarded, $ux_ratelimiters) = (0,0,0);

# $interval: seconds between the two most recent statistics reports.
# $lasttime: time() at which the most recent report was accepted.
# Both stay zero until the corresponding observation has been made.
my ($interval, $lasttime) = (0,0);

# Instance domain for the rsyslog queues.  @queue_insts is the flat
# (id, name, id, name, ...) list handed to add_indom/replace_indom.
my $queue_indom = 0;
my @queue_insts = ();

# %queue_ids maps a queue name to its instance id; %queue_values maps a
# queue name to an array ref of [ size, enqueued, full, maxqsize ].
# Declared with "our" (same package globals as the obsolete "use vars").
our (%queue_ids, %queue_values);

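# Example statistics lines handled by rsyslog_parser; in the input each
# one follows leading text of the form ".* rsyslogd-pstats: ":
#   imuxsock: submitted=37 ratelimit.discarded=0 ratelimit.numratelimiters=22
#   elasticsearch: connfail=0 submits=0 failed=0 success=0
#   [main Q]: size=1 enqueued=1436 full=0 maxqsize=3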

# Callback invoked with each line read from the rsyslog statistics file.
#
# Arguments: the first is ignored; the second is the text of the line.
# Lines carrying the "rsyslogd-pstats:" tag refresh the sampling interval
# (whole seconds between reports).  Recognised imuxsock, elasticsearch and
# queue lines update the cached values that the fetch callback serves;
# anything else is silently ignored.  No meaningful return value.
sub rsyslog_parser
{
    # Keep the line in a lexical: assigning to the global $_ here would
    # clobber whatever value the caller had in $_.
    my ( undef, $line ) = @_;

    return unless defined($line);

    #$pmda->log("rsyslog_parser got line: $line");
    if ($line =~ m|rsyslogd-pstats:|) {
	my $timenow = time;
	if ($lasttime != 0) {
	    # Several lines arrive within the same second for one report;
	    # only advance once the clock has moved on.
	    if ($timenow > $lasttime) {
		$interval = $timenow - $lasttime;
		$lasttime = $timenow;
	    }
	} else {
	    # First report seen: remember when, no interval known yet.
	    $lasttime = $timenow;
	}
    }
    if ($line =~ m|imuxsock: submitted=(\d+) ratelimit\.discarded=(\d+) ratelimit\.numratelimiters=(\d+)|) {
	($ux_submitted, $ux_discarded, $ux_ratelimiters) = ($1,$2,$3);
    }
    elsif ($line =~ m|elasticsearch: connfail=(\d+) submits=(\d+) failed=(\d+) success=(\d+)|) {
	($es_connfail, $es_submits, $es_failed, $es_success) = ($1,$2,$3,$4);
    }
    elsif ($line =~ m|stats: (.+): size=(\d+) enqueued=(\d+) full=(\d+) maxqsize=(\d+)|) {
	# Copy the captures straight away, before any further calls.
	my ($qname, @qstats) = ($1, $2, $3, $4, $5);

	if (!defined($queue_ids{$qname})) {
	    # @queue_insts holds (id, name) pairs, so half its length is
	    # the next unused sequential instance id.
	    my $qid = @queue_insts / 2;
	    $queue_ids{$qname} = $qid;
	    push @queue_insts, ($qid, $qname);
	    $pmda->replace_indom($queue_indom, \@queue_insts);
	}
	# Stored in the order size, enqueued, full, maxqsize.
	$queue_values{$qname} = [ @qstats ];
    }
}

# Fetch callback: supplies the current value of one metric instance.
#
# Arguments: the cluster and item numbers of the requested metric, and
# the instance identifier (PM_IN_NULL for metrics without instances).
# Returns a two-element list: (value, 1) on success, otherwise
# (error code, 0) where the code is PM_ERR_AGAIN, PM_ERR_INST or
# PM_ERR_PMID.
sub rsyslog_fetch_callback
{
    my ($cluster, $item, $inst) = @_;

    #$pmda->log("rsyslog_fetch_callback for PMID: $cluster.$item ($inst)");

    # $interval only becomes non-zero once the parser has seen statistics
    # reports at two different times; until then no values are served.
    return (PM_ERR_AGAIN,0) unless ($interval != 0);

    if ($cluster == 0) {	# singular (no instance domain) metrics
	return (PM_ERR_INST, 0) unless ($inst == PM_IN_NULL);
	if ($item == 0) { return ($interval, 1); }
	if ($item == 1) { return ($ux_submitted, 1); }
	if ($item == 2)	{ return ($ux_discarded, 1); }
	if ($item == 3)	{ return ($ux_ratelimiters, 1); }
	if ($item == 8)	{ return ($es_connfail, 1); }
	if ($item == 9)	{ return ($es_submits, 1); }
	if ($item == 10){ return ($es_failed, 1); }
	if ($item == 11){ return ($es_success, 1); }
    }
    elsif ($cluster == 1) {	# queues
	return (PM_ERR_INST, 0) unless ($inst != PM_IN_NULL);
	# @queue_insts is a flat list of (id, name) pairs with ids handed
	# out sequentially from zero, so valid ids are 0 .. (pairs - 1).
	# Rejecting everything else avoids looking up an undefined queue
	# name and stops negative ids indexing from the end of the list.
	return (PM_ERR_INST, 0)
	    unless ($inst >= 0 && $inst < @queue_insts / 2);
	my $qname = $queue_insts[$inst * 2 + 1];
	my $qvref = $queue_values{$qname};

	return (PM_ERR_INST, 0) unless defined ($qvref);

	# Values are stored as [ size, enqueued, full, maxqsize ].
	if ($item == 0) { return ($qvref->[0], 1); }
	if ($item == 1)	{ return ($qvref->[1], 1); }
	if ($item == 2)	{ return ($qvref->[2], 1); }
	if ($item == 3) { return ($qvref->[3], 1); }
    }
    return (PM_ERR_PMID, 0);
}

# Refuse to start unless the statistics file exists and is a named pipe.
die "Cannot find a valid rsyslog statistics named pipe\n" unless -p $statsfile;

$pmda->connect_pmcd;

# Cluster 0: singular metrics (interval, imuxsock and elasticsearch).
# The item numbers here must match those tested in rsyslog_fetch_callback.
$pmda->add_metric(pmda_pmid(0,0), PM_TYPE_U64, PM_INDOM_NULL, PM_SEM_INSTANT,
	pmda_units(0,1,0,0,PM_TIME_SEC,0), 'rsyslog.interval',
	'Time interval observed between samples', '');
$pmda->add_metric(pmda_pmid(0,1), PM_TYPE_U64, PM_INDOM_NULL, PM_SEM_COUNTER,
	pmda_units(0,0,1,0,0,PM_COUNT_ONE), 'rsyslog.imuxsock.submitted',
	'Cumulative count of unix domain socket input messages queued',
	"Cumulative count of messages successfully queued to the rsyslog\n" .
	"main message queueing core that arrived on unix domain sockets.");
$pmda->add_metric(pmda_pmid(0,2), PM_TYPE_U64, PM_INDOM_NULL, PM_SEM_COUNTER,
	pmda_units(0,0,1,0,0,PM_COUNT_ONE), 'rsyslog.imuxsock.discarded',
	'Count of unix domain socket messages discarded due to rate limiting',
	"Cumulative count of messages that were discarded due to their\n" .
	"priority being at or below rate-limit-severity and their sending\n" .
	"process being deemed to be sending messages too quickly (refer to\n" .
	"parameters ratelimitburst, ratelimitinterval and ratelimitseverity).");
# NOTE(review): this counter is registered with dimensionless units while
# the other counters use a count dimension; left untouched because it
# changes the metric descriptor - confirm whether that is intended.
$pmda->add_metric(pmda_pmid(0,3), PM_TYPE_U64, PM_INDOM_NULL, PM_SEM_COUNTER,
	pmda_units(0,0,0,0,0,0), 'rsyslog.imuxsock.numratelimiters',
	'Count of messages received that could be subject to rate limiting',
	"Cumulative count of messages that rsyslog received and performed a\n" .
	"credentials (PID) lookup for subsequent rate limiting decisions.\n" .
	"The message would have to be at rate-limit-severity or lower, with\n" .
	"rate limiting enabled, in order for this count to be incremented.");
$pmda->add_metric(pmda_pmid(0,8), PM_TYPE_U64, PM_INDOM_NULL, PM_SEM_COUNTER,
	pmda_units(0,0,1,0,0,PM_COUNT_ONE), 'rsyslog.elasticsearch.connfail',
	'Count of failed connections while attempting to send events', '');
$pmda->add_metric(pmda_pmid(0,9), PM_TYPE_U64, PM_INDOM_NULL, PM_SEM_COUNTER,
	pmda_units(0,0,1,0,0,PM_COUNT_ONE), 'rsyslog.elasticsearch.submits',
	'Count of valid submissions of events to elasticsearch indexer', '');
$pmda->add_metric(pmda_pmid(0,10), PM_TYPE_U64, PM_INDOM_NULL, PM_SEM_COUNTER,
	pmda_units(0,0,1,0,0,PM_COUNT_ONE), 'rsyslog.elasticsearch.failed',
	'Count of failed attempts to send events to elasticsearch',
	'This count is often a good indicator of malformed JSON messages');
$pmda->add_metric(pmda_pmid(0,11), PM_TYPE_U64, PM_INDOM_NULL, PM_SEM_COUNTER,
	pmda_units(0,0,1,0,0,PM_COUNT_ONE), 'rsyslog.elasticsearch.success',
	'Count of successfully acknowledged events from elasticsearch', '');

# Cluster 1: per-queue metrics, one instance per rsyslog queue.  Items
# 0..3 correspond to the size, enqueued, full and maxqsize fields parsed
# from each queue statistics line.
$pmda->add_metric(pmda_pmid(1,0), PM_TYPE_U64, $queue_indom, PM_SEM_INSTANT,
	pmda_units(0,0,0,0,0,0), 'rsyslog.queues.size',
	'Current queue depth for each rsyslog queue',
	"The number of messages held in the queue at the time rsyslog last\n" .
	"reported its statistics.");
$pmda->add_metric(pmda_pmid(1,1), PM_TYPE_U64, $queue_indom, PM_SEM_COUNTER,
	pmda_units(0,0,1,0,0,PM_COUNT_ONE), 'rsyslog.queues.enqueued',
	'Cumulative count of messages enqueued to individual queues',
	"As messages arrive they are added to the main message processing\n" .
	"queue, either individually or in batches in the case of messages\n" .
	"arriving on the network.");
$pmda->add_metric(pmda_pmid(1,2), PM_TYPE_U64, $queue_indom, PM_SEM_COUNTER,
	pmda_units(0,0,1,0,0,PM_COUNT_ONE), 'rsyslog.queues.full',
	'Cumulative count of message arrivals with a full queue',
	"When messages are enqueued, a check is first made to ensure the\n" .
	"queue is not full.  If it is, this counter is incremented.  The\n" .
	"full-queue-handling logic will wait for a configurable time for\n" .
	"the queue congestion to ease, failing which the message will be\n" .
	"discarded.  Worth keeping an eye on this metric, as it indicates\n" .
	"rsyslog is not able to process messages quickly enough given the\n" .
	"current arrival rate.");
$pmda->add_metric(pmda_pmid(1,3), PM_TYPE_U64, $queue_indom, PM_SEM_INSTANT,
	pmda_units(0,0,1,0,0,PM_COUNT_ONE), 'rsyslog.queues.maxsize',
	'Maximum depth reached by an individual queue',
	"When messages arrive (for example) they are enqueued to the main\n" .
	"message queue - if the queue length on arrival is now greater than\n" .
	"ever before observed, we set this value to the current queue size");

# The queue instance domain starts out empty; rsyslog_parser extends it
# (via replace_indom) whenever a previously unseen queue name appears.
$pmda->add_indom($queue_indom, \@queue_insts,
	'Instance domain exporting each rsyslog queue', '');

# Feed each line of the statistics pipe to rsyslog_parser.  The final 0
# is presumably the caller data that the parser receives (and ignores)
# as its first argument - confirm against the PCP::PMDA documentation.
$pmda->add_tail($statsfile, \&rsyslog_parser, 0);
$pmda->set_fetch_callback(\&rsyslog_fetch_callback);
$pmda->set_user('pcp');
$pmda->run;

=pod

=head1 NAME

pmdarsyslog - rsyslog (reliable and extended syslog) PMDA

=head1 DESCRIPTION

B<pmdarsyslog> is a Performance Metrics Domain Agent (PMDA) which
exports metric values from the rsyslogd(8) server.
Further details about rsyslog can be found at http://www.rsyslog.com/.

=head1 INSTALLATION

If you want access to the names and values for the rsyslog performance
metrics, do the following as root:

	# cd $PCP_PMDAS_DIR/rsyslog
	# ./Install

If you want to undo the installation, do the following as root:

	# cd $PCP_PMDAS_DIR/rsyslog
	# ./Remove

B<pmdarsyslog> is launched by pmcd(1) and should never be executed
directly.  The Install and Remove scripts notify pmcd(1) when
the agent is installed or removed.

In order to use this agent, rsyslog stats gathering must be enabled.
This is done by adding the lines:

	$ModLoad impstats
	$PStatsInterval 5	# log every 5 seconds
	syslog.info		|/var/log/pcp/rsyslog/stats

to your rsyslog.conf(5) configuration file after installing the PMDA.
Take care to ensure the syslog.info messages do not get logged in any
other file, as this could unexpectedly fill your filesystem.  The
selector syntax syslog.!=info is useful for explicitly excluding these
messages from other log destinations.

=head1 FILES

=over

=item /var/log/pcp/rsyslog/stats

named pipe containing statistics exported from rsyslog,
usually created by the PMDA Install script.

=item $PCP_PMDAS_DIR/rsyslog/Install

installation script for the B<pmdarsyslog> agent

=item $PCP_PMDAS_DIR/rsyslog/Remove

undo installation script for the B<pmdarsyslog> agent

=item $PCP_LOG_DIR/pmcd/rsyslog.log

default log file for error messages from B<pmdarsyslog>

=back

=head1 SEE ALSO

pmcd(1), rsyslog.conf(5), rsyslogd(8).