summaryrefslogtreecommitdiff
path: root/src/pmmgr
diff options
context:
space:
mode:
Diffstat (limited to 'src/pmmgr')
-rw-r--r--src/pmmgr/GNUmakefile63
-rw-r--r--src/pmmgr/TODO12
-rw-r--r--src/pmmgr/config/GNUmakefile40
-rw-r--r--src/pmmgr/config/README13
-rw-r--r--src/pmmgr/config/pmie0
-rw-r--r--src/pmmgr/config/pmieconf0
-rw-r--r--src/pmmgr/config/pmlogconf0
-rw-r--r--src/pmmgr/config/pmlogger0
-rw-r--r--src/pmmgr/config/pmlogmerge0
-rw-r--r--src/pmmgr/config/pmlogmerge-granular0
-rw-r--r--src/pmmgr/config/pmlogmerge-rewrite0
-rw-r--r--src/pmmgr/config/target-discovery.example-avahi1
-rw-r--r--src/pmmgr/pmmgr.cxx1285
-rw-r--r--src/pmmgr/pmmgr.h133
-rw-r--r--src/pmmgr/pmmgr.options27
-rw-r--r--src/pmmgr/pmmgr.service.in14
-rw-r--r--src/pmmgr/rc_pmmgr296
17 files changed, 1884 insertions, 0 deletions
diff --git a/src/pmmgr/GNUmakefile b/src/pmmgr/GNUmakefile
new file mode 100644
index 0000000..2c173aa
--- /dev/null
+++ b/src/pmmgr/GNUmakefile
@@ -0,0 +1,63 @@
+#
+# Copyright (c) 2013-2014 Red Hat.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+
+TOPDIR = ../..
+include $(TOPDIR)/src/include/builddefs
+
+SUBDIRS = config
+CXXMDTARGET = pmmgr$(EXECSUFFIX)
+HFILES = pmmgr.h
+CXXFILES = pmmgr.cxx
+LLDLIBS = $(PCPLIB) $(LIB_FOR_PTHREADS) $(LIB_FOR_ATOMIC)
+LLDFLAGS += $(RDYNAMIC_FLAG) $(PIELDFLAGS)
+LCFLAGS += $(PIECFLAGS)
+LDIRT = *.log pmmgr.service
+
+default: build-me
+
+ifeq ($(BUILD_PMMGR),yes)
+build-me: $(SUBDIRS) $(CXXMDTARGET) pmmgr.service
+ $(SUBDIRS_MAKERULE)
+
+pmmgr.service: pmmgr.service.in
+ $(SED) -e 's;@path@;'$(PCP_RC_DIR)';' $< > $@
+
+install: $(SUBDIRS) $(CXXMDTARGET)
+ $(SUBDIRS_MAKERULE)
+ $(INSTALL) -m 755 -d `dirname $(PCP_PMMGROPTIONS_PATH)`
+ $(INSTALL) -m 644 pmmgr.options $(PCP_PMMGROPTIONS_PATH)
+ $(INSTALL) -m 755 rc_pmmgr $(PCP_RC_DIR)/pmmgr
+ifeq ($(ENABLE_SYSTEMD),true)
+ $(INSTALL) -m 644 pmmgr.service $(PCP_SYSTEMDUNIT_DIR)/pmmgr.service
+endif
+ $(INSTALL) -m 755 $(CXXMDTARGET) $(PCP_BINADM_DIR)/$(CXXMDTARGET)
+ $(INSTALL) -m 775 -o $(PCP_USER) -g $(PCP_GROUP) -d $(PCP_LOG_DIR)/pmmgr
+else
+build-me:
+ @echo not building pmmgr
+install:
+ @echo not installing pmmgr
+endif
+
+$(OBJECTS): $(HFILES)
+
+include $(BUILDRULES)
+
+default_pcp : default
+
+install_pcp : install
+
+# Hey, does anyone have a spare vowel?
+
+.PHONY: build-me
diff --git a/src/pmmgr/TODO b/src/pmmgr/TODO
new file mode 100644
index 0000000..b551687
--- /dev/null
+++ b/src/pmmgr/TODO
@@ -0,0 +1,12 @@
+- pmmgr.1 EXAMPLE CONFIGURATIONS
+- optionally delay pm*conf
+- pmlogreduce
+- log aging in background while new pmlogger's already running
+- old log compression (until we get libpcp zlib or something)
+- email error reporting?
+- qa
+- port to mingw?
+- port to cygwin?
+- pmlogger/pmie .log rotation
+- pid->pid_t cleanup
+
diff --git a/src/pmmgr/config/GNUmakefile b/src/pmmgr/config/GNUmakefile
new file mode 100644
index 0000000..0ecdf16
--- /dev/null
+++ b/src/pmmgr/config/GNUmakefile
@@ -0,0 +1,40 @@
+#!gmake
+#
+# Copyright (c) 2013-2014 Red Hat.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+
+LLDIRT =
+
+TOPDIR = ../../..
+include $(TOPDIR)/src/include/builddefs
+
+LSRCFILES = pmie pmieconf pmlogconf pmlogger pmlogmerge pmlogmerge-rewrite pmlogmerge-granular \
+ README target-discovery.example-avahi
+
+PMMGR_SYSCONF_DIR=$(PCP_SYSCONF_DIR)/pmmgr
+
+default:
+
+build-me:
+
+include $(BUILDRULES)
+
+install:
+ $(INSTALL) -m 755 -d $(PMMGR_SYSCONF_DIR)
+ for file in $(LSRCFILES); do \
+ $(INSTALL) -m 644 $$file $(PMMGR_SYSCONF_DIR)/$$file; \
+ done
+
+default_pcp : default
+
+install_pcp : install
diff --git a/src/pmmgr/config/README b/src/pmmgr/config/README
new file mode 100644
index 0000000..d767635
--- /dev/null
+++ b/src/pmmgr/config/README
@@ -0,0 +1,13 @@
+This is the default built-in configuration directory for pmmgr.
+See man pmmgr(1) for details of the individual files that are
+read to extract configuration lines. Other files in this directory
+are for documentation / examples.
+
+The default built-in configuration items for pmmgr are almost sufficient
+to act as a system's primary pmlogger and pmie. The exceptions are the
+actual enablement of pmlogger & pmie, and their auto-generated configuration,
+which are enabled by some empty files here.
+
+Additional possible but non-default configurations are given in .example*
+files here. Rename them to drop the .example* filename extension to
+activate them.
diff --git a/src/pmmgr/config/pmie b/src/pmmgr/config/pmie
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/pmmgr/config/pmie
diff --git a/src/pmmgr/config/pmieconf b/src/pmmgr/config/pmieconf
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/pmmgr/config/pmieconf
diff --git a/src/pmmgr/config/pmlogconf b/src/pmmgr/config/pmlogconf
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/pmmgr/config/pmlogconf
diff --git a/src/pmmgr/config/pmlogger b/src/pmmgr/config/pmlogger
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/pmmgr/config/pmlogger
diff --git a/src/pmmgr/config/pmlogmerge b/src/pmmgr/config/pmlogmerge
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/pmmgr/config/pmlogmerge
diff --git a/src/pmmgr/config/pmlogmerge-granular b/src/pmmgr/config/pmlogmerge-granular
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/pmmgr/config/pmlogmerge-granular
diff --git a/src/pmmgr/config/pmlogmerge-rewrite b/src/pmmgr/config/pmlogmerge-rewrite
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/pmmgr/config/pmlogmerge-rewrite
diff --git a/src/pmmgr/config/target-discovery.example-avahi b/src/pmmgr/config/target-discovery.example-avahi
new file mode 100644
index 0000000..c3d9d94
--- /dev/null
+++ b/src/pmmgr/config/target-discovery.example-avahi
@@ -0,0 +1 @@
+avahi,timeout=2.5
diff --git a/src/pmmgr/pmmgr.cxx b/src/pmmgr/pmmgr.cxx
new file mode 100644
index 0000000..5ff210f
--- /dev/null
+++ b/src/pmmgr/pmmgr.cxx
@@ -0,0 +1,1285 @@
+/*
+ * Copyright (c) 2013-2014 Red Hat.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _XOPEN_SOURCE
+#define _XOPEN_SOURCE 600
+#endif
+#include "pmmgr.h"
+#include "impl.h"
+
+#include <sys/stat.h>
+#include <cassert>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+
+extern "C" {
+#include <fcntl.h>
+#include <unistd.h>
+#include <glob.h>
+#include <sys/wait.h>
+#ifdef HAVE_PTHREAD_H
+#include <pthread.h>
+#endif
+#ifdef IS_LINUX
+#include <sys/syscall.h>
+#endif
+}
+
+
+using namespace std;
+
+// ------------------------------------------------------------------------
+
+
+int quit;
+int polltime = 60;
+
+
+// ------------------------------------------------------------------------
+
+
+// Create a string that is safe to pass to system(3), i.e., sh -c,
+// by quoting metacharacters. This transform generally should be
+// applied only once.
+string
+sh_quote(const string& input)
+{
+ string output;
+ for (unsigned i=0; i<input.length(); i++)
+ {
+ char c = input[i];
+ if ((ispunct(c) || isspace(c)) && // quite aggressive
+ (c != ':' && c != '.' && c != '_' && c != '/' && c != '-')) // safe & popular punctuation
+ output += '\\';
+ output += c;
+ }
+
+ return output;
+}
+
+
+// Print a string to cout/cerr progress reports, similar to the
+// stuff produced by __pmNotifyErr
+ostream&
+timestamp(ostream &o)
+{
+ time_t now;
+ time (&now);
+ char *now2 = ctime (&now);
+ if (now2)
+ now2[19] = '\0'; // overwrite \n
+
+ return o << "[" << (now2 ? now2 : "") << "] " << pmProgname << "("
+ << getpid()
+#ifdef HAVE_PTHREAD_H
+#ifdef IS_LINUX
+ << "/" << syscall(SYS_gettid)
+#else
+ << "/" << pthread_self()
+#endif
+#endif
+ << "): ";
+}
+
+
+extern "C" void *
+pmmgr_daemon_poll_thread (void* a)
+{
+ pmmgr_daemon* d = (pmmgr_daemon*) a;
+ d->poll();
+ return 0;
+}
+
+
+// A wrapper for something like system(3), but responding quicker to
+// interrupts and standardizing tracing.
+int
+pmmgr_configurable::wrap_system(const std::string& cmd)
+{
+ if (pmDebug & DBG_TRACE_APPL0)
+ timestamp(cout) << "running " << cmd << endl;
+
+ int pid = fork();
+ if (pid == 0)
+ {
+ // child
+ int rc = execl ("/bin/sh", "sh", "-c", cmd.c_str(), NULL);
+ timestamp(cerr) << "failed to execl sh -c " << cmd << " rc=" << rc << endl;
+ _exit (1);
+ }
+ else if (pid < 0)
+ {
+ // error
+ timestamp(cerr) << "fork for " << cmd << " failed: errno=" << errno << endl;
+ return -1;
+ }
+ else
+ {
+ // parent
+ int status = -1;
+ int rc;
+ //timestamp(cout) << "waiting for pid=" << pid << endl;
+
+ do { rc = waitpid(pid, &status, 0); } while (!quit && rc == -1 && errno == EINTR); // TEMP_FAILURE_RETRY
+ if (quit)
+ {
+ // timestamp(cout) << "killing pid=" << pid << endl;
+ kill (pid, SIGTERM); // just to be on the safe side
+ // it might linger a few seconds in zombie mode
+ }
+
+ //timestamp(cout) << "done status=" << status << endl;
+ if (status != 0)
+ timestamp(cerr) << "system(" << cmd << ") failed: rc=" << status << endl;
+ return status;
+ }
+}
+
+
+
+// ------------------------------------------------------------------------
+
+
+pmmgr_configurable::pmmgr_configurable(const string& dir):
+ config_directory(dir)
+{
+}
+
+
+vector<string>
+pmmgr_configurable::get_config_multi(const string& file) const
+{
+ vector<string> lines;
+
+ string complete_filename = config_directory + (char)__pmPathSeparator() + file;
+ ifstream f (complete_filename.c_str());
+ while (f.good()) {
+ string line;
+ getline(f, line);
+ if (! f.good())
+ break;
+ if (line != "")
+ lines.push_back(line);
+ }
+
+ return lines;
+}
+
+
+bool
+pmmgr_configurable::get_config_exists(const string& file) const
+{
+ string complete_filename = config_directory + (char)__pmPathSeparator() + file;
+ ifstream f (complete_filename.c_str());
+ return (f.good());
+}
+
+
+string
+pmmgr_configurable::get_config_single(const string& file) const
+{
+ vector<string> lines = get_config_multi (file);
+ if (lines.size() == 1)
+ return lines[0];
+ else
+ return "";
+}
+
+ostream&
+pmmgr_configurable::timestamp(ostream& o)
+{
+ return ::timestamp(o) << config_directory << ": ";
+}
+
+
+
+// ------------------------------------------------------------------------
+
+
+pmMetricSpec*
+pmmgr_job_spec::parse_metric_spec (const string& spec)
+{
+ if (parsed_metric_cache.find(spec) != parsed_metric_cache.end())
+ return parsed_metric_cache[spec];
+
+ const char* specstr = spec.c_str();
+ pmMetricSpec* pms = 0;
+ char *errmsg;
+ char dummy_host[] = "";
+ int rc = pmParseMetricSpec (specstr,
+ 0, dummy_host, /* both ignored */
+ & pms, & errmsg);
+ if (rc < 0) {
+ timestamp(cerr) << "hostid-metrics '" << specstr << "' parse error: " << errmsg << endl;
+ free (errmsg);
+ }
+
+ parsed_metric_cache[spec] = pms;
+ return pms;
+}
+
+
+pmmgr_hostid
+pmmgr_job_spec::compute_hostid (const pcp_context_spec& ctx)
+{
+ int pmc = pmNewContext (PM_CONTEXT_HOST, ctx.c_str());
+ if (pmc < 0)
+ return "";
+
+ // parse all the hostid metric specifications
+ vector<string> hostid_specs = get_config_multi("hostid-metrics");
+ if (hostid_specs.size() == 0)
+ hostid_specs.push_back(string("pmcd.hostname"));
+
+ // fetch all hostid metrics in sequence
+ vector<string> hostid_fields;
+ for (unsigned i=0; i<hostid_specs.size(); i++)
+ {
+ pmMetricSpec* pms = parse_metric_spec (hostid_specs[i]);
+
+ pmID pmid;
+ int rc = pmLookupName (1, & pms->metric, &pmid);
+ if (rc < 0)
+ continue;
+
+ pmDesc desc;
+ rc = pmLookupDesc (pmid, & desc);
+ if (rc < 0)
+ continue;
+
+ if (desc.type != PM_TYPE_STRING)
+ continue;
+
+ if ((desc.indom != PM_INDOM_NULL) && pms->ninst > 0)
+ {
+ // reset the indom to include all elements
+ rc = pmDelProfile(desc.indom, 0, (int *)0);
+ if (rc < 0)
+ continue;
+
+ int *inums = (int *) malloc (pms->ninst * sizeof(int));
+ if (inums == NULL)
+ continue;
+ // NB: after this point, 'continue' must also free(inums);
+
+ // map the instance names to instance numbers
+ unsigned numinums_used = 0;
+ for (int j=0; j<pms->ninst; j++)
+ {
+ int inum = pmLookupInDom (desc.indom, pms->inst[j]);
+ if (inum < 0)
+ continue;
+ inums[numinums_used++] = inum;
+ }
+
+ // add the selected instances to the profile
+ rc = pmAddProfile (desc.indom, numinums_used, inums);
+ free (inums);
+ if (rc < 0)
+ continue;
+ }
+
+ // fetch the values
+ pmResult *r;
+ rc = pmFetch (1, &pmid, &r);
+ if (rc < 0)
+ continue;
+ // NB: after this point, 'continue' must also pmFreeResult(r)
+
+ // in-place sort value list by indom number
+ pmSortInstances(r);
+
+ // only vset[0] will be set, for csb->pmid
+ if (r->vset[0]->numval > 0)
+ {
+ for (int j=0; j<r->vset[0]->numval; j++) // iterate over instances
+ {
+ // fetch the string value
+ pmAtomValue av;
+ rc = pmExtractValue(r->vset[0]->valfmt,
+ & r->vset[0]->vlist[j],
+ PM_TYPE_STRING, & av, PM_TYPE_STRING);
+ if (rc < 0)
+ continue;
+
+ // at last! we have a string we can accumulate
+ hostid_fields.push_back (av.cp);
+ free (av.cp);
+ }
+ }
+
+ (void) pmFreeResult (r);
+ }
+
+ (void) pmDestroyContext (pmc);
+
+ // Sanitize the host-id metric values into a single string that is
+ // suitable for posix-portable-filenames, and not too ugly for
+ // someone to look at or type in.
+ //
+ // http://www.opengroup.org/onlinepubs/007904975/basedefs/xbd_chap03.html
+ string sanitized;
+ for (unsigned i=0; i<hostid_fields.size(); i++)
+ {
+ const string& f = hostid_fields[i];
+ if (f == "") continue;
+ if (sanitized != "") sanitized += "-"; // separate fields
+ for (unsigned j=0; j<f.length(); j++)
+ {
+ char c = f[j];
+ if (isalnum(c))
+ sanitized += c;
+ else if (c== '-' || c == '.' || c == '_')
+ sanitized += c;
+ else
+ // drop other non-portable characters NB: this can mean
+ // unintentional duplication in IDs, which a user can work
+ // around by configuring additional hostid metrics.
+ ;
+ }
+ }
+
+ return pmmgr_hostid (sanitized);
+}
+
+
+pmmgr_job_spec::pmmgr_job_spec(const std::string& config_directory):
+ pmmgr_configurable(config_directory)
+{
+ // We don't actually have to do any configuration parsing at this
+ // time. Let's do it during poll(), which makes us more responsive
+ // to run-time changes.
+}
+
+
+pmmgr_job_spec::~pmmgr_job_spec()
+{
+ // free any cached pmMetricSpec's
+ for (map<string,pmMetricSpec*>::iterator it = parsed_metric_cache.begin();
+ it != parsed_metric_cache.end();
+ ++it)
+ free (it->second); // aka pmFreeMetricSpec
+
+ // kill all our daemons created during poll()
+ for (map<pmmgr_hostid,pcp_context_spec>::iterator it = known_targets.begin();
+ it != known_targets.end();
+ ++it)
+ note_dead_hostid (it->first);
+}
+
+
+// ------------------------------------------------------------------------
+
+
+void
+pmmgr_job_spec::poll()
+{
+ if (quit) return;
+
+ // phase 1: run all discovery/probing functions to collect context-spec's
+ set<pcp_context_spec> new_specs;
+
+ vector<string> target_hosts = get_config_multi("target-host");
+ for (unsigned i=0; i<target_hosts.size(); i++)
+ new_specs.insert(target_hosts[i]);
+
+ vector<string> target_discovery = get_config_multi("target-discovery");
+ for (unsigned i=0; i<target_discovery.size() && !quit; i++)
+ {
+ char **urls = NULL;
+ const char *discovery = (target_discovery[i] == "")
+ ? NULL
+ : target_discovery[i].c_str();
+ int numUrls = pmDiscoverServices (PM_SERVER_SERVICE_SPEC, discovery, &urls);
+ if (numUrls <= 0)
+ continue;
+ for (int i=0; i<numUrls; i++)
+ new_specs.insert(string(urls[i]));
+ free ((void*) urls);
+ }
+
+ // fallback to logging the local server, if nothing else is configured/discovered
+ if (target_hosts.size() == 0 &&
+ target_discovery.size() == 0)
+ new_specs.insert("local:");
+
+ // phase 2: move previously-identified targets over, so we can tell who
+ // has come or gone
+ const map<pmmgr_hostid,pcp_context_spec> old_known_targets = known_targets;
+ known_targets.clear();
+
+ // phase 3: map the context-specs to hostids to find new hosts
+ map<pmmgr_hostid,double> known_target_scores;
+ for (set<pcp_context_spec>::iterator it = new_specs.begin();
+ it != new_specs.end() && !quit;
+ ++it)
+ {
+ struct timeval before, after;
+ __pmtimevalNow(& before);
+ pmmgr_hostid hostid = compute_hostid (*it);
+ __pmtimevalNow(& after);
+ double score = __pmtimevalSub(& after, & before); // the smaller, the preferreder
+
+ if (hostid != "") // verified existence/liveness
+ {
+ if (pmDebug & DBG_TRACE_APPL0)
+ timestamp(cout) << "hostid " << hostid << " via " << *it << " time " << score << endl;
+
+ if (known_target_scores.find(hostid) == known_target_scores.end() ||
+ known_target_scores[hostid] > score) // previous slower than this one
+ {
+ known_targets[hostid] = *it;
+ known_target_scores[hostid] = score;
+ }
+ }
+ }
+
+ // phase 4a: compare old_known_targets vs. known_targets: look for any recently died
+ for (map<pmmgr_hostid,pcp_context_spec>::const_iterator it = old_known_targets.begin();
+ it != old_known_targets.end();
+ ++it)
+ {
+ const pmmgr_hostid& hostid = it->first;
+ if (known_targets.find(hostid) == known_targets.end())
+ note_dead_hostid (hostid);
+ }
+
+ // phase 4b: compare new known_targets & old_known_targets: look for recently born
+ for (map<pmmgr_hostid,pcp_context_spec>::const_iterator it = known_targets.begin();
+ it != known_targets.end();
+ ++it)
+ {
+ const pmmgr_hostid& hostid = it->first;
+ if (old_known_targets.find(hostid) == old_known_targets.end())
+ note_new_hostid (hostid, known_targets[hostid]);
+ }
+
+ // phase 5: poll all the live daemons
+ // NB: there is a parallelism opportunity, as running many pmlogconf/etc.'s in series
+ // is a possible bottleneck.
+#ifdef HAVE_PTHREAD_H
+ vector<pthread_t> threads;
+#endif
+ for (multimap<pmmgr_hostid,pmmgr_daemon*>::iterator it = daemons.begin();
+ it != daemons.end() && !quit;
+ ++it)
+ {
+#ifdef HAVE_PTHREAD_H
+ pthread_t foo;
+ int rc = pthread_create(&foo, NULL, &pmmgr_daemon_poll_thread, it->second);
+ if (rc == 0)
+ threads.push_back (foo);
+#else
+ int rc = -ENOSUPP;
+#endif
+ if (rc) // threading failed or running single-threaded
+ it->second->poll();
+ }
+
+#ifdef HAVE_PTHREAD_H
+ for (unsigned i=0; i<threads.size(); i++)
+ pthread_join (threads[i], NULL);
+#endif
+
+ // phase 6: garbage-collect ancient log-directory subdirs
+ string subdir_gc = get_config_single("log-subdirectory-gc");
+ if (subdir_gc == "")
+ subdir_gc = "90days";
+ struct timeval tv;
+ char *errmsg;
+ int rc = pmParseInterval(subdir_gc.c_str(), & tv, & errmsg);
+ if (rc < 0)
+ {
+ timestamp(cerr) << "log-subdirectory-gc '" << subdir_gc << "' parse error: " << errmsg << endl;
+ free (errmsg);
+ // default to 90days in another way
+ tv.tv_sec = 60 * 60 * 24 * 90;
+ tv.tv_usec = 0;
+ }
+ time_t now;
+ (void) time(& now);
+
+ // NB: check less frequently?
+
+ // XXX: getting a bit duplicative
+ string default_log_dir =
+ string(pmGetConfig("PCP_LOG_DIR")) + (char)__pmPathSeparator() + "pmmgr";
+ string log_dir = get_config_single ("log-directory");
+ if (log_dir == "") log_dir = default_log_dir;
+ else if(log_dir[0] != '/') log_dir = config_directory + (char)__pmPathSeparator() + log_dir;
+
+ glob_t the_blob;
+ string glob_pattern = log_dir + (char)__pmPathSeparator() + "*";
+ rc = glob (glob_pattern.c_str(),
+ GLOB_NOESCAPE
+#ifdef GLOB_ONLYDIR
+ | GLOB_ONLYDIR
+#endif
+ , NULL, & the_blob);
+ if (rc == 0)
+ {
+ for (unsigned i=0; i<the_blob.gl_pathc && !quit; i++)
+ {
+ string item_name = the_blob.gl_pathv[i];
+
+ // Reject if currently live hostid
+ // NB: basename(3) might modify the argument string, so we don't feed
+ // it item_name.c_str().
+ string target_name = basename(the_blob.gl_pathv[i]);
+ if (known_targets.find(target_name) != known_targets.end())
+ continue;
+
+ struct stat foo;
+ rc = stat (item_name.c_str(), & foo);
+ if (rc == 0 &&
+ S_ISDIR(foo.st_mode) &&
+ (foo.st_mtime + tv.tv_sec) < now)
+ {
+ // <Janine Melnitz>We've got one!!!!!</>
+ timestamp(cout) << "gc subdirectory " << item_name << endl;
+ string cleanup_cmd = "/bin/rm -rf " + sh_quote(item_name);
+ (void) wrap_system(cleanup_cmd);
+ }
+ }
+ }
+ globfree (& the_blob);
+}
+
+
+// ------------------------------------------------------------------------
+
+
+void
+pmmgr_job_spec::note_new_hostid(const pmmgr_hostid& hid, const pcp_context_spec& spec)
+{
+ timestamp(cout) << "new hostid " << hid << " at " << string(spec) << endl;
+
+ if (get_config_exists("pmlogger"))
+ daemons.insert(make_pair(hid, new pmmgr_pmlogger_daemon(config_directory, hid, spec)));
+
+ if (get_config_exists("pmie"))
+ daemons.insert(make_pair(hid, new pmmgr_pmie_daemon(config_directory, hid, spec)));
+}
+
+
+void
+pmmgr_job_spec::note_dead_hostid(const pmmgr_hostid& hid)
+{
+ timestamp(cout) << "dead hostid " << hid << endl;
+
+ pair<multimap<pmmgr_hostid,pmmgr_daemon*>::iterator,
+ multimap<pmmgr_hostid,pmmgr_daemon*>::iterator> range =
+ daemons.equal_range(hid);
+
+ for (multimap<pmmgr_hostid,pmmgr_daemon*>::iterator it = range.first;
+ it != range.second;
+ ++it)
+ delete (it->second);
+
+ daemons.erase(range.first, range.second);
+}
+
+
+// ------------------------------------------------------------------------
+
+
+pmmgr_daemon::pmmgr_daemon(const std::string& config_directory,
+ const pmmgr_hostid& hostid,
+ const pcp_context_spec& spec):
+ pmmgr_configurable(config_directory),
+ hostid(hostid),
+ spec(spec),
+ pid(0),
+ last_restart_attempt(0)
+{
+}
+
+
+pmmgr_pmlogger_daemon::pmmgr_pmlogger_daemon(const std::string& config_directory,
+ const pmmgr_hostid& hostid,
+ const pcp_context_spec& spec):
+ pmmgr_daemon(config_directory, hostid, spec)
+{
+}
+
+
+pmmgr_pmie_daemon::pmmgr_pmie_daemon(const std::string& config_directory,
+ const pmmgr_hostid& hostid,
+ const pcp_context_spec& spec):
+ pmmgr_daemon(config_directory, hostid, spec)
+{
+}
+
+
+pmmgr_daemon::~pmmgr_daemon()
+{
+ if (pid != 0)
+ {
+ int ignored;
+ (void) kill ((pid_t) pid, SIGTERM);
+ (void) waitpid ((pid_t) pid, &ignored, 0); // collect zombie
+ if (pmDebug & DBG_TRACE_APPL0)
+ timestamp(cout) << "daemon pid " << pid << " killed" << endl;
+ }
+}
+
+
+void pmmgr_daemon::poll()
+{
+ if (quit) return;
+
+ if (pid != 0) // test if it's still alive
+ {
+ // reap it if it might have died
+ int ignored;
+ int rc = waitpid ((pid_t) pid, &ignored, WNOHANG);
+
+ rc = kill ((pid_t) pid, 0);
+ if (rc < 0)
+ {
+ if (pmDebug & DBG_TRACE_APPL0)
+ timestamp(cout) << "daemon pid " << pid << " found dead" << endl;
+ pid = 0;
+ // we will try again immediately
+ }
+ }
+
+ if (pid == 0) // needs a restart
+ {
+ time_t now;
+ time (& now);
+
+ // Prevent an error in the environment or the pmmgr daemon
+ // command lines from generating a tight loop of failure /
+ // retry, wasting time and log file space. Limit retry attempts
+ // to one per poll interval (pmmgr -p N parameter).
+ if (last_restart_attempt && (last_restart_attempt + polltime) >= now)
+ return; // quietly, without attempting to restart
+
+ string commandline = daemon_command_line(); // <--- may take many seconds!
+
+ // NB: Note this time as a restart attempt, even if daemon_command_line()
+ // returned an empty string, so that we don't try to restart it too soon.
+ // We note this time rather than the beginning of daemon_command_line(),
+ // to ensure at least polltime seconds of rest between attempts.
+ last_restart_attempt = now;
+
+ if (quit) return; // without starting the daemon process
+
+ if (commandline == "") // error in some intermediate processing stage
+ {
+ timestamp(cerr) << "failed to prepare daemon command line" << endl;
+ return;
+ }
+
+ // We are going to run the daemon with sh -c, but on some versions of
+ // sh, this doesn't imply an exec, which interferes with signalling.
+ // Enforce exec on even these shells.
+ commandline = string("exec ") + commandline;
+
+ if (pmDebug & DBG_TRACE_APPL0)
+ timestamp(cout) << "fork/exec sh -c " << commandline << endl;
+ pid = fork();
+ if (pid == 0) // child process
+ {
+ int rc = execl ("/bin/sh", "sh", "-c", commandline.c_str(), NULL);
+ timestamp(cerr) << "failed to execl sh -c " << commandline << " rc=" << rc << endl;
+ _exit (1);
+ // parent will try again at next poll
+ }
+ else if (pid < 0) // failed fork
+ {
+ timestamp(cerr) << "failed to fork for sh -c " << commandline << endl;
+ pid = 0;
+ // we will try again at next poll
+ }
+ else // congratulations! we're apparently a parent
+ {
+ if (pmDebug & DBG_TRACE_APPL0)
+ timestamp(cout) << "daemon pid " << pid << " started: " << commandline << endl;
+ }
+ }
+}
+
+
+std::string
+pmmgr_pmlogger_daemon::daemon_command_line()
+{
+ string default_log_dir =
+ string(pmGetConfig("PCP_LOG_DIR")) + (char)__pmPathSeparator() + "pmmgr";
+ string log_dir = get_config_single ("log-directory");
+ if (log_dir == "") log_dir = default_log_dir;
+ else if(log_dir[0] != '/') log_dir = config_directory + (char)__pmPathSeparator() + log_dir;
+
+ (void) mkdir2 (log_dir.c_str(), 0777); // implicitly consults umask(2)
+
+ string host_log_dir = log_dir + (char)__pmPathSeparator() + hostid;
+ (void) mkdir2 (host_log_dir.c_str(), 0777);
+ // (errors creating actual files under host_log_dir will be noted shortly)
+
+ string pmlogger_command =
+ string(pmGetConfig("PCP_BIN_DIR")) + (char)__pmPathSeparator() + "pmlogger";
+ string pmlogger_options = sh_quote(pmlogger_command);
+ pmlogger_options += " " + get_config_single ("pmlogger") + " ";
+
+ // run pmlogconf if requested
+ if (get_config_exists("pmlogconf"))
+ {
+ string pmlogconf_output_file = host_log_dir + (char)__pmPathSeparator() + "config.pmlogger";
+ (void) unlink (pmlogconf_output_file.c_str());
+ string pmlogconf_command =
+ string(pmGetConfig("PCP_BINADM_DIR")) + (char)__pmPathSeparator() + "pmlogconf";
+ string pmlogconf_options =
+ sh_quote(pmlogconf_command)
+ + " -c -r -h " + sh_quote(spec)
+ + " " + get_config_single ("pmlogconf")
+ + " " + sh_quote(pmlogconf_output_file)
+ + " >/dev/null"; // pmlogconf is too chatty
+
+ int rc = wrap_system(pmlogconf_options);
+ if (rc) return "";
+
+ pmlogger_options += " -c " + sh_quote(pmlogconf_output_file);
+ }
+
+ // collect -h direction
+ pmlogger_options += " -h " + sh_quote(spec);
+
+ // hard-code -r to report metrics & expected disk usage rate
+ pmlogger_options += " -r";
+
+ // collect subsidiary pmlogger diagnostics
+ pmlogger_options += " -l " + sh_quote(host_log_dir + (char)__pmPathSeparator() + "pmlogger.log");
+
+ // do log merging
+ if (get_config_exists ("pmlogmerge"))
+ {
+ string pmlogextract_command =
+ string(pmGetConfig("PCP_BIN_DIR")) + (char)__pmPathSeparator() + "pmlogextract";
+
+ string pmlogcheck_command =
+ string(pmGetConfig("PCP_BIN_DIR")) + (char)__pmPathSeparator() + "pmlogcheck";
+
+ string pmlogrewrite_command =
+ string(pmGetConfig("PCP_BINADM_DIR")) + (char)__pmPathSeparator() + "pmlogrewrite";
+
+ string pmlogextract_options = sh_quote(pmlogextract_command);
+
+ string retention = get_config_single ("pmlogmerge-retain");
+ if (retention == "") retention = "14days";
+ struct timeval retention_tv;
+ char *errmsg;
+ int rc = pmParseInterval(retention.c_str(), &retention_tv, &errmsg);
+ if (rc)
+ {
+ timestamp(cerr) << "pmlogmerge-retain '" << retention << "' parse error: " << errmsg << endl;
+ free (errmsg);
+ retention = "14days";
+ retention_tv.tv_sec = 14*24*60*60;
+ retention_tv.tv_usec = 0;
+ }
+ pmlogextract_options += " -S -" + sh_quote(retention);
+
+ // Arrange our new pmlogger to kill itself after the given
+ // period, to give us a chance to rerun.
+ string period = get_config_single ("pmlogmerge");
+ if (period == "") period = "24hours";
+ struct timeval period_tv;
+ rc = pmParseInterval(period.c_str(), &period_tv, &errmsg);
+ if (rc)
+ {
+ timestamp(cerr) << "pmlogmerge '" << period << "' parse error: " << errmsg << endl;
+ free (errmsg);
+ period = "24hours";
+ period_tv.tv_sec = 24*60*60;
+ period_tv.tv_usec = 0;
+ }
+ if (get_config_exists ("pmlogmerge-granular"))
+ {
+ // adjust stopping time to the next multiple of period
+ struct timeval now_tv;
+ __pmtimevalNow (&now_tv);
+ time_t period_s = period_tv.tv_sec;
+ if (period_s < 1) period_s = 1; // at least one second
+ time_t period_end = ((now_tv.tv_sec + period_s - 1) / period_s) * period_s;
+ period = string(" @") +
+ string(ctime(& period_end)).substr(0,24); // 24: ctime(3) magic value, sans \n
+ }
+ pmlogger_options += " -y -T " + sh_quote(period); // NB: pmmgr host local time!
+
+ // Find prior archives by globbing for *.index files,
+ // just like pmlogger_merge does.
+ // Er ... but aren't .index files optional?
+ vector<string> mergeable_archives; // those to merge
+ glob_t the_blob;
+ string glob_pattern = host_log_dir + (char)__pmPathSeparator() + "*.index";
+ rc = glob (glob_pattern.c_str(), GLOB_NOESCAPE, NULL, & the_blob);
+ if (rc == 0)
+ {
+ // compute appropriate
+ struct timeval now_tv;
+ __pmtimevalNow (&now_tv);
+ time_t period_s = period_tv.tv_sec;
+ if (period_s < 1) period_s = 1; // at least one second
+ time_t prior_period_start = ((now_tv.tv_sec - period_s) / period_s) * period_s;
+ time_t prior_period_end = prior_period_start + period_s;
+
+ for (unsigned i=0; i<the_blob.gl_pathc; i++)
+ {
+ if (quit) return "";
+
+ string index_name = the_blob.gl_pathv[i];
+ string base_name = index_name.substr(0,index_name.length()-6); // trim .index
+
+ // Manage retention based upon the stat timestamps of the .index file,
+ // because the archives might be so corrupt that even loglabel-based
+ // checks could fail. Non-corrupt archives will have already been merged
+ // into a fresher archive.
+ struct stat foo;
+ rc = stat (the_blob.gl_pathv[i], & foo);
+ if (rc)
+ {
+ // this apprx. can't happen
+ timestamp(cerr) << "stat '" << the_blob.gl_pathv[i] << "' error; skipping cleanup" << endl;
+ continue; // likely nothing can be done to this one
+ }
+ else if ((foo.st_mtime + retention_tv.tv_sec) < now_tv.tv_sec)
+ {
+ string bnq = sh_quote(base_name);
+ string cleanup_cmd = string("/bin/rm -f")
+ + " " + bnq + ".[0-9]*"
+ + " " + bnq + ".index" +
+ + " " + bnq + ".meta";
+
+ (void) wrap_system(cleanup_cmd);
+ continue; // it's gone now; don't try to merge it or anything
+ }
+
+ if (quit) return "";
+
+ // sic pmlogcheck on it; if it is broken, pmlogextract
+ // will give up and make no progress
+ string pmlogcheck_options = sh_quote(pmlogcheck_command);
+ pmlogcheck_options += " " + sh_quote(base_name) + " >/dev/null";
+
+ rc = wrap_system(pmlogcheck_options);
+ if (rc != 0)
+ {
+ timestamp(cerr) << "corrupt archive " << base_name << " preserved." << endl;
+ continue;
+ }
+
+ if (quit) return "";
+
+ // In granular mode, skip if this file is too old or too new. NB: Decide
+ // based upon the log-label, not fstat timestamps, since files postdate
+ // the time region they cover.
+ if (get_config_exists ("pmlogmerge-granular"))
+ {
+ // One could do this the pmloglabel(1) __pmLog* way,
+ // rather than the pmlogsummary(1) PMAPI way.
+
+ int ctx = pmNewContext(PM_CONTEXT_ARCHIVE, base_name.c_str());
+ if (ctx < 0)
+ continue; // skip; gc later
+
+ pmLogLabel label;
+ rc = pmGetArchiveLabel (& label);
+ if (rc < 0)
+ continue; // skip; gc later
+
+ if (label.ll_start.tv_sec >= prior_period_end) // archive too new?
+ {
+ if (pmDebug & DBG_TRACE_APPL0)
+ timestamp(cout) << "skipping merge of too-new archive " << base_name << endl;
+ pmDestroyContext (ctx);
+ continue;
+ }
+
+ struct timeval archive_end;
+ rc = pmGetArchiveEnd(&archive_end);
+ if (rc < 0)
+ {
+ pmDestroyContext (ctx);
+ continue; // skip; gc later
+ }
+
+ if (archive_end.tv_sec < prior_period_start) // archive too old?
+ {
+ if (pmDebug & DBG_TRACE_APPL0)
+ timestamp(cout) << "skipping merge of too-old archive " << base_name << endl;
+ pmDestroyContext (ctx);
+ continue; // skip; gc later
+ }
+
+ pmDestroyContext (ctx);
+ // fallthrough: the archive intersects the prior_period_{start,end} interval
+
+ // XXX: What happens for archives that span across granular periods?
+ }
+
+ mergeable_archives.push_back (base_name);
+ }
+ globfree (& the_blob);
+ }
+
+ string timestr = "archive";
+ time_t now2 = time(NULL);
+ struct tm *now = gmtime(& now2);
+ if (now != NULL)
+ {
+ char timestr2[100];
+ int rc = strftime(timestr2, sizeof(timestr2), "-%Y%m%d.%H%M%S", now);
+ if (rc > 0)
+ timestr += timestr2;
+ }
+ string merged_archive_name = host_log_dir + (char)__pmPathSeparator() + timestr;
+
+ if (mergeable_archives.size() > 1) // 1 or 0 are not worth merging!
+ {
+ // assemble final bits of pmlogextract command line: the inputs and the output
+ for (unsigned i=0; i<mergeable_archives.size(); i++)
+ {
+ if (quit) return "";
+
+ if (get_config_exists("pmlogmerge-rewrite"))
+ {
+ string pmlogrewrite_options = sh_quote(pmlogrewrite_command);
+ pmlogrewrite_options += " -i " + get_config_single("pmlogmerge-rewrite");
+ pmlogrewrite_options += " " + sh_quote(mergeable_archives[i]);
+
+ (void) wrap_system(pmlogrewrite_options.c_str());
+ // In case of error, don't break; let's try to merge it anyway.
+ // Maybe pmlogrewrite will succeed and will get rid of this file.
+ }
+
+ pmlogextract_options += " " + sh_quote(mergeable_archives[i]);
+ }
+
+ if (quit) return "";
+
+ pmlogextract_options += " " + sh_quote(merged_archive_name);
+
+ rc = wrap_system(pmlogextract_options.c_str());
+ if (rc == 0)
+ {
+ // zap the previous archive files
+ //
+ // Don't skip this upon "if (quit)", since the new merged archive is already complete;
+ // it'd be a waste to keep these files around for a future re-merge.
+ for (unsigned i=0; i<mergeable_archives.size(); i++)
+ {
+ string base_name = sh_quote(mergeable_archives[i]);
+ string cleanup_cmd = string("/bin/rm -f")
+ + " " + base_name + ".[0-9]*"
+ + " " + base_name + ".index" +
+ + " " + base_name + ".meta";
+
+ (void) wrap_system(cleanup_cmd.c_str());
+ }
+ }
+ }
+ }
+
+ // synthesize a logfile name similarly as pmlogger_check, but add %S (seconds)
+ // to reduce likelihood of conflict with a short poll interval
+ string timestr = "archive";
+ time_t now2 = time(NULL);
+ struct tm *now = gmtime(& now2);
+ if (now != NULL)
+ {
+ char timestr2[100];
+ int rc = strftime(timestr2, sizeof(timestr2), "-%Y%m%d.%H%M%S", now);
+ if (rc > 0)
+ timestr += timestr2; // no sh_quote required
+ }
+
+ // last argument
+ pmlogger_options += " " + sh_quote(host_log_dir + (char)__pmPathSeparator() + timestr);
+
+ return pmlogger_options;
+}
+
+
+std::string
+pmmgr_pmie_daemon::daemon_command_line()
+{
+ string default_log_dir =
+ string(pmGetConfig("PCP_LOG_DIR")) + (char)__pmPathSeparator() + "pmmgr";
+ string log_dir = get_config_single ("log-directory");
+ if (log_dir == "") log_dir = default_log_dir;
+ else if(log_dir[0] != '/') log_dir = config_directory + (char)__pmPathSeparator() + log_dir;
+
+ (void) mkdir2 (log_dir.c_str(), 0777); // implicitly consults umask(2)
+
+ string host_log_dir = log_dir + (char)__pmPathSeparator() + hostid;
+ (void) mkdir2 (host_log_dir.c_str(), 0777);
+ // (errors creating actual files under host_log_dir will be noted shortly)
+
+ string pmie_command =
+ string(pmGetConfig("PCP_BIN_DIR")) + (char)__pmPathSeparator() + "pmie";
+ string pmie_options = sh_quote (pmie_command);
+
+ pmie_options += " " + get_config_single ("pmie") + " ";
+
+ // run pmieconf if requested
+ if (get_config_exists ("pmieconf"))
+ {
+ string pmieconf_output_file = host_log_dir + (char)__pmPathSeparator() + "config.pmie";
+ string pmieconf_command =
+ string(pmGetConfig("PCP_BIN_DIR")) + (char)__pmPathSeparator() + "pmieconf";
+
+ // NB: pmieconf doesn't take a host name as an argument, unlike pmlogconf
+ string pmieconf_options =
+ sh_quote(pmieconf_command)
+ + " -F -c " + get_config_single ("pmieconf")
+ + " -f " + sh_quote(pmieconf_output_file);
+
+ int rc = wrap_system(pmieconf_options.c_str());
+ if (rc) return "";
+
+ pmie_options += "-c " + sh_quote(pmieconf_output_file);
+ }
+
+ if (quit) return "";
+
+ // collect -h direction
+ pmie_options += " -h " + sh_quote(spec);
+
+ // collect -f, to get it to run in the foreground, avoid setuid
+ pmie_options += " -f";
+
+ // collect subsidiary pmlogger diagnostics
+ pmie_options += " -l " + sh_quote(host_log_dir + (char)__pmPathSeparator() + "pmie.log");
+
+ return pmie_options;
+}
+
+
+
+// ------------------------------------------------------------------------
+
+
+extern "C"
+void handle_interrupt (int sig)
+{
+ // Propagate signal to inferior processes (just once, to prevent
+ // recursive signals or whatnot, despite sa_mask in
+ // setup_signals()).
+ if (quit == 0)
+ kill(-getpid(), SIGTERM);
+
+ quit ++;
+ if (quit > 3) // ignore 1 from user; 1 from kill(-getpid) above; 1 from same near main() exit
+ {
+ char msg[] = "Too many interrupts received, exiting.\n";
+ int rc = write (2, msg, sizeof(msg)-1);
+ if (rc) {/* Do nothing; we don't care if our last gasp went out. */ ;}
+ // XXX: send a suicide signal to the process group?
+ _exit (1);
+ }
+}
+
+extern "C"
+void ignore_signal (int sig)
+{
+ (void) sig;
+}
+
+
+
+void setup_signals()
+{
+ // NB: we eschew __pmSetSignalHandler, since it uses signal(3),
+ // whose behavior is less predictable than sigaction(2).
+
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = handle_interrupt;
+ sigemptyset (&sa.sa_mask);
+ sigaddset (&sa.sa_mask, SIGHUP);
+ sigaddset (&sa.sa_mask, SIGPIPE);
+ sigaddset (&sa.sa_mask, SIGINT);
+ sigaddset (&sa.sa_mask, SIGTERM);
+ sigaddset (&sa.sa_mask, SIGXFSZ);
+ sigaddset (&sa.sa_mask, SIGXCPU);
+ sa.sa_flags = SA_RESTART;
+ sigaction (SIGHUP, &sa, NULL);
+ sigaction (SIGPIPE, &sa, NULL);
+ sigaction (SIGINT, &sa, NULL);
+ sigaction (SIGTERM, &sa, NULL);
+ sigaction (SIGXFSZ, &sa, NULL);
+ sigaction (SIGXCPU, &sa, NULL);
+}
+
+
+
+// ------------------------------------------------------------------------
+
+static pmOptions opts;
+static pmLongOptions longopts[] =
+ {
+ PMAPI_OPTIONS_HEADER("Options"),
+ PMOPT_DEBUG,
+ { "config", 1, 'c', "DIR", "configuration directory [default $PCP_SYSCONF_DIR/pmmgr]" },
+ { "poll", 1, 'p', "NUM", "set pmcd polling interval [default 60]" },
+ { "username", 1, 'U', "USER", "switch to named user account [default pcp]" },
+ { "log", 1, 'l', "PATH", "redirect diagnostics and trace output" },
+ { "verbose", 0, 'v', 0, "verbose diagnostics to stderr" },
+ PMOPT_HELP,
+ PMAPI_OPTIONS_END
+ };
+
+int main (int argc, char *argv[])
+{
+ /* Become our own process group, to assist signal passing to children. */
+ setpgid(getpid(), 0);
+ setup_signals();
+
+ string default_config_dir =
+ string(pmGetConfig("PCP_SYSCONF_DIR")) + (char)__pmPathSeparator() + "pmmgr";
+ vector<pmmgr_job_spec*> js;
+
+ int c;
+ char* username_str;
+ __pmGetUsername(& username_str);
+ string username = username_str;
+ char* output_filename = NULL;
+
+ opts.long_options = longopts;
+ opts.short_options = "D:c:vp:U:l:?";
+
+ while ((c = pmgetopt_r(argc, argv, &opts)) != EOF)
+ {
+ switch (c)
+ {
+ case 'D': // undocumented
+ if ((c = __pmParseDebug(opts.optarg)) < 0)
+ {
+ pmprintf("%s: unrecognized debug flag specification (%s)\n",
+ pmProgname, opts.optarg);
+ opts.errors++;
+ }
+ else
+ {
+ pmDebug |= c;
+ }
+ break;
+
+ case 'l':
+ output_filename = opts.optarg;
+ break;
+
+ case 'v':
+ pmDebug |= DBG_TRACE_APPL0;
+ break;
+
+ case 'p':
+ polltime = atoi(opts.optarg);
+ if (polltime <= 0)
+ {
+ pmprintf("%s: poll time too short\n", pmProgname);
+ opts.errors++;
+ }
+ break;
+
+ case 'c':
+ js.push_back (new pmmgr_job_spec(opts.optarg));
+ break;
+
+ case 'U':
+ username = opts.optarg;
+ break;
+
+ default:
+ opts.errors++;
+ }
+ }
+
+ if (opts.errors)
+ {
+ pmUsageMessage(&opts);
+ exit(1);
+ }
+
+ // default
+ if (js.size() == 0)
+ js.push_back (new pmmgr_job_spec(default_config_dir));
+
+ // let pmdapmcd know pmmgr is currently running
+ if (__pmServerCreatePIDFile(pmProgname, PM_FATAL_ERR) < 0)
+ exit(1);
+
+ // lose root privileges if we have them
+ __pmSetProcessIdentity(username.c_str());
+
+ // (re)create log file, redirect stdout/stderr
+ // NB: must be done after __pmSetProcessIdentity() for proper file permissions
+ if (output_filename)
+ {
+ int fd;
+ (void) unlink (output_filename); // in case one's left over from a previous other-uid run
+ fd = open (output_filename, O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666);
+ if (fd < 0)
+ timestamp(cerr) << "Cannot re-create logfile " << output_filename << endl;
+ else
+ {
+ int rc;
+ // Move the new file descriptors on top of stdout/stderr
+ rc = dup2 (fd, STDOUT_FILENO);
+ if (rc < 0) // rather unlikely
+ timestamp(cerr) << "Cannot redirect logfile to stdout" << endl;
+ rc = dup2 (fd, STDERR_FILENO);
+ if (rc < 0) // rather unlikely
+ timestamp(cerr) << "Cannot redirect logfile to stderr" << endl;
+ rc = close (fd);
+ if (rc < 0) // rather unlikely
+ timestamp(cerr) << "Cannot close logfile fd" << endl;
+ }
+
+ }
+
+ timestamp(cout) << "Log started" << endl;
+ while (! quit)
+ {
+ // In this section, we must not fidget with SIGCHLD, due to use of system(3).
+ for (unsigned i=0; i<js.size() && !quit; i++)
+ js[i]->poll();
+
+ if (quit)
+ break;
+
+ // We want to respond quickly if a child daemon process dies.
+ (void) signal (SIGCHLD, ignore_signal);
+ (void) signal (SIGALRM, ignore_signal);
+ alarm (polltime);
+ pause ();
+ alarm (0);
+ (void) signal (SIGCHLD, SIG_DFL);
+ (void) signal (SIGALRM, SIG_DFL);
+ }
+
+ // NB: don't let this cleanup be interrupted by pending-quit signals;
+ // we want the daemon pid's killed.
+ for (unsigned i=0; i<js.size(); i++)
+ delete js[i];
+
+ timestamp(cout) << "Log finished" << endl;
+
+ // Send a last-gasp signal out, just in case daemons somehow missed
+ kill(-getpid(), SIGTERM);
+
+ return 0;
+}
diff --git a/src/pmmgr/pmmgr.h b/src/pmmgr/pmmgr.h
new file mode 100644
index 0000000..4934ace
--- /dev/null
+++ b/src/pmmgr/pmmgr.h
@@ -0,0 +1,133 @@
+/* -*- C++ -*-
+ * Copyright (c) 2013-2014 Red Hat.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef PMMGR_H
+#define PMMGR_H
+
+extern "C" {
+#include "pmapi.h"
+}
+#include <string>
+#include <vector>
+#include <set>
+#include <map>
+#include <stdexcept>
+#include <iostream>
+
+
+typedef std::string pcp_context_spec; // pmNewContext PM_CONTEXT_HOST parameter
+typedef std::string pmmgr_hostid; // a unique id for a pmcd
+
+
+// Instances of pmmgr_configurable represent a configurable object,
+// which reads one or more lines of djb-style directories.
+class pmmgr_configurable
+{
+protected:
+ pmmgr_configurable(const std::string& dir);
+ virtual ~pmmgr_configurable() {}
+
+ std::vector<std::string> get_config_multi(const std::string&) const;
+ std::string get_config_single(const std::string&) const;
+ bool get_config_exists(const std::string&) const;
+
+ // private: maybe?
+ std::string config_directory;
+
+ std::ostream& timestamp(std::ostream&);
+ int wrap_system(const std::string& cmd);
+};
+
+
+
+
+// Instances of pmmgr_daemon represent a possibly-live, restartable daemon.
+class pmmgr_daemon: public pmmgr_configurable
+{
+public:
+ pmmgr_daemon(const std::string& config_directory,
+ const pmmgr_hostid& hostid, const pcp_context_spec& spec);
+ virtual ~pmmgr_daemon();
+ void poll();
+
+protected:
+ pmmgr_hostid hostid;
+ pcp_context_spec spec;
+ int pid;
+ time_t last_restart_attempt;
+
+ virtual std::string daemon_command_line() = 0;
+};
+
+
+class pmmgr_pmlogger_daemon: public pmmgr_daemon
+{
+public:
+ pmmgr_pmlogger_daemon(const std::string& config_directory,
+ const pmmgr_hostid& hostid, const pcp_context_spec& spec);
+protected:
+ std::string daemon_command_line();
+};
+
+class pmmgr_pmie_daemon: public pmmgr_daemon
+{
+public:
+ pmmgr_pmie_daemon(const std::string& config_directory,
+ const pmmgr_hostid& hostid, const pcp_context_spec& spec);
+protected:
+ std::string daemon_command_line();
+};
+
+
+
+
+// An instance of a pmmgr_job_spec represents a pmmgr
+// configuration item to monitor some set of pcp target patterns
+// (which collectively map to a varying set of pmcd's), and a
+// corresponding set of daemons to keep running for each of them.
+//
+// The pmcds are identified by a configurable algorithm that collects
+// site-specific metrics into a single string, which is then sanitized
+// to make it typeable, portable, useful as a directory name.
+//
+// It is configured from a djb-style control directory with files containing
+// 100% pure content. Multiple values within the files, where permitted,
+// are newline-separated.
+
+class pmmgr_job_spec: pmmgr_configurable
+{
+public:
+ pmmgr_job_spec(const std::string& config_directory);
+ ~pmmgr_job_spec(); // shut down all daemons
+ void poll(); // check targets, daemons
+
+private:
+ std::map<std::string,pmMetricSpec*> parsed_metric_cache;
+ pmMetricSpec* parse_metric_spec(const std::string&);
+
+ pmmgr_hostid compute_hostid (const pcp_context_spec&);
+ std::map<pmmgr_hostid,pcp_context_spec> known_targets;
+
+ void note_new_hostid(const pmmgr_hostid&, const pcp_context_spec&);
+ void note_dead_hostid(const pmmgr_hostid&);
+ std::multimap<pmmgr_hostid,pmmgr_daemon*> daemons;
+};
+
+
+
+
+
+
+#endif
+
diff --git a/src/pmmgr/pmmgr.options b/src/pmmgr/pmmgr.options
new file mode 100644
index 0000000..86037d7
--- /dev/null
+++ b/src/pmmgr/pmmgr.options
@@ -0,0 +1,27 @@
+# command-line options and environment variables for pmmgr
+# uncomment/edit lines as required
+# note: environment variables are *not* expanded - use full path
+
+# make log more verbose
+# -v
+
+# poll less frequently
+# -p 300
+
+# add more configuration directories
+# -c DIR1
+# -c DIR2
+
+# assume identity of some user other than "pcp"
+# -U foobar
+
+# make log go someplace else
+# -l /some/place/else
+
+# setting of environment variables for pmmgr
+
+# timeouts for interactions with pmcd on behalf of clients
+# PMCD_CONNECT_TIMEOUT=10
+# PMCD_RECONNECT_TIMEOUT=10,20,30
+# PMCD_REQUEST_TIMEOUT=10
+HOME=/var/lib/pcp
diff --git a/src/pmmgr/pmmgr.service.in b/src/pmmgr/pmmgr.service.in
new file mode 100644
index 0000000..2b73366
--- /dev/null
+++ b/src/pmmgr/pmmgr.service.in
@@ -0,0 +1,14 @@
+[Unit]
+Description=Performance Metrics Daemon Manager
+Documentation=man:pmmgr(8)
+Wants=avahi-daemon.service
+After=network.target avahi-daemon.service
+
+[Service]
+Type=oneshot
+ExecStart=@path@/pmmgr start
+ExecStop=@path@/pmmgr stop
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
diff --git a/src/pmmgr/rc_pmmgr b/src/pmmgr/rc_pmmgr
new file mode 100644
index 0000000..cdc3887
--- /dev/null
+++ b/src/pmmgr/rc_pmmgr
@@ -0,0 +1,296 @@
+#! /bin/sh
+#
+# Copyright (c) 2013 Red Hat.
+# Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# Start or Stop the Performance Co-Pilot (PCP) daemon manager
+#
+# The following is for chkconfig on RedHat based systems
+# chkconfig: 2345 95 05
+# description: pmmgr is a daemon manager for the Performance Co-Pilot (PCP)
+#
+# The following is for insserv(1) based systems,
+# e.g. SuSE, where chkconfig is a perl script.
+### BEGIN INIT INFO
+# Provides: pmmgr
+# Required-Start: $remote_fs
+# Should-Start: $local_fs $network $syslog $time $pmcd
+# Required-Stop: $remote_fs
+# Should-Stop: $local_fs $network $syslog $pmcd
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: Control pmmgr (daemon manager for PCP)
+# Description: Configure and control pmmgr (a daemon manager for the Performance Co-Pilot)
+### END INIT INFO
+
+. $PCP_DIR/etc/pcp.env
+. $PCP_SHARE_DIR/lib/rc-proc.sh
+
+PMMGR=$PCP_BINADM_DIR/pmmgr
+PMMGROPTS=$PCP_PMMGROPTIONS_PATH
+RUNDIR=$PCP_LOG_DIR/pmmgr
+pmprog=$PCP_RC_DIR/pmmgr
+prog=$PCP_RC_DIR/`basename $0`
+
+tmp=`mktemp -d /var/tmp/pcp.XXXXXXXXX` || exit 1
+status=1
+trap "rm -rf $tmp; exit \$status" 0 1 2 3 15
+
+if [ $pmprog = $prog ]
+then
+ VERBOSE_CTL=on
+else
+ VERBOSE_CTL=off
+fi
+
+case "$PCP_PLATFORM"
+in
+ mingw)
+ # nothing we can usefully do here, skip the test
+ #
+ ;;
+
+ *)
+ # standard Unix/Linux style test
+ #
+ ID=id
+ test -f /usr/xpg4/bin/id && ID=/usr/xpg4/bin/id
+
+ IAM=`$ID -u 2>/dev/null`
+ if [ -z "$IAM" ]
+ then
+ # do it the hardway
+ #
+ IAM=`$ID | sed -e 's/.*uid=//' -e 's/(.*//'`
+ fi
+ ;;
+esac
+
+_shutdown()
+{
+ # Is pmmgr running?
+ #
+ _get_pids_by_name pmmgr >$tmp/tmp
+ if [ ! -s $tmp/tmp ]
+ then
+ [ "$1" = verbose ] && echo "$pmprog: pmmgr not running"
+ return 0
+ fi
+
+ # Send pmmgr a SIGTERM, which is noted as a pending shutdown.
+ # When finished the currently active request, pmmgr will close any
+ # connections and then exit.
+ # Wait for pmmgr to terminate.
+ #
+ pmsignal -a -s TERM pmmgr > /dev/null 2>&1
+ $ECHO $PCP_ECHO_N "Waiting for pmmgr to terminate ...""$PCP_ECHO_C"
+ gone=0
+ for i in 1 2 3 4 5 6
+ do
+ sleep 3
+ _get_pids_by_name pmmgr >$tmp/tmp
+ if [ ! -s $tmp/tmp ]
+ then
+ gone=1
+ break
+ fi
+
+ # If pmmgr doesn't go in 15 seconds, SIGKILL and sleep 1 more time
+ # to allow any clients reading from pmmgr sockets to fail so that
+ # socket doesn't end up in TIME_WAIT or somesuch.
+ #
+ if [ $i = 5 ]
+ then
+ $ECHO
+ echo "Process ..."
+ $PCP_PS_PROG $PCP_PS_ALL_FLAGS >$tmp/ps
+ sed 1q $tmp/ps
+ for pid in `cat $tmp/tmp`
+ do
+ $PCP_AWK_PROG <$tmp/ps "\$2 == $pid { print }"
+ done
+ echo "$prog: Warning: Forcing pmmgr to terminate!"
+ pmsignal -a -s KILL pmmgr > /dev/null 2>&1
+ else
+ $ECHO $PCP_ECHO_N ".""$PCP_ECHO_C"
+ fi
+ done
+ if [ $gone != 1 ] # It just WON'T DIE, give up.
+ then
+ echo "Process ..."
+ cat $tmp/tmp
+ echo "$prog: Warning: pmmgr won't die!"
+ exit
+ fi
+ $RC_STATUS -v
+ pmpost "stop pmmgr from $pmprog"
+}
+
+_usage()
+{
+ echo "Usage: $pmprog [-v] {start|restart|condrestart|stop|status|reload|force-reload}"
+}
+
+while getopts v c
+do
+ case $c
+ in
+ v) # force verbose
+ VERBOSE_CTL=on
+ ;;
+
+ *)
+ _usage
+ exit 1
+ ;;
+ esac
+done
+shift `expr $OPTIND - 1`
+
+if [ $VERBOSE_CTL = on ]
+then # For a verbose startup and shutdown
+ ECHO=$PCP_ECHO_PROG
+else # For a quiet startup and shutdown
+ ECHO=:
+fi
+
+if [ "$IAM" != 0 -a "$1" != "status" ]
+then
+ if [ -n "$PCP_DIR" ]
+ then
+ : running in a non-default installation, do not need to be root
+ else
+ echo "$prog:"'
+Error: You must be root (uid 0) to start or stop the PCP pmmgr daemon.'
+ exit
+ fi
+fi
+
+# First reset status of this service
+$RC_RESET
+
+# Return values acc. to LSB for all commands but status:
+# 0 - success
+# 1 - misc error
+# 2 - invalid or excess args
+# 3 - unimplemented feature (e.g. reload)
+# 4 - insufficient privilege
+# 5 - program not installed
+# 6 - program not configured
+#
+# Note that starting an already running service, stopping
+# or restarting a not-running service as well as the restart
+# with force-reload (in case signalling is not supported) are
+# considered a success.
+case "$1" in
+
+ 'start'|'restart'|'condrestart'|'reload'|'force-reload')
+ if [ "$1" = "condrestart" ] && ! is_chkconfig_on pmmgr
+ then
+ status=0
+ exit
+ fi
+
+ _shutdown quietly
+
+ # pmmgr messages should go to stderr, not the GUI notifiers
+ #
+ unset PCP_STDERR
+
+ if [ -x $PMMGR ]
+ then
+ if [ ! -f $PMMGROPTS ]
+ then
+ echo "$prog:"'
+Error: pmmgr control file "$PMMGROPTS" is missing, cannot start pmmgr.'
+ exit
+ fi
+ if [ ! -d "$RUNDIR" ]
+ then
+ mkdir -p -m 775 "$RUNDIR"
+ chown $PCP_USER:$PCP_GROUP "$RUNDIR"
+ fi
+ cd $RUNDIR
+
+ # salvage the previous versions of any pmmgr
+ #
+ if [ -f pmmgr.log ]
+ then
+ rm -f pmmgr.log.prev
+ mv pmmgr.log pmmgr.log.prev
+ fi
+
+ $ECHO $PCP_ECHO_N "Starting pmmgr ..." "$PCP_ECHO_C"
+ # options file processing ...
+ # only consider lines which start with a hyphen
+ # get rid of the -f option
+ # ensure multiple lines concat onto 1 line
+ OPTS=`sed <$PMMGROPTS 2>/dev/null \
+ -e '/^[^-]/d' \
+ -e 's/^/ /' \
+ -e 's/$/ /' \
+ -e 's/ -f / /g' \
+ -e 's/^ //' \
+ -e 's/ $//' \
+ | tr '\012' ' ' `
+
+ # environment stuff
+ #
+ eval `sed -e 's/"/\\"/g' $PMMGROPTS \
+ | awk -F= '
+BEGIN { exports="" }
+/^[A-Z]/ && NF == 2 { exports=exports" "$1
+ printf "%s=${%s:-\"%s\"}\n", $1, $1, $2
+ }
+END { if (exports != "") print "export", exports }'`
+
+ $PMMGR -l pmmgr.log $OPTS &
+ $RC_STATUS -v
+
+ pmpost "start pmmgr from $pmprog"
+ fi
+ status=0
+ ;;
+
+ 'stop')
+ _shutdown
+ status=0
+ ;;
+
+ 'status')
+ # NOTE: $RC_CHECKPROC returns LSB compliant status values.
+ $ECHO $PCP_ECHO_N "Checking for pmmgr:" "$PCP_ECHO_C"
+ if [ -r /etc/rc.status ]
+ then
+ # SuSE
+ $RC_CHECKPROC $PMMGR
+ $RC_STATUS -v
+ status=$?
+ else
+ # not SuSE
+ $RC_CHECKPROC $PMMGR
+ status=$?
+ if [ $status -eq 0 ]
+ then
+ $ECHO running
+ else
+ $ECHO stopped
+ fi
+ fi
+ ;;
+
+ *)
+ _usage
+ ;;
+esac
+