summaryrefslogtreecommitdiff
path: root/src/pmdas/linux_proc
diff options
context:
space:
mode:
Diffstat (limited to 'src/pmdas/linux_proc')
-rw-r--r--src/pmdas/linux_proc/GNUmakefile89
-rwxr-xr-xsrc/pmdas/linux_proc/Install29
-rwxr-xr-xsrc/pmdas/linux_proc/Remove23
-rw-r--r--src/pmdas/linux_proc/cgroups.c1146
-rw-r--r--src/pmdas/linux_proc/cgroups.h74
-rw-r--r--src/pmdas/linux_proc/clusters.h48
-rw-r--r--src/pmdas/linux_proc/contexts.c238
-rw-r--r--src/pmdas/linux_proc/contexts.h57
-rw-r--r--src/pmdas/linux_proc/getinfo.c55
-rw-r--r--src/pmdas/linux_proc/getinfo.h16
-rw-r--r--src/pmdas/linux_proc/help220
-rw-r--r--src/pmdas/linux_proc/indom.h52
-rw-r--r--src/pmdas/linux_proc/ksym.c564
-rw-r--r--src/pmdas/linux_proc/ksym.h41
-rw-r--r--src/pmdas/linux_proc/linux_proc_migrate.conf55
-rw-r--r--src/pmdas/linux_proc/pmda.c1896
-rw-r--r--src/pmdas/linux_proc/proc_pid.c957
-rw-r--r--src/pmdas/linux_proc/proc_pid.h289
-rw-r--r--src/pmdas/linux_proc/proc_runq.c123
-rw-r--r--src/pmdas/linux_proc/proc_runq.h35
-rw-r--r--src/pmdas/linux_proc/root6
-rw-r--r--src/pmdas/linux_proc/root_proc181
22 files changed, 6194 insertions, 0 deletions
diff --git a/src/pmdas/linux_proc/GNUmakefile b/src/pmdas/linux_proc/GNUmakefile
new file mode 100644
index 0000000..97dc518
--- /dev/null
+++ b/src/pmdas/linux_proc/GNUmakefile
@@ -0,0 +1,89 @@
+#
+# Copyright (c) 2000,2003,2004,2008 Silicon Graphics, Inc. All Rights Reserved.
+# Copyright (c) 2007-2010 Aconex. All Rights Reserved.
+# Copyright (c) 2013-2014 Red Hat.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+
+TOPDIR = ../../..
+include $(TOPDIR)/src/include/builddefs
+
+# Names used by the shared build rules and by the rules further down.
+IAM = proc
+DOMAIN = PROC
+CMDTARGET = pmdaproc
+LIBTARGET = pmda_proc.so
+PMDAINIT = proc_init
+# Directory the install rule copies this PMDA's files into.
+PMDADIR = $(PCP_PMDAS_DIR)/$(IAM)
+# Entry the build-me rule appends to ../pmcd.conf when it is not there yet.
+# NOTE(review): the literal 3 is presumably the PROC domain number - confirm.
+CONF_LINE = "proc 3 pipe binary $(PMDADIR)/$(CMDTARGET) -d 3"
+
+CFILES = pmda.c \
+ cgroups.c proc_pid.c proc_runq.c ksym.c getinfo.c contexts.c
+
+HFILES = clusters.h indom.h \
+ cgroups.h proc_pid.h proc_runq.h ksym.h getinfo.h contexts.h
+
+SCRIPTS = Install Remove
+VERSION_SCRIPT = exports
+HELPTARGETS = help.dir help.pag
+LSRCFILES = help root root_proc linux_proc_migrate.conf $(SCRIPTS)
+LDIRT = $(HELPTARGETS) domain.h $(VERSION_SCRIPT)
+
+LLDLIBS = $(PCP_PMDALIB)
+LCFLAGS = $(INVISIBILITY)
+
+# Uncomment these flags for profiling
+# LCFLAGS += -pg
+# LLDFLAGS += -pg
+
+default: build-me
+
+include $(BUILDRULES)
+
+ifeq "$(TARGET_OS)" "linux"
+# Build everything, then make sure ../pmcd.conf carries this PMDA's entry.
+# grep -F matches CONF_LINE as a literal string; -q plus 2>/dev/null keeps
+# the check quiet, and a missing ../pmcd.conf simply counts as "entry not
+# present" so the line is appended rather than the shell test failing.
+build-me: domain.h $(LIBTARGET) $(CMDTARGET) $(HELPTARGETS)
+	@if ! grep -q -F $(CONF_LINE) ../pmcd.conf 2>/dev/null; then \
+		echo $(CONF_LINE) >> ../pmcd.conf ; \
+	fi
+
+# Copy the PMDA, its help files and namespace into place.
+install: default
+	$(INSTALL) -m 755 -d $(PMDADIR)
+	$(INSTALL) -m 644 domain.h help help.dir help.pag root root_proc $(PMDADIR)
+	$(INSTALL) -m 755 $(LIBTARGET) $(CMDTARGET) $(SCRIPTS) $(PMDADIR)
+	$(INSTALL) -m 644 root_proc $(PCP_VAR_DIR)/pmns/root_proc
+	$(INSTALL) -m 644 linux_proc_migrate.conf $(PCP_VAR_DIR)/config/pmlogrewrite/linux_proc_migrate.conf
+else
+# Nothing to build or install when TARGET_OS is not linux.
+build-me:
+install:
+endif
+
+default_pcp : default
+
+install_pcp : install
+
+# Regenerate the compiled help files whenever the help text changes.
+# NOTE(review): both help.dir and help.pag are listed as targets of this one
+# recipe, which newhelp presumably writes together via "-o help" - confirm.
+$(HELPTARGETS) : help
+ $(RUN_IN_BUILD_ENV) $(TOPDIR)/src/newhelp/newhelp -n root_proc -v 2 -o help < help
+
+$(VERSION_SCRIPT):
+ $(VERSION_SCRIPT_MAKERULE)
+
+domain.h: ../../pmns/stdpmid
+ $(DOMAIN_MAKERULE)
+
+# Hand-maintained header dependencies for the objects built from CFILES.
+cgroups.o pmda.o: clusters.h
+cgroups.o pmda.o: cgroups.h
+cgroups.o pmda.o proc_pid.o proc_runq.o: proc_pid.h
+pmda.o proc_runq.o: proc_runq.h
+# cgroups.c includes indom.h; CFILES has no indom.c, so there is no indom.o
+cgroups.o pmda.o: indom.h
+ksym.o pmda.o: ksym.h
+pmda.o: domain.h
+pmda.o: getinfo.h
+pmda.o: $(VERSION_SCRIPT)
diff --git a/src/pmdas/linux_proc/Install b/src/pmdas/linux_proc/Install
new file mode 100755
index 0000000..74fa225
--- /dev/null
+++ b/src/pmdas/linux_proc/Install
@@ -0,0 +1,29 @@
+#!/bin/sh
+#
+# Copyright (c) 2013 Red Hat Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# Install the Linux per-process (proc) PMDA and/or PMNS
+#
+
+# Quote the paths so a directory name containing spaces cannot split them.
+. "$PCP_DIR/etc/pcp.env"
+. "$PCP_SHARE_DIR/lib/pmdaproc.sh"
+
+# Settings for this PMDA; presumably read by the pmdaSetup/pmdaInstall
+# helpers called below.
+iam=proc
+pmda_interface=6
+daemon_opt=true
+pipe_opt=true
+pmns_source=root_proc
+
+pmdaSetup
+pmdaInstall
+exit 0
diff --git a/src/pmdas/linux_proc/Remove b/src/pmdas/linux_proc/Remove
new file mode 100755
index 0000000..4befc73
--- /dev/null
+++ b/src/pmdas/linux_proc/Remove
@@ -0,0 +1,23 @@
+#!/bin/sh
+#
+# Copyright (c) 2013 Red Hat Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# Remove the Linux per-process (proc) PMDA
+#
+
+# Quote the paths so a directory name containing spaces cannot split them.
+. "$PCP_DIR/etc/pcp.env"
+. "$PCP_SHARE_DIR/lib/pmdaproc.sh"
+iam=proc
+pmdaSetup
+pmdaRemove
+exit 0
diff --git a/src/pmdas/linux_proc/cgroups.c b/src/pmdas/linux_proc/cgroups.c
new file mode 100644
index 0000000..4994465
--- /dev/null
+++ b/src/pmdas/linux_proc/cgroups.c
@@ -0,0 +1,1146 @@
+/*
+ * Copyright (c) 2012-2014 Red Hat.
+ * Copyright (c) 2010 Aconex. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "pmapi.h"
+#include "impl.h"
+#include "pmda.h"
+#include "indom.h"
+#include "cgroups.h"
+#include "clusters.h"
+#include "proc_pid.h"
+#include <sys/stat.h>
+#include <ctype.h>
+
+#define CGROUP_ROOT "cgroup.groups" /* root dynamic PMNS node */
+
+/* Add namespace entries and prepare values for one cgroupfs directory entry */
+struct cgroup_subsys;
+typedef int (*cgroup_prepare_t)(__pmnsTree *, const char *,
+ struct cgroup_subsys *, const char *, int, int, int);
+static int prepare_ull(__pmnsTree *, const char *,
+ struct cgroup_subsys *, const char *, int, int, int);
+static int prepare_string(__pmnsTree *, const char *,
+ struct cgroup_subsys *, const char *, int, int, int);
+static int prepare_named_ull(__pmnsTree *, const char *,
+ struct cgroup_subsys *, const char *, int, int, int);
+static int prepare_block_ull(__pmnsTree *, const char *,
+ struct cgroup_subsys *, const char *, int, int, int);
+static int prepare_blocks_ull(__pmnsTree *, const char *,
+ struct cgroup_subsys *, const char *, int, int, int);
+
+/*
+ * Critical data structures for cgroup subsystem in pmdaproc ...
+ * Initial comment for each struct talks about lifecycle of that
+ * data, in terms of what pmdaproc must do with it (esp. memory
+ * allocation related).
+ */
+
+/*
+ * One entry per metric of a controller.
+ * NOTE(review): the static tables in this file initialise only .suffix and
+ * .prepare, so item and dynamic start out as zero; presumably they are
+ * assigned elsewhere during setup - confirm, since update_pmns() builds
+ * each PMID from item.
+ */
+typedef struct { /* contents depends on individual kernel cgroups */
+ int item; /* PMID == domain:cluster:[id:item] */
+ int dynamic; /* do we need an extra free (string) */
+ cgroup_prepare_t prepare; /* setup metric name(s) and value(s) */
+ char *suffix; /* cpus/mems/rss/... */
+} cgroup_metrics_t;
+
+typedef struct { /* some metrics are multi-valued, but most have only one */
+ int item; /* PMID == domain:cluster:[id:item] */
+ int atom_count; /* number of entries in atoms[] */
+ pmAtomValue *atoms; /* heap array filled in by the prepare_* routines */
+} cgroup_values_t;
+
+typedef struct { /* contains data for each group users have created, if any */
+ int id; /* PMID == domain:cluster:[id:item] */
+ int refreshed; /* boolean: are values all uptodate */
+ proc_pid_list_t process_list; /* NOTE(review): not touched in the code visible here */
+ cgroup_values_t *metric_values; /* one slot per subsystem metric, allocated in namespace() */
+} cgroup_group_t;
+
+typedef struct cgroup_subsys { /* contents covers the known kernel cgroups */
+ const char *name; /* cpuset/memory/... */
+ int cluster; /* PMID == domain:cluster:[id:item] */
+ int group_count; /* number of groups (dynamic) */
+ int metric_count; /* number of metrics (fixed) */
+ time_t previous_time; /* used to avoid repeated refresh */
+ cgroup_group_t *groups; /* array of groups (dynamic) */
+ cgroup_metrics_t *metrics; /* array of metrics (fixed) */
+} cgroup_subsys_t;
+
+/* "cpu" controller: the <group>/cpu.shares file, parsed as unsigned integers */
+static cgroup_metrics_t cpusched_metrics[] = {
+ { .suffix = "shares", .prepare = prepare_ull },
+};
+
+/*
+ * "cpuacct" controller: the stat.* entries are looked up by name inside the
+ * cpuacct.stat file (prepare_named_ull); usage and usage_percpu are read as
+ * one or more whitespace-separated integers (prepare_ull).
+ */
+static cgroup_metrics_t cpuacct_metrics[] = {
+ { .suffix = "stat.user", .prepare = prepare_named_ull },
+ { .suffix = "stat.system", .prepare = prepare_named_ull },
+ { .suffix = "usage", .prepare = prepare_ull },
+ { .suffix = "usage_percpu", .prepare = prepare_ull },
+};
+
+/*
+ * "cpuset" controller: the cpuset.cpus and cpuset.mems files, exported as
+ * strings (they hold list-format ranges, not single numbers).
+ */
+static cgroup_metrics_t cpuset_metrics[] = {
+    { .suffix = "cpus", .prepare = prepare_string },
+    { .suffix = "mems", .prepare = prepare_string },
+};
+
+/*
+ * "memory" controller. Every suffix has the form "stat.<field>":
+ * prepare_named_ull splits it at the first '.', reads the memory.stat file
+ * and matches each "<field> <value>" pair found there against the <field>
+ * part of these names.
+ */
+static cgroup_metrics_t memory_metrics[] = {
+ { .suffix = "stat.cache", .prepare = prepare_named_ull },
+ { .suffix = "stat.rss", .prepare = prepare_named_ull },
+ { .suffix = "stat.rss_huge", .prepare = prepare_named_ull },
+ { .suffix = "stat.mapped_file", .prepare = prepare_named_ull },
+ { .suffix = "stat.writeback", .prepare = prepare_named_ull },
+ { .suffix = "stat.swap", .prepare = prepare_named_ull },
+ { .suffix = "stat.pgpgin", .prepare = prepare_named_ull },
+ { .suffix = "stat.pgpgout", .prepare = prepare_named_ull },
+ { .suffix = "stat.pgfault", .prepare = prepare_named_ull },
+ { .suffix = "stat.pgmajfault", .prepare = prepare_named_ull },
+ { .suffix = "stat.inactive_anon", .prepare = prepare_named_ull },
+ { .suffix = "stat.active_anon", .prepare = prepare_named_ull },
+ { .suffix = "stat.inactive_file", .prepare = prepare_named_ull },
+ { .suffix = "stat.active_file", .prepare = prepare_named_ull },
+ { .suffix = "stat.unevictable", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_cache", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_rss", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_rss_huge", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_mapped_file", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_writeback", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_swap", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_pgpgin", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_pgpgout", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_pgfault", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_pgmajfault", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_inactive_anon", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_active_anon", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_inactive_file", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_active_file", .prepare = prepare_named_ull },
+ { .suffix = "stat.total_unevictable", .prepare = prepare_named_ull },
+ { .suffix = "stat.recent_rotated_anon", .prepare = prepare_named_ull },
+ { .suffix = "stat.recent_rotated_file", .prepare = prepare_named_ull },
+ { .suffix = "stat.recent_scanned_anon", .prepare = prepare_named_ull },
+ { .suffix = "stat.recent_scanned_file", .prepare = prepare_named_ull },
+};
+
+/* "net_cls" controller: the <group>/net_cls.classid file, parsed as an unsigned integer */
+static cgroup_metrics_t netclass_metrics[] = {
+ { .suffix = "classid", .prepare = prepare_ull },
+};
+
+/*
+ * "blkio" controller. The io_* entries come in runs of BLKIOS consecutive
+ * metrics in read/write/sync/async/total order (matching
+ * block_stats_names[]): prepare_blocks_ull only does work for the first
+ * metric of each run (index divisible by BLKIOS), strips its ".read"
+ * suffix to get the file name, and fills in the values for the whole run.
+ * Keep that ordering and alignment when adding entries.
+ * sectors and time carry a single value per device (prepare_block_ull).
+ */
+static cgroup_metrics_t blkio_metrics[] = {
+ { .suffix = "io_merged.read", .prepare = prepare_blocks_ull },
+ { .suffix = "io_merged.write", .prepare = prepare_blocks_ull },
+ { .suffix = "io_merged.sync", .prepare = prepare_blocks_ull },
+ { .suffix = "io_merged.async", .prepare = prepare_blocks_ull },
+ { .suffix = "io_merged.total", .prepare = prepare_blocks_ull },
+ { .suffix = "io_queued.read", .prepare = prepare_blocks_ull },
+ { .suffix = "io_queued.write", .prepare = prepare_blocks_ull },
+ { .suffix = "io_queued.sync", .prepare = prepare_blocks_ull },
+ { .suffix = "io_queued.async", .prepare = prepare_blocks_ull },
+ { .suffix = "io_queued.total", .prepare = prepare_blocks_ull },
+ { .suffix = "io_service_bytes.read", .prepare = prepare_blocks_ull },
+ { .suffix = "io_service_bytes.write", .prepare = prepare_blocks_ull },
+ { .suffix = "io_service_bytes.sync", .prepare = prepare_blocks_ull },
+ { .suffix = "io_service_bytes.async", .prepare = prepare_blocks_ull },
+ { .suffix = "io_service_bytes.total", .prepare = prepare_blocks_ull },
+ { .suffix = "io_serviced.read", .prepare = prepare_blocks_ull },
+ { .suffix = "io_serviced.write", .prepare = prepare_blocks_ull },
+ { .suffix = "io_serviced.sync", .prepare = prepare_blocks_ull },
+ { .suffix = "io_serviced.async", .prepare = prepare_blocks_ull },
+ { .suffix = "io_serviced.total", .prepare = prepare_blocks_ull },
+ { .suffix = "io_service_time.read", .prepare = prepare_blocks_ull },
+ { .suffix = "io_service_time.write", .prepare = prepare_blocks_ull },
+ { .suffix = "io_service_time.sync", .prepare = prepare_blocks_ull },
+ { .suffix = "io_service_time.async", .prepare = prepare_blocks_ull },
+ { .suffix = "io_service_time.total", .prepare = prepare_blocks_ull },
+ { .suffix = "io_wait_time.read", .prepare = prepare_blocks_ull },
+ { .suffix = "io_wait_time.write", .prepare = prepare_blocks_ull },
+ { .suffix = "io_wait_time.sync", .prepare = prepare_blocks_ull },
+ { .suffix = "io_wait_time.async", .prepare = prepare_blocks_ull },
+ { .suffix = "io_wait_time.total", .prepare = prepare_blocks_ull },
+ { .suffix = "sectors", .prepare = prepare_block_ull },
+ { .suffix = "time", .prepare = prepare_block_ull },
+};
+
+/*
+ * Operation keywords searched for (case-insensitively, last entry first) on
+ * each line of a multi-valued blkio file; the index of a keyword is also
+ * the index of its value in block_stats_t.values[].
+ */
+static const char *block_stats_names[] = \
+ { "read", "write", "sync", "async", "total" };
+#define BLKIOS (sizeof(block_stats_names)/sizeof(block_stats_names[0]))
+
+/*
+ * The cgroup controllers this file knows about. .name is both the prefix
+ * of the cgroupfs file names that are read ("<name>.<suffix>") and the
+ * namespace component that follows CGROUP_ROOT; groups/group_count start
+ * out empty and grow in namespace().
+ */
+static cgroup_subsys_t controllers[] = {
+ { .name = "cpu",
+ .cluster = CLUSTER_CPUSCHED_GROUPS,
+ .metrics = cpusched_metrics,
+ .metric_count = sizeof(cpusched_metrics) / sizeof(cgroup_metrics_t),
+ },
+ { .name = "cpuset",
+ .cluster = CLUSTER_CPUSET_GROUPS,
+ .metrics = cpuset_metrics,
+ .metric_count = sizeof(cpuset_metrics) / sizeof(cgroup_metrics_t),
+ },
+ { .name = "cpuacct",
+ .cluster = CLUSTER_CPUACCT_GROUPS,
+ .metrics = cpuacct_metrics,
+ .metric_count = sizeof(cpuacct_metrics) / sizeof(cgroup_metrics_t),
+ },
+ { .name = "memory",
+ .cluster = CLUSTER_MEMORY_GROUPS,
+ .metrics = memory_metrics,
+ .metric_count = sizeof(memory_metrics) / sizeof(cgroup_metrics_t),
+ },
+ { .name = "net_cls",
+ .cluster = CLUSTER_NET_CLS_GROUPS,
+ .metrics = netclass_metrics,
+ .metric_count = sizeof(netclass_metrics) / sizeof(cgroup_metrics_t),
+ },
+ { .name = "blkio",
+ .cluster = CLUSTER_BLKIO_GROUPS,
+ .metrics = blkio_metrics,
+ .metric_count = sizeof(blkio_metrics) / sizeof(cgroup_metrics_t),
+ },
+};
+
+/*
+ * Data structures used by individual cgroup subsystem controllers
+ */
+typedef struct {
+ __uint32_t major; /* device numbers as listed in /proc/diskstats */
+ __uint32_t minor;
+ int inst; /* instance identifier from the disk-name indom cache */
+ char *name; /* instance name as returned by that cache */
+} device_t;
+
+typedef struct {
+ device_t dev;
+ __uint64_t values[BLKIOS]; /* read, write, sync, async, total */
+} block_stats_t;
+
+typedef struct filesys {
+ int id; /* NOTE(review): not assigned anywhere in the code visible here */
+ char *device; /* NOTE(review): likewise never set in the code visible here */
+ char *path; /* strdup'd mount point (second field of /proc/mounts) */
+ char *options; /* strdup'd mount options (fourth field of /proc/mounts) */
+} filesys_t;
+
+/*
+ * Rebuild the CPU instance domain from /proc/stat: all instances are first
+ * marked inactive, then each line starting with "cpu" followed by a digit
+ * adds (or re-activates) an instance named by the text before its first
+ * space.
+ */
+void
+refresh_cgroup_cpus(pmInDom indom)
+{
+    char line[MAXPATHLEN];
+    char *sep;
+    FILE *statfp;
+
+    pmdaCacheOp(indom, PMDA_CACHE_INACTIVE);
+    statfp = proc_statsfile("/proc/stat", line, sizeof(line));
+    if (statfp == NULL)
+        return;
+    while (fgets(line, sizeof(line), statfp) != NULL) {
+        /* only the per-CPU lines ("cpu0 ...", "cpu1 ...") are of interest */
+        if (strncmp(line, "cpu", 3) != 0 || !isdigit((int)line[3]))
+            continue;
+        sep = strchr(line, ' ');
+        if (sep == NULL)
+            continue;
+        *sep = '\0';
+        pmdaCacheStore(indom, PMDA_CACHE_ADD, line, NULL);
+    }
+    fclose(statfp);
+}
+
+/* Return 1 when the device name starts with "loop", otherwise 0. */
+static int
+_pm_isloop(char *dname)
+{
+    static const char prefix[] = "loop";
+
+    if (strncmp(dname, prefix, sizeof(prefix) - 1) != 0)
+        return 0;
+    return 1;
+}
+
+/* Return 1 when the device name starts with "ram", otherwise 0. */
+static int
+_pm_isramdisk(char *dname)
+{
+    static const char prefix[] = "ram";
+
+    if (strncmp(dname, prefix, sizeof(prefix) - 1) != 0)
+        return 0;
+    return 1;
+}
+
+/*
+ * For block devices we have one instance domain for dev_t
+ * based lookup, and another for (real) name lookup.
+ * The reason we need this is that the blkio cgroup stats
+ * are exported using the major:minor numbers, and not the
+ * device names - we must perform that mapping ourselves.
+ * In some places (value refresh) we need to lookup the blk
+ * name from device major/minor, in other places (instances
+ * refresh) we need the usual external instid:name lookup.
+ *
+ * Both caches are rebuilt from /proc/diskstats, at most once per
+ * second; loop and ram devices are left out. The same device_t is
+ * attached to the entry in each cache.
+ */
+void
+refresh_cgroup_devices(pmInDom diskindom)
+{
+    pmInDom devtindom = INDOM(DEVT_INDOM);
+    char buf[MAXPATHLEN];
+    static time_t before;
+    time_t now;
+    FILE *fp;
+
+    if ((now = time(NULL)) == before)
+        return;
+    before = now;
+
+    pmdaCacheOp(devtindom, PMDA_CACHE_INACTIVE);
+    pmdaCacheOp(diskindom, PMDA_CACHE_INACTIVE);
+
+    if ((fp = proc_statsfile("/proc/diskstats", buf, sizeof(buf))) == NULL)
+        return;
+
+    while (fgets(buf, sizeof(buf), fp) != NULL) {
+        unsigned int major, minor, unused;
+        device_t *dev = NULL;
+        char namebuf[1024];
+        int inst;
+
+        /* the field width keeps an over-long name from overrunning namebuf */
+        if (sscanf(buf, "%u %u %1023s %u", &major, &minor, namebuf, &unused) != 4)
+            continue;
+        if (_pm_isloop(namebuf) || _pm_isramdisk(namebuf))
+            continue;
+        if (pmdaCacheLookupName(diskindom, namebuf, &inst, (void **)&dev) < 0 ||
+            dev == NULL) {
+            /* calloc so inst/name are well-defined until looked up below */
+            if (!(dev = (device_t *)calloc(1, sizeof(device_t)))) {
+                __pmNoMem("device", sizeof(device_t), PM_RECOV_ERR);
+                continue;
+            }
+            dev->major = major;
+            dev->minor = minor;
+        }
+        /* keeping track of all fields (major/minor/inst/name) */
+        pmdaCacheStore(diskindom, PMDA_CACHE_ADD, namebuf, dev);
+        pmdaCacheLookupName(diskindom, namebuf, &dev->inst, NULL);
+        pmdaCacheLookup(diskindom, dev->inst, &dev->name, NULL);
+
+        snprintf(buf, sizeof(buf), "%u:%u", major, minor);
+        pmdaCacheStore(devtindom, PMDA_CACHE_ADD, buf, (void *)dev);
+
+        if (pmDebug & DBG_TRACE_APPL0)
+            fprintf(stderr, "refresh_devices: \"%s\" \"%d:%d\" inst=%d\n",
+                    dev->name, dev->major, dev->minor, dev->inst);
+    }
+    fclose(fp);
+}
+
+/*
+ * Load the controller list from /proc/cgroups into the given instance
+ * domain cache, at most once per second. Each instance is named after a
+ * controller and carries a malloc'd long holding its hierarchy number.
+ * Note that entries are not marked inactive here before the scan.
+ */
+void
+refresh_cgroup_subsys(pmInDom indom)
+{
+ char buf[4096];
+ static time_t before;
+ time_t now;
+ FILE *fp;
+
+ if ((now = time(NULL)) == before)
+ return;
+ before = now;
+
+ if ((fp = proc_statsfile("/proc/cgroups", buf, sizeof(buf))) == NULL)
+ return;
+
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+ unsigned int numcgroups, enabled;
+ char name[MAXPATHLEN];
+ long hierarchy;
+ long *data;
+ int sts;
+
+ /* skip lines starting with hash (header) */
+ if (buf[0] == '#')
+ continue;
+ /* NOTE(review): %s is unbounded; safe only while MAXPATHLEN >= sizeof(buf) */
+ if (sscanf(buf, "%s %ld %u %u", &name[0],
+ &hierarchy, &numcgroups, &enabled) != 4)
+ continue;
+ sts = pmdaCacheLookupName(indom, name, NULL, (void **)&data);
+ if (sts == PMDA_CACHE_ACTIVE) {
+ /* already known: keep the first hierarchy seen, warn on a mismatch */
+ if (*data != hierarchy) {
+ /*
+ * odd ... instance name repeated but different
+ * hierarchy ... we cannot support more than one hierarchy
+ * yet
+ */
+ fprintf(stderr, "refresh_cgroup_subsys: \"%s\": entries for hierarchy %ld ignored (hierarchy %ld seen first)\n", name, hierarchy, *data);
+ }
+ continue;
+ }
+ else if (sts != PMDA_CACHE_INACTIVE) {
+ /* not in the cache at all: allocate storage for the hierarchy */
+ if ((data = (long *)malloc(sizeof(long))) == NULL) {
+#if PCP_DEBUG
+ if (pmDebug & DBG_TRACE_APPL0)
+ fprintf(stderr, "refresh_cgroup_subsys: \"%s\": malloc failed\n", name);
+#endif
+ continue;
+ }
+ *data = hierarchy;
+ }
+ /* an inactive entry is re-added with its existing data, unchanged */
+ pmdaCacheStore(indom, PMDA_CACHE_ADD, name, (void *)data);
+#if PCP_DEBUG
+ if (pmDebug & DBG_TRACE_APPL0)
+ fprintf(stderr, "refresh_cgroup_subsys: add \"%s\" [hierarchy %ld]\n", name, hierarchy);
+#endif
+ }
+ fclose(fp);
+}
+
+/*
+ * Rebuild the cgroup-mount instance domain from /proc/mounts, at most once
+ * per second. Every mount of type "cgroup" becomes an instance named after
+ * its mount point, with a filesys_t holding copies of the path and mount
+ * options. Lines with fewer than four fields are skipped, as are entries
+ * for which memory cannot be allocated.
+ */
+void
+refresh_cgroup_filesys(pmInDom indom)
+{
+    char buf[MAXPATHLEN];
+    filesys_t *fs;
+    FILE *fp;
+    time_t now;
+    static time_t before;
+    char *path, *device, *type, *options, *copy;
+    int sts;
+
+    if ((now = time(NULL)) == before)
+        return;
+    before = now;
+
+    pmdaCacheOp(indom, PMDA_CACHE_INACTIVE);
+
+    if ((fp = proc_statsfile("/proc/mounts", buf, sizeof(buf))) == NULL)
+        return;
+
+    while (fgets(buf, sizeof(buf), fp) != NULL) {
+        device = strtok(buf, " ");
+        path = strtok(NULL, " ");
+        type = strtok(NULL, " ");
+        options = strtok(NULL, " ");
+        /* a short or blank line leaves the later tokens NULL */
+        if (device == NULL || path == NULL || type == NULL || options == NULL)
+            continue;
+        if (strcmp(type, "cgroup") != 0)
+            continue;
+
+        fs = NULL;
+        sts = pmdaCacheLookupName(indom, path, NULL, (void **)&fs);
+        if (sts == PMDA_CACHE_ACTIVE)   /* repeated line in /proc/mounts? */
+            continue;
+        if (sts == PMDA_CACHE_INACTIVE && fs != NULL) {
+            /* re-activate an old mount, refreshing whatever changed */
+            pmdaCacheStore(indom, PMDA_CACHE_ADD, path, fs);
+            if (fs->path == NULL || strcmp(path, fs->path) != 0) {
+                /* only swap in the new copy once we know we have one */
+                if ((copy = strdup(path)) != NULL) {
+                    free(fs->path);
+                    fs->path = copy;
+                }
+            }
+            if (fs->options == NULL || strcmp(options, fs->options) != 0) {
+                if ((copy = strdup(options)) != NULL) {
+                    free(fs->options);
+                    fs->options = copy;
+                }
+            }
+        }
+        else {  /* new mount */
+            /* calloc so the fields not filled in here are zeroed */
+            if ((fs = calloc(1, sizeof(filesys_t))) == NULL)
+                continue;
+            fs->path = strdup(path);
+            fs->options = strdup(options);
+            if (fs->path == NULL || fs->options == NULL) {
+                free(fs->path);
+                free(fs->options);
+                free(fs);
+                continue;
+            }
+            if (pmDebug & DBG_TRACE_APPL0)
+                fprintf(stderr, "refresh_filesys: add \"%s\" \"%s\"\n",
+                        fs->path, device);
+            pmdaCacheStore(indom, PMDA_CACHE_ADD, path, fs);
+        }
+    }
+    fclose(fp);
+}
+
+/*
+ * Look for an exact match of option in the comma-separated options string.
+ * Returns a pointer into a static buffer (overwritten by the next call)
+ * holding the matched token, or NULL when there is no match. Only the
+ * first 127 characters of options are examined.
+ */
+static char *
+scan_filesys_options(const char *options, const char *option)
+{
+    static char buffer[128];
+    char *token;
+
+    strncpy(buffer, options, sizeof(buffer));
+    buffer[sizeof(buffer)-1] = '\0';
+
+    for (token = strtok(buffer, ","); token != NULL; token = strtok(NULL, ",")) {
+        if (strcmp(token, option) == 0)
+            return token;
+    }
+    return NULL;
+}
+
+/*
+ * Read the start of the file "<path>/<subsys>.<metric>" into buffer.
+ * buffer doubles as scratch space for building the file name. On success
+ * buffer holds the data read with its final byte (normally the trailing
+ * newline) replaced by a terminator - an empty file yields "" - and 0 is
+ * returned; otherwise the negated error code is returned.
+ */
+static int
+read_values(char *buffer, int size, const char *path, const char *subsys,
+            const char *metric)
+{
+    int fd, count, sts;
+
+    snprintf(buffer, size, "%s/%s.%s", path, subsys, metric);
+    if ((fd = open(buffer, O_RDONLY)) < 0)
+        return -oserror();
+    count = read(fd, buffer, size);
+    if (count < 0) {
+        sts = -oserror();       /* capture before close() can change it */
+        close(fd);
+        return sts;
+    }
+    close(fd);
+    /* count == 0 must not index buffer[-1] */
+    buffer[count > 0 ? count - 1 : 0] = '\0';
+    return 0;
+}
+
+/*
+ * Add the namespace entry "<CGROUP_ROOT>.<subsys><name>.<suffix>" for one
+ * metric of one group and return the PMID it was registered under.
+ */
+static pmID
+update_pmns(__pmnsTree *pmns, cgroup_subsys_t *subsys, const char *name,
+            cgroup_metrics_t *metrics, int group, int domain)
+{
+    pmID id = cgroup_pmid_build(domain, subsys->cluster, group, metrics->item);
+    char node[MAXPATHLEN];
+
+    snprintf(node, sizeof(node), "%s.%s%s.%s",
+             CGROUP_ROOT, subsys->name, name, metrics->suffix);
+    __pmAddPMNSNode(pmns, id, node);
+    return id;
+}
+
+/*
+ * Read "<path>/<subsys>.<suffix>" as a whitespace-separated list of
+ * unsigned integers, store them as the atoms of this group's metric and
+ * add the metric to the namespace.
+ * Returns 0 on success or a negative error code when the file cannot be
+ * read or memory runs out (values parsed up to that point are kept).
+ */
+static int
+prepare_ull(__pmnsTree *pmns, const char *path, cgroup_subsys_t *subsys,
+            const char *name, int metric, int group, int domain)
+{
+    int sts, count = 0;
+    unsigned long long value;
+    char buffer[MAXPATHLEN];
+    char *endp, *p = &buffer[0];
+    cgroup_group_t *groups = &subsys->groups[group];
+    cgroup_metrics_t *metrics = &subsys->metrics[metric];
+    pmAtomValue *atoms = groups->metric_values[metric].atoms;
+    pmAtomValue *grown;
+
+    sts = read_values(p, sizeof(buffer), path, subsys->name, metrics->suffix);
+    if (sts < 0)
+        return sts;
+
+    while (*p) {
+        value = strtoull(p, &endp, 0);
+        /* keep the old array reachable if it cannot be grown */
+        grown = realloc(atoms, (count + 1) * sizeof(pmAtomValue));
+        if (grown == NULL) {
+            sts = -oserror();
+            break;
+        }
+        atoms = grown;
+        atoms[count++].ull = value;
+        /* stop at end of input, or when nothing numeric was consumed */
+        if (*endp == '\0' || endp == p)
+            break;
+        p = endp;
+        while (isspace((unsigned char)*p))
+            p++;
+    }
+
+    groups->metric_values[metric].item = metric;
+    groups->metric_values[metric].atoms = atoms;
+    groups->metric_values[metric].atom_count = count;
+    if (sts < 0)
+        return sts;
+    update_pmns(pmns, subsys, name, metrics, group, domain);
+    return 0;
+}
+
+/*
+ * Handle metrics whose suffix is "<file>.<field>" (e.g. stat.user): read
+ * "<path>/<subsys>.<file>", which holds "<field> <value>" pairs, and for
+ * every pair whose field matches one of the subsystem's "<file>.<field>"
+ * metrics store the value as that metric's single atom and add it to the
+ * namespace.
+ * Returns 0 on success, PM_ERR_CONV when the suffix or file contents do
+ * not have the expected shape, or a negative error code.
+ */
+static int
+prepare_named_ull(__pmnsTree *pmns, const char *path, cgroup_subsys_t *subsys,
+                  const char *name, int metric, int group, int domain)
+{
+    int i, count, sts;
+    unsigned long long value;
+    char filename[64], buffer[MAXPATHLEN];
+    char *dot, *p = &buffer[0];
+    cgroup_group_t *groups = &subsys->groups[group];
+    cgroup_metrics_t *metrics = &subsys->metrics[metric];
+
+    /* metric => e.g. stat.user and stat.system - split it up first */
+    dot = strchr(metrics->suffix, '.');
+    if (!dot)
+        return PM_ERR_CONV;
+    count = (dot - metrics->suffix);
+    if (count >= (int)sizeof(filename))
+        return PM_ERR_CONV;
+    strncpy(filename, metrics->suffix, count);
+    filename[count] = '\0';
+
+    sts = read_values(p, sizeof(buffer), path, subsys->name, filename);
+    if (sts < 0)
+        return sts;
+
+    /* buffer contains <name> <value> pairs */
+    while (*p) {
+        char *endp, *field, *valuep;
+
+        if ((valuep = strchr(p, ' ')) == NULL)
+            return PM_ERR_CONV;
+        *valuep++ = '\0';
+        field = p;              /* field now points to <name> */
+        value = strtoull(valuep, &endp, 0);
+        p = endp;
+        while (isspace((unsigned char)*p))
+            p++;
+
+        for (i = 0; i < subsys->metric_count; i++) {
+            pmAtomValue *atoms;
+
+            metrics = &subsys->metrics[i];
+            /* only "<file>.<field>" metrics of this same file can match */
+            if (strncmp(metrics->suffix, filename, count) != 0 ||
+                metrics->suffix[count] != '.')
+                continue;
+            if (strcmp(field, metrics->suffix + count + 1) != 0)
+                continue;
+            if ((atoms = groups->metric_values[i].atoms) == NULL)
+                if ((atoms = calloc(1, sizeof(pmAtomValue))) == NULL)
+                    return -oserror();
+            atoms[0].ull = value;
+
+            groups->metric_values[i].item = i;
+            groups->metric_values[i].atoms = atoms;
+            groups->metric_values[i].atom_count = 1;
+            update_pmns(pmns, subsys, name, metrics, group, domain);
+            break;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Store value_count consecutive per-device values (metrics metric ..
+ * metric+value_count-1, taken from stats->values[]) for the block device
+ * identified by stats->dev major:minor, and add the matching namespace
+ * entries. Each metric's atom array is indexed by the device's instance
+ * identifier; slots created while growing the array are set to ULLONG_MAX.
+ * Returns 0 on success, or a negative error code when the device is not
+ * in the major:minor cache or memory cannot be allocated.
+ */
+static int
+prepare_block(__pmnsTree *pmns, const char *path, cgroup_subsys_t *subsys,
+              const char *name, int metric, int group, int domain,
+              block_stats_t *stats, int value_count)
+{
+    pmID pmid;
+    char *iname;
+    char buf[MAXPATHLEN];
+    device_t *dev = NULL;
+    pmAtomValue *atoms, *grown;
+    int count, size, inst, sts, m, i, j;
+    pmInDom devtindom = INDOM(DEVT_INDOM);
+    cgroup_group_t *groups = &subsys->groups[group];
+    cgroup_metrics_t *metrics = &subsys->metrics[metric];
+
+    /* map major:minor to real device name via diskstats */
+    snprintf(buf, sizeof(buf), "%u:%u", stats->dev.major, stats->dev.minor);
+    sts = pmdaCacheLookupName(devtindom, buf, NULL, (void **)&dev);
+
+    /* an unknown device has no instance to file the values under */
+    if (sts < 0 || dev == NULL || dev->inst < 0) {
+        if (pmDebug & DBG_TRACE_APPL0)
+            fprintf(stderr, "prepare_block: preparing %s found=%s (%s)\n",
+                    buf, "no", "?");
+        return sts < 0 ? sts : PM_ERR_INST;
+    }
+    iname = dev->name ? dev->name : "?";
+    inst = dev->inst;
+
+    if (pmDebug & DBG_TRACE_APPL0)
+        fprintf(stderr, "prepare_block: preparing %s found=%s (%s)\n",
+                buf, sts == PMDA_CACHE_ACTIVE ? "ok" : "no", iname);
+
+    /* batch update metric value(s) now, since we have 'em all */
+    for (j = 0; j < value_count; j++) {
+        m = metric + j;
+        atoms = groups->metric_values[m].atoms;
+        count = groups->metric_values[m].atom_count;
+
+        if (inst >= count) {
+            size = (inst + 1) * sizeof(pmAtomValue);
+            /* on failure the stored array is still valid and still owned */
+            if ((grown = realloc(atoms, size)) == NULL)
+                return -oserror();
+            atoms = grown;
+            for (i = count; i < inst + 1; i++)
+                atoms[i].ull = ULLONG_MAX;
+            count = inst + 1;
+        }
+        /* move on-stack value into global struct, add to PMNS */
+        atoms[inst].ull = stats->values[j];
+        pmid = update_pmns(pmns, subsys, name, metrics + j, group, domain);
+
+        if (pmDebug & DBG_TRACE_APPL0)
+            fprintf(stderr, "prepare_block: prepared "
+                            "metric=%s inst=%s[%d] value=%llu\n",
+                    pmIDStr(pmid), iname, inst,
+                    (unsigned long long)atoms[inst].ull);
+
+        groups->metric_values[m].item = m;
+        groups->metric_values[m].atoms = atoms;
+        groups->metric_values[m].atom_count = count;
+    }
+    return 0;
+}
+
+/*
+ * Handle a blkio file with one "<major>:<minor> <value>" line per device:
+ * each parsed line is handed to prepare_block() as a single value for
+ * this metric. Lines that do not start with a device number are ignored;
+ * a missing or unparsable value is recorded as 0.
+ * Returns 0, or a negative error code when the file cannot be opened.
+ */
+static int
+prepare_block_ull(__pmnsTree *pmns, const char *path, cgroup_subsys_t *subsys,
+                  const char *name, int metric, int group, int domain)
+{
+    char buf[MAXPATHLEN];
+    cgroup_metrics_t *metrics = &subsys->metrics[metric];
+    block_stats_t stats;
+    FILE *fp;
+    char *p;
+
+    if (pmDebug & DBG_TRACE_APPL0)
+        fprintf(stderr, "prepare_block_ull: %s metric=%d group=%d domain=%d\n",
+                path, metric, group, domain);
+
+    snprintf(buf, sizeof(buf), "%s/%s.%s", path, subsys->name, metrics->suffix);
+    if ((fp = fopen(buf, "r")) == NULL)
+        return -oserror();
+
+    memset(&stats, 0, sizeof(stats));
+    while ((fgets(buf, sizeof(buf), fp)) != NULL) {
+        if (sscanf(buf, "%u:%u ", &stats.dev.major, &stats.dev.minor) != 2)
+            continue;
+        /* skip device number */
+        for (p = buf; *p && !isspace((unsigned char)*p); p++) { }
+        /* skip over spaces, continuing from where the number ended */
+        for (; *p && isspace((unsigned char)*p); p++) { }
+        if (sscanf(p, "%llu", (unsigned long long *)&stats.values[0]) != 1)
+            stats.values[0] = 0;
+        prepare_block(pmns, path, subsys, name,
+                      metric, group, domain, &stats, 1);
+    }
+    fclose(fp);
+    return 0;
+}
+
+/*
+ * Handle a blkio file with several "<major>:<minor> <Operation> <value>"
+ * lines per device. Work is only done when called for the first metric of
+ * a run of BLKIOS metrics; the file name is that metric's suffix with the
+ * trailing ".read" removed. Values are collected per device and handed to
+ * prepare_block() once the "total" keyword has been matched on a line.
+ * Returns 0, or a negative error code when the file cannot be opened.
+ */
+static int
+prepare_blocks_ull(__pmnsTree *pmns, const char *path, cgroup_subsys_t *subsys,
+ const char *name, int metric, int group, int domain)
+{
+ char buf[MAXPATHLEN];
+ cgroup_metrics_t *metrics = &subsys->metrics[metric];
+ block_stats_t stats;
+ FILE *fp;
+ char *p;
+ int j;
+
+ if (pmDebug & DBG_TRACE_APPL0)
+ fprintf(stderr, "prepare_blocks_ull: %s metric=%d group=%d domain=%d\n",
+ path, metric, group, domain);
+
+ /* the other metrics of a run are filled in by the call for its first one */
+ if (metric % BLKIOS != 0)
+ return 0;
+
+ snprintf(buf, sizeof(buf), "%s/%s.%s", path, subsys->name, metrics->suffix);
+ /* sizeof("read") counts the terminator, so this also drops the '.' */
+ buf[strlen(buf) - sizeof("read")] = '\0';
+
+ if (pmDebug & DBG_TRACE_APPL2)
+ fprintf(stderr, "prepare_blocks_ull: opening \"%s\"\n", buf);
+
+ if ((fp = fopen(buf, "r")) == NULL)
+ return -oserror();
+
+ memset(&stats, 0, sizeof(stats));
+ while ((fgets(buf, sizeof(buf), fp)) != NULL) {
+ /* lines without a leading device number are skipped */
+ if (sscanf(buf, "%u:%u ", &stats.dev.major, &stats.dev.minor) != 2)
+ continue;
+
+ /* iterate over read/write/sync/async/total (reverse for async) */
+ for (j = BLKIOS-1; j >= 0; j--) {
+ if ((p = strcasestr(buf, block_stats_names[j])) == NULL)
+ continue;
+ p += strlen(block_stats_names[j]) + 1;
+ if (sscanf(p, "%llu", (unsigned long long *)&stats.values[j]) != 1)
+ stats.values[j] = 0;
+ break;
+ }
+
+ /* j is left at the index of the keyword matched on this line */
+ if (j == BLKIOS - 1) { /* Total: last one, update incore structures */
+ prepare_block(pmns, path, subsys, name,
+ metric, group, domain, &stats, BLKIOS);
+ /* reset on-stack structure for next outer loop iteration */
+ memset(&stats, 0, sizeof(stats));
+ }
+ }
+ fclose(fp);
+ return 0;
+}
+
+/*
+ * Read "<path>/<subsys>.<suffix>" and keep its text as this group's single
+ * string-valued atom, then add the metric to the namespace.
+ * Returns 0 on success or a negative error code.
+ */
+static int
+prepare_string(__pmnsTree *pmns, const char *path, cgroup_subsys_t *subsys,
+               const char *name, int metric, int group, int domain)
+{
+    cgroup_metrics_t *desc = &subsys->metrics[metric];
+    cgroup_values_t *slot = &subsys->groups[group].metric_values[metric];
+    char text[MAXPATHLEN];
+    pmAtomValue *atom;
+
+    if (read_values(text, sizeof(text), path, subsys->name, desc->suffix) < 0)
+        return -oserror();
+
+    atom = malloc(sizeof(pmAtomValue));
+    if (atom == NULL)
+        return -oserror();
+    atom[0].cp = strdup(text);
+    if (atom[0].cp == NULL) {
+        free(atom);
+        return -oserror();
+    }
+
+    slot->item = metric;
+    slot->atoms = atom;
+    slot->atom_count = 1;
+    update_pmns(pmns, subsys, name, desc, group, domain);
+    return 0;
+}
+
+/*
+ * Copy the cgroup name src into dest (a buffer of size bytes), replacing
+ * every '/' with '.' so it can be used as part of a namespace entry. The
+ * result is always terminated; names longer than size-1 are truncated.
+ */
+static void
+translate(char *dest, const char *src, size_t size)
+{
+    char *p;
+
+    if (size == 0)
+        return;
+    strncpy(dest, src, size);
+    dest[size - 1] = '\0';      /* strncpy does not terminate on truncation */
+    for (p = dest; *p; p++) {
+        if (*p == '/')
+            *p = '.';
+    }
+}
+
+/*
+ * Register one cgroup with a subsystem: append a new, zeroed group to
+ * subsys->groups, allocate one value slot per subsystem metric, and let
+ * every metric's prepare routine read its values and add its namespace
+ * entries. Returns 1 on success or a negative error code when memory
+ * cannot be allocated (the existing groups are left untouched).
+ */
+static int
+namespace(__pmnsTree *pmns, cgroup_subsys_t *subsys,
+          const char *cgrouppath, const char *cgroupname, int domain)
+{
+    int i, id;
+    size_t size;
+    cgroup_values_t *cvp;
+    cgroup_group_t *grown;
+    char group[128];
+
+    translate(&group[0], cgroupname, sizeof(group));
+    group[sizeof(group) - 1] = '\0';    /* guarantee termination of long names */
+
+    /* allocate space for this group, keeping the old array on failure */
+    size = (subsys->group_count + 1) * sizeof(cgroup_group_t);
+    grown = (cgroup_group_t *)realloc(subsys->groups, size);
+    if (grown == NULL)
+        return -oserror();
+    subsys->groups = grown;
+
+    /* allocate space for all values up-front */
+    cvp = (cgroup_values_t *)calloc(subsys->metric_count, sizeof(cgroup_values_t));
+    if (cvp == NULL)
+        return -oserror();
+
+    id = subsys->group_count++;
+    memset(&subsys->groups[id], 0, sizeof(cgroup_group_t));
+    subsys->groups[id].id = id;
+    subsys->groups[id].metric_values = cvp;
+
+    for (i = 0; i < subsys->metric_count; i++) {
+        cgroup_metrics_t *metrics = &subsys->metrics[i];
+        metrics->prepare(pmns, cgrouppath, subsys, group, i, id, domain);
+    }
+    return 1;
+}
+
+/*
+ * Build the comma-separated list of those mount options of a cgroup
+ * filesystem (data is its filesys_t) that are active instance names in
+ * indom. Returns a pointer to a static buffer that the next call
+ * overwrites, or "?" when no option matches.
+ */
+char *
+cgroup_find_subsys(pmInDom indom, void *data)
+{
+    static char dunno[] = "?";
+    static char opts[256];
+    char buffer[256];
+    char *s, *out = NULL;
+    filesys_t *fs = (filesys_t *)data;
+
+    memset(opts, 0, sizeof(opts));
+    strncpy(buffer, fs->options, sizeof(buffer));
+    /* strtok needs a terminated string even when options were truncated */
+    buffer[sizeof(buffer) - 1] = '\0';
+
+    /* opts receives a subset of buffer's tokens, so it cannot overflow */
+    s = strtok(buffer, ",");
+    while (s) {
+        if (pmdaCacheLookupName(indom, s, NULL, NULL) == PMDA_CACHE_ACTIVE) {
+            if (out) {          /* append option */
+                strcat(out, ",");
+                strcat(out, s);
+                out += strlen(s) + 1;   /* +1 => cater for comma */
+            } else {            /* first option */
+                strcat(opts, s);
+                out = opts + strlen(s);
+            }
+        }
+        s = strtok(NULL, ",");
+    }
+    if (out)
+        return opts;
+    return dunno;
+}
+
+/*
+ * Ensure cgroup name can be used as a PCP namespace entry, ignore it if not.
+ *
+ * A usable name starts with a letter and continues with letters, digits
+ * or underscores only.  Returns 1 for a usable name, 0 otherwise (which
+ * includes the empty string).
+ */
+static int
+valid_pmns_name(char *name)
+{
+    /*
+     * The <ctype.h> functions need a value representable as unsigned char;
+     * a plain char with the top bit set would otherwise be passed negative.
+     */
+    if (!isalpha((unsigned char)name[0]))
+        return 0;
+    for (; *name != '\0'; name++)
+        if (!isalnum((unsigned char)*name) && *name != '_')
+            return 0;
+    return 1;
+}
+
+/*
+ * Register the cgroup found at <proc_statspath><mnt>[/<path>] and then
+ * every subdirectory below it, recursively.
+ *
+ * mnt    - mount point of the cgroup filesystem
+ * path   - cgroup path below the mount point ("" at the top level)
+ * subsys - controller the discovered groups are added to
+ * domain - domain number handed on to namespace()
+ * pmns   - namespace tree handed on to namespace()
+ * root   - non-zero for the top-level call on a mount point
+ *
+ * Returns the result of namespace() for this directory, raised to 1 if
+ * any subdirectory or nested scan reports success.  Returns -oserror()
+ * if the directory cannot be opened, or PM_ERR_GENERIC if its path does
+ * not fit in MAXPATHLEN.
+ */
+static int
+cgroup_scan(const char *mnt, const char *path, cgroup_subsys_t *subsys,
+        int domain, __pmnsTree *pmns, int root)
+{
+    int sts, length, n;
+    DIR *dirp;
+    struct stat sbuf;
+    struct dirent *dp;
+    char *cgroupname;
+    char cgrouppath[MAXPATHLEN];
+
+    if (root) {
+        n = snprintf(cgrouppath, sizeof(cgrouppath), "%s%s", proc_statspath, mnt);
+        length = strlen(cgrouppath);
+    } else {
+        n = snprintf(cgrouppath, sizeof(cgrouppath), "%s%s/%s", proc_statspath, mnt, path);
+        length = strlen(proc_statspath) + strlen(mnt) + 1;
+    }
+    /*
+     * A truncated path names the wrong directory, and in the non-root
+     * case length could then index beyond the end of the string.
+     */
+    if (n < 0 || n >= (int)sizeof(cgrouppath))
+        return PM_ERR_GENERIC;
+
+    if ((dirp = opendir(cgrouppath)) == NULL)
+        return -oserror();
+
+    /* the group name is whatever follows the mount point in the path */
+    cgroupname = &cgrouppath[length];
+    sts = namespace(pmns, subsys, cgrouppath, cgroupname, domain);
+
+    /*
+     * readdir - descend into directories to find all cgroups, then
+     * populate namespace with <controller>[.<groupname>].<metrics>
+     */
+    while ((dp = readdir(dirp)) != NULL) {
+        int lsts;
+
+        if (!valid_pmns_name(dp->d_name))
+            continue;
+        if (path[0] == '\0')
+            n = snprintf(cgrouppath, sizeof(cgrouppath), "%s%s/%s",
+                        proc_statspath, mnt, dp->d_name);
+        else
+            n = snprintf(cgrouppath, sizeof(cgrouppath), "%s%s/%s/%s",
+                        proc_statspath, mnt, path, dp->d_name);
+        if (n < 0 || n >= (int)sizeof(cgrouppath))
+            continue;        /* too long to be scanned reliably */
+        cgroupname = &cgrouppath[length];
+        if (stat(cgrouppath, &sbuf) < 0)
+            continue;
+        if (!(S_ISDIR(sbuf.st_mode)))
+            continue;
+
+        lsts = namespace(pmns, subsys, cgrouppath, cgroupname, domain);
+        if (lsts > 0)
+            sts = 1;
+
+        /*
+         * also scan for any child cgroups, but cgroup_scan() may return
+         * an error
+         *
+         * NOTE(review): the nested call starts by passing this same
+         * directory to namespace() again, so each subdirectory appears
+         * to be registered twice - confirm whether that is intended.
+         */
+        lsts = cgroup_scan(mnt, cgroupname, subsys, domain, pmns, 0);
+        if (lsts > 0)
+            sts = 1;
+    }
+    closedir(dirp);
+    return sts;
+}
+
+/*
+ * Release everything held for the groups of one subsystem: the atom
+ * arrays of every metric (plus the strings they point at for metrics
+ * flagged dynamic), the per-group value tables and any process lists.
+ * The groups[] array itself is kept, with every entry zeroed and
+ * group_count reset to zero.
+ */
+static void
+reset_subsys_stats(cgroup_subsys_t *subsys)
+{
+    int gi;
+
+    for (gi = 0; gi < subsys->group_count; gi++) {
+        cgroup_group_t *grp = &subsys->groups[gi];
+        int mi;
+
+        for (mi = 0; mi < subsys->metric_count; mi++) {
+            cgroup_values_t *vals = &grp->metric_values[mi];
+            pmAtomValue *atoms = vals->atoms;
+
+            /* only dynamic metrics own the strings their atoms reference */
+            if (subsys->metrics[mi].dynamic) {
+                int ai;
+
+                for (ai = 0; ai < vals->atom_count; ai++)
+                    free(atoms[ai].cp);
+            }
+            free(atoms);
+        }
+        free(grp->metric_values);
+        if (grp->process_list.size)
+            free(grp->process_list.pids);
+        memset(grp, 0, sizeof(cgroup_group_t));
+    }
+    subsys->group_count = 0;
+}
+
+/*
+ * Rebuild the cgroup state of every controller and, when the caller
+ * supplies one, the dynamic namespace tree that goes with it.
+ *
+ * pmda - PMDA handle; its e_domain is used as the domain number
+ * pmns - in/out: current tree, replaced by the rebuilt one; may be NULL
+ *        when the caller has no use for the tree
+ *
+ * Work is done at most once per second: a repeat call within the same
+ * second as the last completed rebuild hands back the tree built then
+ * and returns 0.
+ *
+ * Returns 1 if any mount point scan reported success, 0 otherwise
+ * (including when a new tree cannot be created).
+ */
+int
+refresh_cgroups(pmdaExt *pmda, __pmnsTree **pmns)
+{
+    int i, sts, inst, mtab = 0;
+    int domain = pmda->e_domain;
+    filesys_t *fs;
+    time_t now;
+    static time_t before;
+    static __pmnsTree *beforetree;
+    __pmnsTree *tree = pmns ? *pmns : NULL;
+    pmInDom mounts = INDOM(CGROUP_MOUNTS_INDOM);
+    pmInDom devices = INDOM(DISK_INDOM);
+
+    now = time(NULL);
+    if (tree) {
+        if (now == before) {
+            *pmns = beforetree;
+            return 0;
+        }
+    } else if (now == before)
+        return 0;
+
+    refresh_cgroup_filesys(mounts);
+    refresh_cgroup_devices(devices);
+
+    if (tree) {
+        /*
+         * Forget the old tree everywhere before freeing it, so that a
+         * failure below cannot leave the caller (or the cached copy)
+         * holding a pointer to freed memory.
+         */
+        if (beforetree == tree)
+            beforetree = NULL;
+        *pmns = NULL;
+        __pmFreePMNS(tree);
+        tree = NULL;
+    }
+
+    if ((sts = __pmNewPMNS(&tree)) < 0) {
+        __pmNotifyErr(LOG_ERR, "%s: failed to create new pmns: %s\n",
+                        pmProgname, pmErrStr(sts));
+        return 0;
+    }
+
+    for (i = 0; i < sizeof(controllers)/sizeof(controllers[0]); i++) {
+        cgroup_subsys_t *subsys = &controllers[i];
+
+        /*
+         * Fetch latest state for subsystem and groups of the given clusters,
+         * by walking the cgroup mounts, finding the mounts of this subsystem
+         * type, and descending into all of the groups (subdirs)
+         */
+        reset_subsys_stats(subsys);
+
+        pmdaCacheOp(mounts, PMDA_CACHE_WALK_REWIND);
+        while ((inst = pmdaCacheOp(mounts, PMDA_CACHE_WALK_NEXT)) != -1) {
+            if (!pmdaCacheLookup(mounts, inst, NULL, (void **)&fs))
+                continue;
+            if (scan_filesys_options(fs->options, subsys->name) == NULL)
+                continue;
+            sts = cgroup_scan(fs->path, "", subsys, domain, tree, 1);
+            if (sts > 0)
+                mtab = 1;
+        }
+    }
+
+    if (pmns) {
+        /* remember the tree and the second it was built in */
+        *pmns = tree;
+        beforetree = tree;
+        before = now;
+    } else
+        __pmFreePMNS(tree);
+
+    return mtab;
+}
+
+/*
+ * Shared fetch callback for all cgroups metrics.
+ *
+ * The PMID is split into group ID, cluster and metric item; the matching
+ * controller, group and value slot are then looked up in turn.
+ *
+ * Returns 1 with *atom filled in on success, PM_ERR_VALUE when the slot
+ * holds no values, PM_ERR_INST for an out-of-range instance or one whose
+ * value is ULLONG_MAX, and PM_ERR_PMID when nothing matches the PMID.
+ */
+int
+cgroup_group_fetch(pmID pmid, unsigned int inst, pmAtomValue *atom)
+{
+    int c, g, m;
+    int gid = cgroup_pmid_group(pmid);
+    int metric = cgroup_pmid_metric(pmid);
+    int cluster = proc_pmid_cluster(pmid);
+
+    for (c = 0; c < sizeof(controllers)/sizeof(controllers[0]); c++) {
+        cgroup_subsys_t *subsys = &controllers[c];
+
+        if (subsys->cluster != cluster)
+            continue;
+        for (g = 0; g < subsys->group_count; g++) {
+            cgroup_values_t *values;
+
+            if (subsys->groups[g].id != gid)
+                continue;
+            values = subsys->groups[g].metric_values;
+            for (m = 0; m < subsys->metric_count; m++) {
+                cgroup_values_t *cvp = &values[m];
+
+                if (cvp->item != metric)
+                    continue;
+                if (cvp->atom_count <= 0)
+                    return PM_ERR_VALUE;
+                /* no instance requested: use the first (only) value */
+                if (inst == PM_IN_NULL)
+                    inst = 0;
+                else if (inst >= cvp->atom_count ||
+                         cvp->atoms[inst].ull == ULLONG_MAX)
+                    return PM_ERR_INST;
+                *atom = cvp->atoms[inst];
+                return 1;
+            }
+        }
+    }
+    return PM_ERR_PMID;
+}
+
+/*
+ * Answers the question "how much extra space needs to be allocated in
+ * the metric table for (dynamic) cgroup metrics?"  There are static
+ * entries for group ID zero; non-zero group IDs need entries of their own.
+ *
+ * On return *total holds the sum of metric_count over all controllers
+ * and *trees holds the largest group ID seen in any controller (zero
+ * when no group has a higher ID).
+ */
+static void
+size_metrictable(int *total, int *trees)
+{
+    int c, g;
+    int highest = 0;        /* largest group ID seen so far */
+    int count = 0;        /* metrics summed across all controllers */
+
+    for (c = 0; c < sizeof(controllers)/sizeof(controllers[0]); c++) {
+        cgroup_subsys_t *subsys = &controllers[c];
+
+        for (g = 0; g < subsys->group_count; g++) {
+            if (subsys->groups[g].id > highest)
+                highest = subsys->groups[g].id;
+        }
+        count += subsys->metric_count;
+    }
+
+    if (pmDebug & DBG_TRACE_APPL0)
+        fprintf(stderr, "size_metrictable: %d total x %d trees\n",
+                count, highest);
+
+    *total = count;
+    *trees = highest;
+}
+
+/*
+ * Create new metric table entry for a group based on an existing one:
+ * dest becomes a copy of source whose PMID carries group ID gid, with
+ * the domain, cluster and item taken from the source PMID.
+ */
+static void
+refresh_metrictable(pmdaMetric *source, pmdaMetric *dest, int gid)
+{
+    pmID from = source->m_desc.pmid;
+    int domain = pmid_domain(from);
+    int cluster = proc_pmid_cluster(from);
+    int item = pmid_item(from);
+
+    *dest = *source;
+    dest->m_desc.pmid = cgroup_pmid_build(domain, cluster, gid, item);
+
+    if (pmDebug & DBG_TRACE_APPL1)
+        fprintf(stderr, "refresh_metrictable: (%p -> %p)\n", source, dest);
+    if (pmDebug & DBG_TRACE_APPL0) {
+        pmID to = dest->m_desc.pmid;
+
+        fprintf(stderr, "cgroup metric ID dup: %d.[%d.%d].%d - %d.[%d.%d].%d\n",
+                domain, cluster,
+                cgroup_pmid_group(source->m_desc.pmid),
+                cgroup_pmid_metric(source->m_desc.pmid),
+                pmid_domain(to),
+                proc_pmid_cluster(to),
+                cgroup_pmid_group(to),
+                cgroup_pmid_metric(to));
+    }
+}
+
+/*
+ * Help text callback for the dynamic cgroup metrics: no text is
+ * provided here, so every request is answered with PM_ERR_TEXT.
+ */
+static int
+cgroup_text(pmdaExt *pmda, pmID pmid, int type, char **buf)
+{
+    /* none of the arguments are consulted */
+    (void)pmda;
+    (void)pmid;
+    (void)type;
+    (void)buf;
+
+    return PM_ERR_TEXT;
+}
+
+/*
+ * One-off setup of the item numbering for cgroup metrics.
+ *
+ * For each controller, its own metrics are numbered 0..metric_count-1,
+ * and every entry of the global metric table that lives in the
+ * controller's cluster is given a PMID with consecutive item numbers
+ * starting from zero, in table order.
+ */
+static void
+cgroup_metrics_init(pmdaMetric *metrics, int nmetrics)
+{
+    int c, m;
+
+    for (c = 0; c < sizeof(controllers)/sizeof(controllers[0]); c++) {
+        cgroup_subsys_t *subsys = &controllers[c];
+        int cluster = subsys->cluster;
+        int next = 0;        /* next item number within this cluster */
+
+        /* set initial default values for controller metrics item field */
+        for (m = 0; m < subsys->metric_count; m++)
+            subsys->metrics[m].item = m;
+
+        /* set initial seed values for dynamic PMIDs in global metric table */
+        for (m = 0; m < nmetrics; m++) {
+            pmdaMetric *mp = &metrics[m];
+
+            if (pmid_cluster(mp->m_desc.pmid) != subsys->cluster)
+                continue;
+            mp->m_desc.pmid = PMDA_PMID(cluster, next);
+            next++;
+        }
+    }
+}
+
+/*
+ * Set up cgroup metric support: seed the item numbers in the metric
+ * table, then register the dynamic namespace rooted at CGROUP_ROOT
+ * together with its refresh, help text and metric table callbacks, and
+ * set the cluster mask used for that namespace.
+ */
+void
+cgroup_init(pmdaMetric *metrics, int nmetrics)
+{
+    /* clusters served by the dynamic cgroup namespace */
+    static int clusters[] = {
+        CLUSTER_BLKIO_GROUPS,
+        CLUSTER_CPUSET_GROUPS,
+        CLUSTER_CPUACCT_GROUPS,
+        CLUSTER_CPUSCHED_GROUPS,
+        CLUSTER_MEMORY_GROUPS,
+        CLUSTER_NET_CLS_GROUPS,
+    };
+
+    cgroup_metrics_init(metrics, nmetrics);
+
+    pmdaDynamicPMNS(CGROUP_ROOT,
+                    clusters, sizeof(clusters) / sizeof(clusters[0]),
+                    refresh_cgroups, cgroup_text,
+                    refresh_metrictable, size_metrictable,
+                    metrics, nmetrics);
+    pmdaDynamicSetClusterMask(CGROUP_ROOT, CGROUP_MASK);
+}
diff --git a/src/pmdas/linux_proc/cgroups.h b/src/pmdas/linux_proc/cgroups.h
new file mode 100644
index 0000000..d2ec430
--- /dev/null
+++ b/src/pmdas/linux_proc/cgroups.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2013-2014 Red Hat.
+ * Copyright (c) 2010 Aconex. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef _CGROUP_H
+#define _CGROUP_H
+
+/*
+ * Note: cgroup metrics have an "extra" component - the cluster part
+ * of the PMID (12 bits) is split into two (6 bits each): the bottom
+ * part contains the regular metric (cluster) ID while the top holds
+ * the cgroup ID (index - e.g. this is the 3rd cgroup we've seen for
+ * a particular subsystem).
+ */
+
+#define CGROUP_SPLIT 6
+#define CGROUP_MASK ((1 << CGROUP_SPLIT) - 1)
+
+/*
+ * Build a cgroup PMID: the group ID is placed above the low CGROUP_SPLIT
+ * bits of the cluster field, which carry the regular cluster number.
+ */
+static inline pmID
+cgroup_pmid_build(unsigned int domain, unsigned int cluster,
+                unsigned int gid, unsigned int metric)
+{
+    unsigned int combined = (gid << CGROUP_SPLIT) | cluster;
+
+    return pmid_build(domain, combined, metric);
+}
+
+/* Extract the cgroup ID: the cluster field above its low CGROUP_SPLIT bits */
+static inline unsigned int
+cgroup_pmid_group(pmID id)
+{
+    unsigned int field = pmid_cluster(id);
+
+    return field >> CGROUP_SPLIT;
+}
+
+/* Extract the regular cluster number: the low CGROUP_SPLIT bits of the cluster field */
+static inline unsigned int
+proc_pmid_cluster(pmID id)
+{
+    unsigned int field = pmid_cluster(id);
+
+    return field & CGROUP_MASK;
+}
+
+/* Extract the metric number, which is simply the item field of the PMID */
+static inline unsigned int
+cgroup_pmid_metric(pmID id)
+{
+    unsigned int item = pmid_item(id);
+
+    return item;
+}
+
+/*
+ * General cgroup interfaces
+ */
+extern void cgroup_init(pmdaMetric *, int);
+extern char *cgroup_find_subsys(pmInDom, void *);
+extern int cgroup_group_fetch(pmID, unsigned int, pmAtomValue *);
+
+/*
+ * Metric name and value refresh interfaces
+ */
+extern int refresh_cgroups(pmdaExt *, __pmnsTree **);
+
+/*
+ * Indom-specific interfaces
+ */
+extern void refresh_cgroup_cpus(pmInDom);
+extern void refresh_cgroup_devices(pmInDom);
+extern void refresh_cgroup_filesys(pmInDom);
+extern void refresh_cgroup_subsys(pmInDom);
+
+#endif /* _CGROUP_H */
diff --git a/src/pmdas/linux_proc/clusters.h b/src/pmdas/linux_proc/clusters.h
new file mode 100644
index 0000000..e1c8c2a
--- /dev/null
+++ b/src/pmdas/linux_proc/clusters.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2013-2014 Red Hat.
+ * Copyright (c) 2005,2007-2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _CLUSTERS_H
+#define _CLUSTERS_H
+
+/*
+ * fetch cluster numbers ... to manage the PMID migration after the
+ * linux -> linux + proc PMDAs split, these need to match the enum
+ * assigned values for CLUSTER_* from the linux PMDA.
+ */
+#define CLUSTER_PID_STAT 8 /* /proc/<pid>/stat */
+#define CLUSTER_PID_STATM 9 /* /proc/<pid>/statm + /proc/<pid>/maps */
+#define CLUSTER_CONTROL 10 /* instance + value fetch control metrics */
+#define CLUSTER_PID_CGROUP 11 /* /proc/<pid>/cgroup */
+#define CLUSTER_PID_LABEL 12 /* /proc/<pid>/attr/current (label) */
+#define CLUSTER_PROC_RUNQ 13 /* number of processes in various states */
+#define CLUSTER_PID_STATUS 24 /* /proc/<pid>/status */
+#define CLUSTER_PID_SCHEDSTAT 31 /* /proc/<pid>/schedstat */
+#define CLUSTER_PID_IO 32 /* /proc/<pid>/io */
+#define CLUSTER_CGROUP_SUBSYS 37 /* /proc/cgroups control group subsystems */
+#define CLUSTER_CGROUP_MOUNTS 38 /* /proc/mounts active control groups */
+#define CLUSTER_CPUSET_GROUPS 39 /* cpuset control groups */
+#define CLUSTER_CPUACCT_GROUPS 41 /* cpu accounting control groups */
+#define CLUSTER_CPUSCHED_GROUPS 43 /* scheduler control groups */
+#define CLUSTER_MEMORY_GROUPS 45 /* memory control groups */
+#define CLUSTER_NET_CLS_GROUPS 47 /* network classification control groups */
+#define CLUSTER_BLKIO_GROUPS 49 /* blkio control groups */
+#define CLUSTER_PID_FD 51 /* /proc/<pid>/fd */
+ /* Note: do not use higher than (1 << CGROUP_SPLIT)-1 as cluster ID */
+
+#define MIN_CLUSTER 8 /* first cluster number we use here */
+#define NUM_CLUSTERS 52 /* one more than highest cluster number used */
+#define MAX_CLUSTER 63 /* last available - fill gaps if more needed */
+
+#endif /* _CLUSTERS_H */
diff --git a/src/pmdas/linux_proc/contexts.c b/src/pmdas/linux_proc/contexts.c
new file mode 100644
index 0000000..f213c14
--- /dev/null
+++ b/src/pmdas/linux_proc/contexts.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2013 Red Hat.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "pmapi.h"
+#include "impl.h"
+#include "pmda.h"
+#include "contexts.h"
+
+static proc_perctx_t *ctxtab;
+static int num_ctx;
+static uid_t baseuid;
+static gid_t basegid;
+
+/*
+ * Put one context table slot back into its pristine, inactive state.
+ * The caller must ensure ctx indexes an allocated slot; nothing is freed.
+ */
+static void
+proc_ctx_clear(int ctx)
+{
+    proc_perctx_t *pp = &ctxtab[ctx];
+
+    pp->state = CTX_INACTIVE;
+    pp->uid = -1;
+    pp->gid = -1;
+    pp->threads = 1;
+    pp->cgroups = NULL;
+}
+
+/*
+ * A client context has gone away: free its cgroups string if one was
+ * set and mark the slot inactive.  Unknown or already inactive contexts
+ * are ignored.
+ */
+void
+proc_ctx_end(int ctx)
+{
+    proc_perctx_t *pp;
+
+    if (ctx < 0 || ctx >= num_ctx)
+        return;
+    pp = &ctxtab[ctx];
+    if (pp->state == CTX_INACTIVE)
+        return;
+
+    if (pp->state & CTX_CGROUPS)
+        free((void *)pp->cgroups);
+    proc_ctx_clear(ctx);
+}
+
+/*
+ * Make sure the context table has a slot for ctx, growing it when
+ * needed.  Every newly added slot starts out cleared; running out of
+ * memory is reported via __pmNoMem() with PM_FATAL_ERR.
+ */
+static void
+proc_ctx_growtab(int ctx)
+{
+    size_t bytes;
+
+    if (ctx >= num_ctx) {
+        bytes = (ctx + 1) * sizeof(ctxtab[0]);
+        ctxtab = (proc_perctx_t *)realloc(ctxtab, bytes);
+        if (ctxtab == NULL)
+            __pmNoMem("proc ctx table", bytes, PM_FATAL_ERR);
+        /* initialise every slot between the old end and ctx inclusive */
+        for (; num_ctx <= ctx; num_ctx++)
+            proc_ctx_clear(num_ctx);
+    }
+}
+
+/*
+ * Record the user ID (given as a decimal string) for a context, creating
+ * the table slot if necessary and marking it active.
+ */
+static void
+proc_ctx_set_userid(int ctx, const char *value)
+{
+    proc_perctx_t *pp;
+
+    proc_ctx_growtab(ctx);
+    pp = &ctxtab[ctx];
+    pp->uid = atoi(value);
+    pp->state |= (CTX_ACTIVE | CTX_USERID);
+}
+
+/*
+ * Record the group ID (given as a decimal string) for a context, creating
+ * the table slot if necessary and marking it active.
+ */
+static void
+proc_ctx_set_groupid(int ctx, const char *value)
+{
+    proc_perctx_t *pp;
+
+    proc_ctx_growtab(ctx);
+    pp = &ctxtab[ctx];
+    pp->gid = atoi(value);
+    pp->state |= (CTX_ACTIVE | CTX_GROUPID);
+}
+
+/*
+ * Attribute callback: note the user and group ID attributes of a client
+ * context, ignoring all other attributes.  With authentication tracing
+ * enabled the attribute is also logged.  Always returns 0; the length
+ * and pmda arguments are not used.
+ */
+int
+proc_ctx_attrs(int ctx, int attr, const char *value, int length, pmdaExt *pmda)
+{
+    if (pmDebug & DBG_TRACE_AUTH) {
+        char buffer[256];
+
+        if (__pmAttrStr_r(attr, value, buffer, sizeof(buffer))) {
+            buffer[sizeof(buffer)-1] = '\0';
+            __pmNotifyErr(LOG_INFO, "Attribute: ctx=%d %s", ctx, buffer);
+        } else {
+            __pmNotifyErr(LOG_ERR, "Bad Attribute: ctx=%d, attr=%d\n", ctx, attr);
+        }
+    }
+
+    if (attr == PCP_ATTR_USERID)
+        proc_ctx_set_userid(ctx, value);
+    else if (attr == PCP_ATTR_GROUPID)
+        proc_ctx_set_groupid(ctx, value);
+
+    return 0;
+}
+
+/*
+ * Remember the user and group IDs this process is running with, as
+ * reported by getuid() and getgid(), so they can be compared against
+ * (and returned to) later.
+ */
+void
+proc_ctx_init(void)
+{
+    basegid = getgid();
+    baseuid = getuid();
+}
+
+/*
+ * Switch the effective group and user IDs to those recorded for a
+ * context, where they differ from the IDs this process started with.
+ *
+ * Returns 1 only when the context has both a group ID and a user ID and
+ * neither switch failed; returns 0 otherwise, including for unknown or
+ * inactive contexts.  Failures are logged.
+ */
+int
+proc_ctx_access(int ctx)
+{
+    proc_perctx_t *pp;
+    int granted = 0;        /* number of identities in effect */
+
+    if (ctx < 0 || ctx >= num_ctx)
+        return granted;
+    pp = &ctxtab[ctx];
+    if (pp->state == CTX_INACTIVE)
+        return granted;
+
+    if (pp->state & CTX_GROUPID) {
+        if (basegid == pp->gid || setegid(pp->gid) >= 0)
+            granted++;
+        else
+            __pmNotifyErr(LOG_ERR, "setegid(%d) access failed: %s\n",
+                          pp->gid, osstrerror());
+    }
+    if (pp->state & CTX_USERID) {
+        if (baseuid == pp->uid || seteuid(pp->uid) >= 0)
+            granted++;
+        else
+            __pmNotifyErr(LOG_ERR, "seteuid(%d) access failed: %s\n",
+                          pp->uid, osstrerror());
+    }
+    return (granted > 1);
+}
+
+/*
+ * Undo proc_ctx_access(): restore the effective user ID and then the
+ * effective group ID to the values saved at startup, for whichever of
+ * the two the context had changed.  Failures are logged.  Always
+ * returns 0.
+ */
+int
+proc_ctx_revert(int ctx)
+{
+    proc_perctx_t *pp;
+    int user_switched, group_switched;
+
+    if (ctx < 0 || ctx >= num_ctx)
+        return 0;
+    pp = &ctxtab[ctx];
+    if (pp->state == CTX_INACTIVE)
+        return 0;
+
+    user_switched = (pp->state & CTX_USERID) && baseuid != pp->uid;
+    group_switched = (pp->state & CTX_GROUPID) && basegid != pp->gid;
+
+    if (user_switched && seteuid(baseuid) < 0)
+        __pmNotifyErr(LOG_ERR, "seteuid(%d) revert failed: %s\n",
+                      baseuid, osstrerror());
+    if (group_switched && setegid(basegid) < 0)
+        __pmNotifyErr(LOG_ERR, "setegid(%d) revert failed: %s\n",
+                      basegid, osstrerror());
+    return 0;
+}
+
+/*
+ * Return the threads setting for a context: the value the client has
+ * stored if there is one, otherwise the caller-supplied default.
+ */
+unsigned int
+proc_ctx_threads(int ctx, unsigned int threads)
+{
+    if (ctx >= 0 && ctx < num_ctx) {
+        proc_perctx_t *pp = &ctxtab[ctx];
+
+        if (pp->state != CTX_INACTIVE && (pp->state & CTX_THREADS))
+            return pp->threads;        /* client setting */
+    }
+    return threads;        /* fallback to default */
+}
+
+/*
+ * Store a per-client threads setting, which must be 0 or 1.
+ *
+ * Returns 0 on success, PM_ERR_NOCONTEXT for an unknown or inactive
+ * context and PM_ERR_CONV for a value greater than 1.
+ */
+int
+proc_ctx_set_threads(int ctx, unsigned int threads)
+{
+    proc_perctx_t *pp = NULL;
+
+    if (ctx >= 0 && ctx < num_ctx)
+        pp = &ctxtab[ctx];
+    if (pp == NULL || pp->state == CTX_INACTIVE)
+        return PM_ERR_NOCONTEXT;
+    if (threads > 1)
+        return PM_ERR_CONV;
+
+    pp->threads = threads;
+    pp->state |= CTX_THREADS;
+    return 0;
+}
+
+/*
+ * Return the cgroups setting for a context: the string the client has
+ * stored if there is one, otherwise the caller-supplied default.
+ */
+const char *
+proc_ctx_cgroups(int ctx, const char *cgroups)
+{
+    if (ctx >= 0 && ctx < num_ctx) {
+        proc_perctx_t *pp = &ctxtab[ctx];
+
+        if (pp->state != CTX_INACTIVE && (pp->state & CTX_CGROUPS))
+            return pp->cgroups;        /* client setting */
+    }
+    return cgroups;        /* fallback to default */
+}
+
+/*
+ * Store a per-client cgroups string for a context.
+ *
+ * On success the table keeps the pointer itself (no copy is made) and
+ * proc_ctx_end() later frees it, so the string must be heap allocated
+ * and must not be freed by the caller.  On failure the string is left
+ * untouched.
+ *
+ * Returns 0 on success, PM_ERR_NOCONTEXT for an unknown or inactive
+ * context and PM_ERR_CONV for a NULL or empty string.
+ */
+int
+proc_ctx_set_cgroups(int ctx, const char *cgroups)
+{
+    proc_perctx_t *pp;
+
+    if (ctx < 0 || ctx >= num_ctx)
+        return PM_ERR_NOCONTEXT;
+    pp = &ctxtab[ctx];
+    if (pp->state == CTX_INACTIVE)
+        return PM_ERR_NOCONTEXT;
+    if (cgroups == NULL || cgroups[0] == '\0')
+        return PM_ERR_CONV;
+
+    /* release a previously stored string rather than leaking it */
+    if ((pp->state & CTX_CGROUPS) && pp->cgroups != cgroups)
+        free((void *)pp->cgroups);
+
+    pp->state |= CTX_CGROUPS;
+    pp->cgroups = cgroups;
+    return 0;
+}
diff --git a/src/pmdas/linux_proc/contexts.h b/src/pmdas/linux_proc/contexts.h
new file mode 100644
index 0000000..c2abe8c
--- /dev/null
+++ b/src/pmdas/linux_proc/contexts.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2013 Red Hat.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _CONTEXTS_H
+#define _CONTEXTS_H
+
+/*
+ * Handle newly arriving clients, security attributes being set on 'em,
+ * switching to alternative accounts (temporarily) and back, and client
+ * termination. State maintained in a global table, with a high-water
+ * allocator and active/inactive entry tracking.
+ *
+ * The proc.control.perclient metrics also have state tracked here now.
+ */
+
+/*
+ * Per-context state: zero for an unused slot, otherwise a combination
+ * of the single-bit flags below.
+ */
+enum {
+    CTX_INACTIVE = 0,
+    CTX_ACTIVE   = 1 << 0,
+    CTX_USERID   = 1 << 1,
+    CTX_GROUPID  = 1 << 2,
+    CTX_THREADS  = 1 << 3,
+    CTX_CGROUPS  = 1 << 4,
+};
+
+typedef struct {	/* state held for one client context */
+ unsigned int state;	/* CTX_INACTIVE, or CTX_* flags OR'd together */
+ uid_t uid;	/* client user ID, set along with CTX_USERID */
+ gid_t gid;	/* client group ID, set along with CTX_GROUPID */
+ unsigned int threads;	/* client threads setting, set along with CTX_THREADS */
+ const char *cgroups;	/* client cgroups string, set along with CTX_CGROUPS */
+} proc_perctx_t;
+
+extern void proc_ctx_init(void);
+extern int proc_ctx_attrs(int, int, const char *, int, pmdaExt *);
+extern void proc_ctx_end(int);
+
+extern int proc_ctx_access(int);
+extern int proc_ctx_revert(int);
+
+extern unsigned int proc_ctx_threads(int, unsigned int);
+extern int proc_ctx_set_threads(int, unsigned int);
+
+extern const char *proc_ctx_cgroups(int, const char *);
+extern int proc_ctx_set_cgroups(int, const char *);
+
+#endif /* _CONTEXTS_H */
diff --git a/src/pmdas/linux_proc/getinfo.c b/src/pmdas/linux_proc/getinfo.c
new file mode 100644
index 0000000..b4633a5
--- /dev/null
+++ b/src/pmdas/linux_proc/getinfo.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2010 Aconex. All Rights Reserved.
+ * Copyright (c) 2000,2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <sys/stat.h>
+#include <sys/dir.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include "pmapi.h"
+
+/*
+ * Find the name of the terminal with device number dev by looking
+ * through the open file descriptors of process pid for a character
+ * device with a matching device number.
+ *
+ * pid     - process whose /proc/<pid>/fd directory is searched
+ * dev     - device number of the terminal being looked for
+ * ttyname - output buffer; receives the device path with any leading
+ *           "/dev/" removed, or "?" when no match is found
+ *
+ * Returns ttyname.
+ *
+ * NOTE(review): the size of ttyname is not known here, so the final
+ * copy is unbounded - callers presumably pass a MAXPATHLEN-sized
+ * buffer; confirm.
+ */
+char *
+get_ttyname_info(int pid, dev_t dev, char *ttyname)
+{
+    DIR *dir;
+    struct dirent *dp;
+    struct stat sbuf;
+    int found = 0;
+    char procpath[MAXPATHLEN];
+    char ttypath[MAXPATHLEN];
+
+    snprintf(procpath, sizeof(procpath), "/proc/%d/fd", pid);
+    if ((dir = opendir(procpath)) != NULL) {
+        while ((dp = readdir(dir)) != NULL) {
+            /* <ctype.h> functions need an unsigned char value */
+            if (!isdigit((unsigned char)dp->d_name[0]))
+                continue;
+            snprintf(procpath, sizeof(procpath), "/proc/%d/fd/%s", pid, dp->d_name);
+            if (realpath(procpath, ttypath) == NULL || stat(ttypath, &sbuf) < 0)
+                continue;
+            if (S_ISCHR(sbuf.st_mode) && dev == sbuf.st_rdev) {
+                found = 1;
+                break;
+            }
+        }
+        closedir(dir);
+    }
+
+    if (!found)
+        strcpy(ttyname, "?");
+    else if (strncmp(ttypath, "/dev/", 5) == 0)
+        /* skip the "/dev/" prefix */
+        strcpy(ttyname, &ttypath[5]);
+    else
+        /* not under /dev: indexing past a shorter path would be unsafe */
+        strcpy(ttyname, ttypath);
+
+    return ttyname;
+}
diff --git a/src/pmdas/linux_proc/getinfo.h b/src/pmdas/linux_proc/getinfo.h
new file mode 100644
index 0000000..9006c00
--- /dev/null
+++ b/src/pmdas/linux_proc/getinfo.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2010 Aconex. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+extern char *get_ttyname_info(int, dev_t, char *);
+
diff --git a/src/pmdas/linux_proc/help b/src/pmdas/linux_proc/help
new file mode 100644
index 0000000..6640a08
--- /dev/null
+++ b/src/pmdas/linux_proc/help
@@ -0,0 +1,220 @@
+#
+# Copyright (c) 2000,2004-2008 Silicon Graphics, Inc. All Rights Reserved.
+# Portions Copyright (c) International Business Machines Corp., 2002
+# Portions Copyright (c) 2007-2009 Aconex. All Rights Reserved.
+# Portions Copyright (c) 2013 Red Hat.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# Linux proc PMDA help file in the ASCII format
+#
+# lines beginning with a # are ignored
+# lines beginning @ introduce a new entry of the form
+# @ metric_name oneline-text
+# help text goes
+# here over multiple lines
+# ...
+#
+# the metric_name is decoded against the default PMNS -- as a special case,
+# a name of the form NNN.MM (for numeric NNN and MM) is interpreted as an
+# instance domain identification, and the text describes the instance domain
+#
+# blank lines before the @ line are ignored
+#
+
+@ cgroup.subsys.hierarchy subsystem hierarchy from /proc/cgroups
+@ cgroup.subsys.count count of known subsystems in /proc/cgroups
+@ cgroup.mounts.subsys mount points for each cgroup subsystem
+@ cgroup.mounts.count count of cgroup filesystem mount points
+
+@ proc.nprocs instantaneous number of processes
+@ proc.psinfo.pid process identifier
+@ proc.psinfo.psargs full command string
+@ proc.psinfo.cmd command name
+@ proc.psinfo.sname process state identifier (see ps(1)). See also proc.runq metrics.
+@ proc.psinfo.ppid parent process identifier
+@ proc.psinfo.pgrp process group identifier
+@ proc.psinfo.session process session identifier
+@ proc.psinfo.tty controlling tty device number (zero if none)
+@ proc.psinfo.tty_pgrp controlling tty process group identifier
+@ proc.psinfo.flags process state flags, as a bitmap
+@ proc.psinfo.minflt count of minor page faults (i.e. reclaims)
+@ proc.psinfo.cmin_flt count of minor page faults (i.e. reclaims) of all exited children
+@ proc.psinfo.maj_flt count of page faults other than reclaims
+@ proc.psinfo.cmaj_flt count of page faults other than reclaims of all exited children
+@ proc.psinfo.utime time (in ms) spent executing user code since process started
+@ proc.psinfo.stime time (in ms) spent executing system code (calls) since process started
+@ proc.psinfo.cutime time (in ms) spent executing user code of all exited children
+@ proc.psinfo.cstime time (in ms) spent executing system code of all exited children
+@ proc.psinfo.priority priority value
+@ proc.psinfo.nice process nice value (negative nice values are higher priority)
+@ proc.psinfo.it_real_value current interval timer value (zero if none)
+@ proc.psinfo.start_time start time of the process relative to system boot time in seconds
+@ proc.psinfo.vsize virtual size of the process in Kbytes
+@ proc.psinfo.rss resident set size (i.e. physical memory) of the process
+@ proc.psinfo.rss_rlim limit on resident set size of process
+@ proc.psinfo.start_code address of the start of the code segment for the process
+@ proc.psinfo.end_code address of the end of the code segment for the process
+@ proc.psinfo.start_stack address of the stack segment for the process
+@ proc.psinfo.esp the value in the esp field of struct task_struct for the process
+@ proc.psinfo.eip the value in the eip field of struct task_struct for the process
+@ proc.psinfo.signal the value in the signal field of struct task_struct for the process
+@ proc.psinfo.blocked the value in the blocked field of struct task_struct for the process
+@ proc.psinfo.sigignore the value in the sigignore field of struct task_struct for the process
+@ proc.psinfo.sigcatch the value in the sigcatch field of struct task_struct for the process
+@ proc.psinfo.wchan wait channel, kernel address this process is blocked or sleeping on
+@ proc.psinfo.nswap count of page swap operations
+@ proc.psinfo.cnswap count of page swap operations of all exited children
+@ proc.psinfo.exit_signal the value in the exit_signal field of struct task_struct for the process
+@ proc.psinfo.ttyname name of controlling tty device, or "?" if none. See also proc.psinfo.tty.
+@ proc.psinfo.processor last CPU the process was running on
+@ proc.psinfo.wchan_s name of an event for which the process is sleeping (if blank, the process is running).
+This field needs access to a namelist file for proper
+address-to-symbol name translation. If no namelist file
+is available, the address is printed instead. The namelist
+file must match the current Linux kernel exactly.
+The search path for the namelist file is as follows:
+ /boot/System.map-`uname -r`
+ /boot/System.map
+ /lib/modules/`uname -r`/System.map
+ /usr/src/linux/System.map
+ /System.map
+@ proc.psinfo.signal_s pending signals mask in string form (from /proc/<pid>/status)
+@ proc.psinfo.blocked_s blocked signals mask in string form (from /proc/<pid>/status)
+@ proc.psinfo.sigignore_s ignored signals mask in string form (from /proc/<pid>/status)
+@ proc.psinfo.sigcatch_s caught signals mask in string form (from /proc/<pid>/status)
+@ proc.psinfo.threads number of threads (from /proc/<pid>/status)
+@ proc.psinfo.cgroups list of processes cgroups (from /proc/<pid>/cgroup)
+@ proc.psinfo.labels list of processes security labels (from /proc/<pid>/attr/current)
+@ proc.memory.size instantaneous virtual size of process, excluding page table and task structure.
+@ proc.memory.rss instantaneous resident size of process, excluding page table and task structure.
+@ proc.memory.share instantaneous amount of memory shared by this process with other processes
+@ proc.memory.textrss instantaneous resident size of process code segment in Kbytes
+@ proc.memory.librss instantaneous resident size of library code mapped by the process, in Kbytes
+@ proc.memory.datrss instantaneous resident size of process data segment, in Kbytes
+@ proc.memory.dirty instantaneous amount of memory that has been modified by the process, in Kbytes
+@ proc.memory.maps table of memory mapped by process in string form from /proc/<pid>/maps
+@ proc.memory.vmsize total virtual memory (from /proc/<pid>/status)
+@ proc.memory.vmlock locked virtual memory (from /proc/<pid>/status)
+@ proc.memory.vmrss resident virtual memory (from /proc/<pid>/status)
+@ proc.memory.vmdata virtual memory used for data (from /proc/<pid>/status)
+@ proc.memory.vmstack virtual memory used for stack (from /proc/<pid>/status)
+@ proc.memory.vmexe virtual memory used for non-library executable code (from /proc/<pid>/status)
+@ proc.memory.vmlib virtual memory used for libraries (from /proc/<pid>/status)
+@ proc.memory.vmswap virtual memory that has been brought in and out.
+@ proc.id.uid real user ID from /proc/<pid>/status
+@ proc.id.euid effective user ID from /proc/<pid>/status
+@ proc.id.suid saved user ID from /proc/<pid>/status
+@ proc.id.fsuid filesystem user ID from /proc/<pid>/status
+@ proc.id.gid real group ID from /proc/<pid>/status
+@ proc.id.egid effective group ID from /proc/<pid>/status
+@ proc.id.sgid saved group ID from /proc/<pid>/status
+@ proc.id.fsgid filesystem group ID from /proc/<pid>/status
+@ proc.id.uid_nm real user name based on real user ID from /proc/<pid>/status
+@ proc.id.euid_nm effective user name based on effective user ID from /proc/<pid>/status
+@ proc.id.suid_nm saved user name based on saved user ID from /proc/<pid>/status
+@ proc.id.fsuid_nm filesystem user name based on filesystem user ID from /proc/<pid>/status
+@ proc.id.gid_nm real group name based on real group ID from /proc/<pid>/status
+@ proc.id.egid_nm effective group name based on effective group ID from /proc/<pid>/status
+@ proc.id.sgid_nm saved group name based on saved group ID from /proc/<pid>/status
+@ proc.id.fsgid_nm filesystem group name based on filesystem group ID from /proc/<pid>/status
+
+@ proc.runq.runnable number of runnable (on run queue) processes
+Instantaneous number of runnable (on run queue) processes, state 'R' in ps
+@ proc.runq.blocked number of processes in uninterruptible sleep
+Instantaneous number of processes in uninterruptible sleep, state 'D' in ps
+@ proc.runq.sleeping number of processes sleeping
+Instantaneous number of processes sleeping, state 'S' in ps
+@ proc.runq.stopped number of traced, stopped or suspended processes
+Instantaneous number of traced, stopped or suspended processes, state
+'T' in ps
+@ proc.runq.swapped number of processes that are swapped
+Instantaneous number of processes (excluding kernel threads) that are
+swapped, state 'SW' in ps
+@ proc.runq.defunct number of defunct/zombie processes
+Instantaneous number of defunct/zombie processes, state 'Z' in ps
+@ proc.runq.unknown number of processes in an unknown state
+Instantaneous number of processes in an unknown state, including all
+kernel threads
+@ proc.runq.kernel number of kernel threads
+Instantaneous number of processes with virtual size of zero (kernel threads)
+
+@ proc.io.rchar read(), readv() and sendfile() receive bytes
+Extended accounting information - count of the number of bytes that
+have passed over the read(2), readv(2) and sendfile(2) syscalls by
+each process.
+
+@ proc.io.wchar write(), writev() and sendfile() send bytes
+Extended accounting information - count of the number of bytes that
+have passed over the write(2), writev(2) and sendfile(2) syscalls by
+each process.
+
+@ proc.io.syscr read(), readv() and sendfile() receive system calls
+Extended accounting information - count of number of calls to the
+read(2), readv(2) and sendfile(2) syscalls by each process.
+
+@ proc.io.syscw write(), writev() and sendfile() send system calls
+Extended accounting information - count of number of calls to the
+write(2), writev(2) and sendfile(2) syscalls by each process.
+
+@ proc.io.read_bytes physical device read bytes
+Number of bytes physically read from devices on behalf of this process.
+@ proc.io.write_bytes physical device write bytes
+Number of bytes physically written to devices on behalf of this process.
+This must be reduced by any truncated I/O (proc.io.cancelled_write_bytes).
+@ proc.io.cancelled_write_bytes physical device write cancelled bytes
+Number of bytes cancelled via truncate by this process. Actual physical
+writes for an individual process can be calculated as:
+ proc.io.write_bytes - proc.io.cancelled_write_bytes.
+
+@ proc.schedstat.cpu_time runnable (scheduled) + run time
+Length of time in nanoseconds that a process has been running, including
+scheduling time.
+@ proc.schedstat.run_delay run queue time
+Length of time in nanoseconds that a process spent waiting to be scheduled
+to run in the run queue.
+@ proc.schedstat.pcount number of times a process is allowed to run
+Number of times a process has been scheduled to run on a CPU (this is
+incremented when a task actually reaches a CPU to run on, not simply
+when it is added to the run queue).
+
+@ proc.fd.count open file descriptors
+Number of file descriptors this process has open.
+
+@ proc.control.all.threads process indom includes threads
+If set to one, the process instance domain as reported by pmdaproc
+contains all threads as well as the processes that started them.
+If set to zero, the process instance domain contains only processes.
+
+This setting is persistent for the life of pmdaproc and affects all
+client tools that request instances and values from pmdaproc.
+Use either pmstore(1) or pmStore(3) to modify this metric.
+
+@ proc.control.perclient.threads for a client, process indom includes threads
+If set to one, the process instance domain as reported by pmdaproc
+contains all threads as well as the processes that started them.
+If set to zero, the process instance domain contains only processes.
+
+This setting is only visible to the active client context. In other
+words, storing into this metric has no effect for other monitoring
+tools. See proc.control.all.threads, if that is the desired outcome.
+Only pmStore(3) can effectively set this metric (pmstore(1) cannot).
+
+@ proc.control.perclient.cgroups for a client, process indom reflects specific cgroups
+If set to the empty string (the default), the process instance domain
+as reported by pmdaproc contains all processes. However, a cgroup
+name (full path) can be stored into this metric in order to restrict
+processes reported to only those within the specified cgroup. This
+set is further affected by the value of proc.control.perclient.threads.
+
+This setting is only visible to the active client context. In other
+words, storing into this metric has no effect for other monitoring
+tools. pmStore(3) must be used to set this metric (not pmstore(1)).
diff --git a/src/pmdas/linux_proc/indom.h b/src/pmdas/linux_proc/indom.h
new file mode 100644
index 0000000..9c928cd
--- /dev/null
+++ b/src/pmdas/linux_proc/indom.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2012-2014 Red Hat.
+ * Copyright (c) 2010 Aconex. All Rights Reserved.
+ * Copyright (c) 2005,2007-2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef _INDOM_H
+#define _INDOM_H
+
+/*
+ * This header includes nothing itself: pmInDom and FILE must already
+ * be declared by the file that includes it.
+ */
+
+/*
+ * indom serial numbers ... to manage the indom migration after the
+ * linux -> linux + proc PMDAs split, these need to match the enum
+ * assigned values for *_INDOM from the linux PMDA. Consequently,
+ * the proc indom table is sparse.
+ */
+#define CPU_INDOM 0 /* - percpu */
+#define DISK_INDOM 1 /* - disks (with normal names) */
+#define DEVT_INDOM 2 /* - disks (major:minor names) */
+#define PROC_INDOM 9 /* - processes */
+#define STRINGS_INDOM 10 /* - fake indom, string hash */
+#define CGROUP_SUBSYS_INDOM 20 /* - control group subsystems */
+#define CGROUP_MOUNTS_INDOM 21 /* - control group mounts */
+
+/*
+ * Bounds of the serial numbers above; NUM_INDOMS is also the size of
+ * any table indexed directly by serial number.
+ */
+#define MIN_INDOM 0 /* first indom number we use here */
+#define NUM_INDOMS 22 /* one more than highest indom number we use here */
+
+/*
+ * Translate one of the serial numbers above into a pmInDom; INDOM(i)
+ * is shorthand for the call.
+ * NOTE(review): presumably combines the serial with the PMDA's domain
+ * number - confirm against the definition of proc_indom().
+ */
+extern pmInDom proc_indom(int);
+#define INDOM(i) proc_indom(i)
+
+/*
+ * Optional path prefix for all stats files, used for testing.
+ *
+ * proc_statsfile() is called with a path, a caller-supplied buffer and
+ * that buffer's size; callers treat a NULL return as failure and then
+ * consult oserror().
+ * NOTE(review): the buffer is presumably scratch space in which the
+ * prefixed path is built - confirm against the implementation.
+ */
+extern char *proc_statspath;
+extern FILE *proc_statsfile(const char *, char *, int);
+
+/*
+ * static string dictionary - one copy of oft-repeated strings;
+ * implemented using STRINGS_INDOM and pmdaCache(3) routines.
+ * NOTE(review): the int handed to lookup is presumably the value
+ * returned by insert - confirm against the implementation.
+ */
+char *proc_strings_lookup(int);
+int proc_strings_insert(const char *);
+
+#endif /* _INDOM_H */
diff --git a/src/pmdas/linux_proc/ksym.c b/src/pmdas/linux_proc/ksym.c
new file mode 100644
index 0000000..1604c84
--- /dev/null
+++ b/src/pmdas/linux_proc/ksym.c
@@ -0,0 +1,564 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2002
+ * Copyright (c) 2003,2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+/*
+ * This code originally contributed by Mike Mason <mmlnx@us.ibm.com>
+ * with hints from the procps and ksymoops projects.
+ */
+
+#include <ctype.h>
+#include <limits.h>
+#include <sys/time.h>
+#include <sys/utsname.h>
+#include "pmapi.h"
+#include "impl.h"
+#include "ksym.h"
+#include "indom.h"
+
+static struct ksym *ksym_a;
+static size_t ksym_a_sz;
+
+/*
+ * Binary search of ksym_a[] over the index range [lo, hi].
+ *
+ * The table must be sorted by ascending address (read_sysmap() leaves
+ * it that way).  Returns the index of the entry whose address equals
+ * addr, or of the entry that immediately precedes addr, i.e. addr lies
+ * strictly between that entry and its successor.  Returns -1 when no
+ * entry qualifies.
+ *
+ * The successor is only examined when it lies inside the table.  The
+ * last entry has no successor, so an address above it is reported as
+ * not found instead of reading ksym_a[ksym_a_sz].
+ */
+static int
+find_index(__psint_t addr, int lo, int hi)
+{
+    int mid;
+
+    while (lo <= hi) {
+        mid = lo + ((hi - lo) / 2);
+
+        if (addr == ksym_a[mid].addr)
+            return mid;
+
+        if (addr > ksym_a[mid].addr) {
+            /* only look at mid+1 when that slot really exists */
+            if ((size_t)mid + 1 < ksym_a_sz && addr < ksym_a[mid+1].addr)
+                return mid;
+            lo = mid + 1;
+        }
+        else
+            hi = mid - 1;
+    }
+
+    return -1;
+}
+
+/*
+ * Return the name of the symbol table entry that find_index() selects
+ * for addr, or NULL when no table is loaded or no entry is selected.
+ */
+static char *
+find_name_by_addr(__psint_t addr)
+{
+    int slot;
+
+    if (ksym_a == NULL)
+        return NULL;
+
+    slot = find_index(addr, 0, ksym_a_sz - 1);
+    return (slot < 0) ? NULL : ksym_a[slot].name;
+}
+
+/*
+ * Look for name among the first maxix entries of ksym_a[].
+ *
+ * Entries without a name are skipped.  The scan stops at the first
+ * entry whose name sorts after the one wanted, so it relies on the
+ * scanned entries being ordered by name.
+ *
+ * Returns KSYM_FOUND when the name exists with the same address,
+ * KSYM_FOUND_MISMATCH when it exists with a different address and
+ * KSYM_NOT_FOUND otherwise.
+ */
+static int
+find_dup_name(int maxix, __psint_t addr, char *name)
+{
+    int idx, cmp;
+
+    for (idx = 0; idx < maxix; idx++) {
+        if (ksym_a[idx].name == NULL)
+            continue;
+
+        cmp = strcmp(ksym_a[idx].name, name);
+        if (cmp > 0)
+            return KSYM_NOT_FOUND;
+        if (cmp == 0)
+            return (ksym_a[idx].addr == addr) ? KSYM_FOUND : KSYM_FOUND_MISMATCH;
+    }
+
+    return KSYM_NOT_FOUND;
+}
+
+/*
+ * Brute force linear search to determine if the kernel version
+ * in System.map matches the running kernel version.  Returns a
+ * tri-state result as follows:
+ *
+ *    0    no match
+ *    1    _end not found but version matched
+ *    2    _end found and matched
+ *
+ * fp       open System.map stream, read from its current position
+ * version  version symbol name to look for
+ * end_addr address of _end from /proc/ksyms, or 0 if unknown (in which
+ *          case _end entries are not used for matching)
+ *
+ * The file is read a line at a time and lines that do not parse as
+ * "address type name" are skipped.  The name conversion is width
+ * limited so an over-long token cannot overflow kname[].
+ */
+static int
+validate_sysmap(FILE *fp, char *version, __psint_t end_addr)
+{
+    char        line[256];
+    char        kname[128];
+    char        type;
+    void        *ptr;
+    int         ret = 0;
+
+    while (fgets(line, sizeof(line), fp) != NULL) {
+        /* all three fields must convert, else kname is not valid */
+        if (sscanf(line, "%p %c %127s", &ptr, &type, kname) != 3)
+            continue;
+
+        if (end_addr && strcmp(kname, "_end") == 0) {
+            ret = (end_addr == (__psint_t)ptr) ? 2 : 0;
+            break;      /* no need to look any further */
+        }
+        if (strcmp(kname, version) == 0)
+            ret = 1;
+    }
+
+    return ret;
+}
+
+/*
+ * Translate a wait channel address into a symbol name for display.
+ *
+ * Address 0 (not in kernel space) yields an empty string.  Otherwise
+ * the name found for the address is returned with any leading "sys_"
+ * and then any leading underscores skipped, or NULL when the address
+ * cannot be resolved.  The result points into storage owned by this
+ * file and is not to be freed.
+ */
+char *
+wchan(__psint_t addr)
+{
+    static char empty;  /* zero-initialised, so an empty string */
+    char        *sym;
+
+    if (addr == 0)
+        return &empty;
+
+    sym = find_name_by_addr(addr);
+    if (sym == NULL)
+        return NULL;
+
+    if (strncmp(sym, "sys_", 4) == 0)
+        sym += 4;
+    while (*sym == '_')
+        sym++;
+
+    return sym;
+}
+
+/*
+ * qsort(3) comparator: order struct ksym entries by ascending address.
+ */
+static int
+ksym_compare_addr(const void *e1, const void *e2)
+{
+    const struct ksym *lhs = e1;
+    const struct ksym *rhs = e2;
+
+    /* yields -1, 0 or 1 without risking overflow from a subtraction */
+    return (lhs->addr > rhs->addr) - (lhs->addr < rhs->addr);
+}
+
+/*
+ * qsort(3) comparator: order struct ksym entries by name using strcmp.
+ * Both entries must have a non-NULL name.
+ */
+static int
+ksym_compare_name(const void *e1, const void *e2)
+{
+    const struct ksym *lhs = e1;
+    const struct ksym *rhs = e2;
+
+    return strcmp(lhs->name, rhs->name);
+}
+
+/*
+ * Load the symbol table from /proc/ksyms into ksym_a[].
+ *
+ * Each well-formed line contributes one entry holding the address, the
+ * symbol name (minus any GPLONLY_ prefix and any trailing symbol
+ * version suffix) and the module name if one is present, else NULL.
+ * Malformed lines are skipped.  The address of the first "_end" symbol
+ * seen is stored through end_addr, which is 0 if there is none.
+ *
+ * On success the table is trimmed to the entries used, sorted by name
+ * and the entry count is returned.  On failure a negative error code
+ * is returned; if memory ran out part way through, the partial table
+ * is released and ksym_a/ksym_a_sz are reset so that no stale pointers
+ * remain visible to later lookups.
+ */
+static int
+read_ksyms(__psint_t *end_addr)
+{
+    char        inbuf[256];
+    char        *ip;
+    char        *sp;
+    char        *tp;
+    char        *p;
+    int         ix = 0;
+    int         l = 0;
+    int         len;
+    int         err;
+    size_t      newsz;
+    FILE        *fp;
+    struct ksym *ksym_tmp;
+
+    *end_addr = 0;
+    if ((fp = proc_statsfile("/proc/ksyms", inbuf, sizeof(inbuf))) == NULL)
+        return -oserror();
+
+    while (fgets(inbuf, sizeof(inbuf), fp) != NULL) {
+        l++;
+
+        /*
+         * /proc/ksyms lines look like this on ia32 ...
+         *
+         * c8804060 __insmod_rtc_S.text_L4576 [rtc]
+         * c010a320 disable_irq_nosync
+         *
+         * else on ia64 ...
+         *
+         * a0000000003e0d28 debug [arsess]
+         * e002100000891140 disable_irq_nosync
+         */
+
+        if (strstr(inbuf, "\n") == NULL) {
+            fprintf(stderr, "read_ksyms: truncated /proc/ksyms line [%d]: %s\n", l-1, inbuf);
+            continue;
+        }
+
+        /* Increase array size, if necessary */
+        if (ksym_a_sz < ix+1) {
+            newsz = (ksym_a_sz > 0) ? ksym_a_sz + INCR_KSIZE : INIT_KSIZE;
+            ksym_tmp = (struct ksym *)realloc(ksym_a, newsz * sizeof(struct ksym));
+            if (ksym_tmp == NULL) {
+                /* the old block is still valid, fail releases it */
+                err = -oserror();
+                goto fail;
+            }
+            ksym_a = ksym_tmp;
+            ksym_a_sz = newsz;
+        }
+
+        ip = inbuf;
+        /* parse over address */
+        while (isxdigit((int)*ip)) ip++;
+
+        if (!isspace((int)*ip) || ip-inbuf < 4) {
+            /* bad format line */
+#if PCP_DEBUG
+            if (pmDebug & DBG_TRACE_APPL2) {
+                fprintf(stderr, "read_ksyms: bad addr? %c[%d] line=\"%s\"\n", *ip, (int)(ip-inbuf), inbuf);
+            }
+#endif
+            continue;
+        }
+
+        sscanf(inbuf, "%p", (void **)&ksym_a[ix].addr);
+
+        while (isblank((int)*ip)) ip++;
+
+        /*
+         * next should be the symbol name ... scan from its first
+         * character so that an empty name cannot step over the
+         * terminating newline and off the end of the buffer
+         */
+        sp = ip;
+        while (*ip != '\0' && !isblank((int)*ip) && *ip != '\n') ip++;
+        if (ip == sp) {
+            /* address with no symbol name */
+            continue;
+        }
+
+        /* strip off GPLONLY_ prefix, if found */
+        if (strncmp(sp, "GPLONLY_", 8) == 0)
+            sp += 8;
+
+        /*
+         * strip off symbol version suffix, if found ... looking for
+         * trailing pattern of the form _R.*[0-9a-fA-F]{8,}
+         * - find rightmost _R within the symbol name, if any
+         */
+        tp = sp;
+        while ((p = strstr(tp, "_R")) != NULL && p < ip)
+            tp = p+2;
+        if (tp > sp) {
+            /*
+             * found _R, need the last 8 characters of the name to
+             * be hex digits
+             */
+            if (ip - tp >= 8) {
+                for (p = &ip[-8]; p < ip; p++) {
+                    if (!isxdigit((int)*p)) {
+                        tp = sp;
+                        break;
+                    }
+                }
+            }
+            else {
+                /* not enough characters for [0-9a-fA-f]{8,} at the end */
+                tp = sp;
+            }
+        }
+        if (tp > sp)
+            /* keep everything before the _R.*[0-9a-fA-f]{8,} suffix */
+            len = tp - sp - 2;
+        else
+            len = ip - sp;
+
+        /* strndup copies exactly len characters and terminates them */
+        ksym_a[ix].name = strndup(sp, len);
+        if (ksym_a[ix].name == NULL) {
+            err = -oserror();
+            goto fail;
+        }
+        /* realloc'd memory is not zeroed: no module until one is parsed */
+        ksym_a[ix].module = NULL;
+
+        if (*end_addr == 0 && strcmp(ksym_a[ix].name, "_end") == 0)
+            *end_addr = ksym_a[ix].addr;
+
+        if (*ip == '\n')
+            /* nothing after the symbol name, so no module name */
+            goto next;
+
+        while (isblank((int)*ip)) ip++;
+
+        /* next expect module name */
+        if (*ip != '[') {
+#if PCP_DEBUG
+            if (pmDebug & DBG_TRACE_APPL2) {
+                fprintf(stderr, "read_ksyms: bad start module name %c[%d] != [ line=\"%s\"\n", *ip, (int)(ip-inbuf), inbuf);
+            }
+#endif
+            free(ksym_a[ix].name);
+            continue;
+        }
+
+        sp = ++ip;
+        /* stop at end of line too, an unterminated "[name" must not overrun */
+        while (*ip != '\0' && *ip != '\n' && !isblank((int)*ip) && *ip != ']') ip++;
+
+        if (*ip != ']') {
+#if PCP_DEBUG
+            if (pmDebug & DBG_TRACE_APPL2) {
+                fprintf(stderr, "read_ksyms: bad end module name %c[%d] != ] line=\"%s\"\n", *ip, (int)(ip-inbuf), inbuf);
+            }
+#endif
+            free(ksym_a[ix].name);
+            continue;
+        }
+
+        ksym_a[ix].module = strndup(sp, ip - sp);
+        if (ksym_a[ix].module == NULL) {
+            err = -oserror();
+            /* entry ix is not counted yet, so drop its name here */
+            free(ksym_a[ix].name);
+            goto fail;
+        }
+
+next:
+        ix++;
+    }
+
+    /* release unused ksym array entries */
+    if (ix) {
+        ksym_tmp = (struct ksym *)realloc(ksym_a, ix * sizeof(struct ksym));
+        /* a failed shrink leaves the larger block intact, so keep it */
+        if (ksym_tmp != NULL)
+            ksym_a = ksym_tmp;
+    }
+
+    ksym_a_sz = ix;
+
+    qsort(ksym_a, ksym_a_sz, sizeof(struct ksym), ksym_compare_name);
+
+    fclose(fp);
+
+#if PCP_DEBUG
+    if (pmDebug & DBG_TRACE_APPL2) {
+        fprintf(stderr, "symbols from ksyms ...\n");
+        for (ix = 0; ix < ksym_a_sz; ix++) {
+            fprintf(stderr, "ksym[%d] " PRINTF_P_PFX "%p %s", ix, (void *)ksym_a[ix].addr, ksym_a[ix].name);
+            if (ksym_a[ix].module != NULL) fprintf(stderr, " [%s]", ksym_a[ix].module);
+            fprintf(stderr, "\n");
+        }
+    }
+#endif
+
+    return ksym_a_sz;
+
+fail:
+    /* entries [0, ix) are complete; release them and forget the table */
+    while (ix > 0) {
+        ix--;
+        free(ksym_a[ix].name);
+        free(ksym_a[ix].module);
+    }
+    free(ksym_a);
+    ksym_a = NULL;
+    ksym_a_sz = 0;
+    fclose(fp);
+    return err;
+}
+
+/*
+ * Merge code symbols from a System.map file into ksym_a[].
+ *
+ * release   kernel release string, must start with major.minor.patch
+ * end_addr  address of _end from /proc/ksyms (0 if unknown), used to
+ *           pick the System.map that belongs to the running kernel
+ *
+ * Candidate System.map paths are tried in turn; one whose _end matches
+ * wins outright, otherwise the first whose version symbol matches is
+ * used.  When none validates, the whole symbol table is released and
+ * -1 is returned.  Otherwise symbols of type t, T, W or A that are not
+ * already present by name are appended, and the table is trimmed and
+ * sorted by address, which is the order find_index() needs.
+ *
+ * Returns the number of entries in the table, -1 if the release string
+ * cannot be parsed or no System.map validates, else a negative error
+ * code.  After an error while reading the chosen file, the symbols
+ * gathered so far are kept and sorted by address so lookups stay valid.
+ */
+static int
+read_sysmap(const char *release, __psint_t end_addr)
+{
+    char        inbuf[256], path[MAXPATHLEN], **fmt;
+    struct ksym *ksym_tmp;
+    __psint_t   addr;
+    int         ix, res, e;
+    int         l = 0;
+    char        *ip;
+    char        *sp;
+    int         major, minor, patch;
+    FILE        *fp;
+    char        *bestpath = NULL;
+    int         ksym_mismatch_count;
+    char        *sysmap_paths[] = {     /* Paths to check for System.map file */
+        "%s/boot/System.map-%s",
+        "%s/boot/System.map",
+        "%s/lib/modules/%s/System.map",
+        "%s/usr/src/linux/System.map",
+        "%s/System.map",
+        NULL
+    };
+
+    /* Create version symbol name to look for in System.map */
+    if (sscanf(release, "%d.%d.%d", &major, &minor, &patch) < 3 )
+        return -1;
+    sprintf(inbuf, "Version_%u", KERNEL_VERSION(major, minor, patch));
+
+    /*
+     * Walk through System.map path list looking for one that matches
+     * either _end from /proc/ksyms or the uts version.
+     */
+    for (fmt = sysmap_paths; *fmt; fmt++) {
+        snprintf(path, MAXPATHLEN, *fmt, proc_statspath, release);
+        if ((fp = fopen(path, "r"))) {
+            if ((e = validate_sysmap(fp, inbuf, end_addr)) != 0) {
+                if (e == 2) {
+                    /* matched _end, so this is the right System.map */
+                    if (bestpath)
+                        free(bestpath);
+                    bestpath = strdup(path);
+                }
+                else
+                    if (e == 1 && !bestpath)
+                        bestpath = strdup(path);
+            }
+            fclose(fp);
+            if (e == 2) {
+                /* _end matched => don't look any further */
+                break;
+            }
+        }
+    }
+
+    if (bestpath)
+        fprintf(stderr, "NOTICE: using \"%s\" for kernel symbols map.\n", bestpath);
+    else {
+        /* Didn't find a valid System.map */
+        fprintf(stderr, "Warning: Valid System.map file not found!\n");
+        fprintf(stderr, "Warning: proc.psinfo.wchan_s symbol names cannot be derived!\n");
+        fprintf(stderr, "Warning: Addresses will be returned for proc.psinfo.wchan_s instead!\n");
+        /* Free symbol array */
+        for (ix = 0; ix < ksym_a_sz; ix++) {
+            if (ksym_a[ix].name)
+                free(ksym_a[ix].name);
+            if (ksym_a[ix].module)
+                free(ksym_a[ix].module);
+        }
+        free(ksym_a);
+        ksym_a = NULL;
+        ksym_a_sz = 0;
+        return -1;
+    }
+
+    /* entries [0, ix) are valid from here on, also on the fail path */
+    ix = ksym_a_sz;
+
+    /*
+     * scan the System map
+     * NOTE(review): bestpath was built with proc_statspath already in
+     * front; if proc_statsfile() prepends the prefix again the path is
+     * doubled whenever the prefix is non-empty - confirm.
+     */
+    if ((fp = proc_statsfile(bestpath, path, sizeof(path))) == NULL)
+        goto fail;
+
+    /* Read each line in System.map */
+    ksym_mismatch_count = 0;
+    while (fgets(inbuf, sizeof(inbuf), fp) != NULL) {
+        l++;
+
+        /*
+         * System.map lines look like this on ia32 ...
+         *
+         * c010a320 T disable_irq_nosync
+         *
+         * else on ia64 ...
+         *
+         * e002000000014c80 T disable_irq_nosync
+         */
+
+        if (strstr(inbuf, "\n") == NULL) {
+            fprintf(stderr, "read_sysmap: truncated System.map line [%d]: %s\n", l-1, inbuf);
+            continue;
+        }
+
+        /* Increase array size, if necessary */
+        if (ksym_a_sz < ix+1) {
+            ksym_tmp = (struct ksym *)realloc(ksym_a, (ksym_a_sz + INCR_KSIZE) * sizeof(struct ksym));
+            if (ksym_tmp == NULL) {
+                /* old block is untouched and still holds [0, ix) */
+                goto fail;
+            }
+            ksym_a = ksym_tmp;
+            ksym_a_sz += INCR_KSIZE;
+        }
+
+        ip = inbuf;
+        /* parse over address */
+        while (isxdigit((int)*ip)) ip++;
+
+        if (!isspace((int)*ip) || ip-inbuf < 4) {
+            /* bad format line */
+#if PCP_DEBUG
+            if (pmDebug & DBG_TRACE_APPL2) {
+                fprintf(stderr, "read_sysmap: bad addr? %c[%d] line=\"%s\"\n", *ip, (int)(ip-inbuf), inbuf);
+            }
+#endif
+            continue;
+        }
+
+        sscanf(inbuf, "%p", (void **)&addr);
+
+        while (isblank((int)*ip)) ip++;
+
+        /* Only interested in symbol types that map to code addresses,
+         * so: t, T, W or A
+         */
+        if (*ip != 't' && *ip != 'T' && *ip != 'W' && *ip != 'A')
+            continue;
+
+        ip++;
+        while (isblank((int)*ip)) ip++;
+
+        /*
+         * next should be the symbol name ... scan from its first
+         * character so a line that ends after the type cannot step
+         * over the newline and off the end of the buffer
+         */
+        sp = ip;
+        while (*ip != '\0' && !isblank((int)*ip) && *ip != '\n') ip++;
+        if (ip == sp) {
+            /* type with no symbol name */
+            continue;
+        }
+        *ip = '\0';
+
+        /* Determine if symbol is already in ksym array.
+           If so, make sure the addresses match. */
+        res = find_dup_name(ix - 1, addr, sp);
+        if (res == KSYM_NOT_FOUND) { /* add it */
+            ksym_a[ix].name = strdup(sp);
+            if (ksym_a[ix].name == NULL)
+                goto fail;
+            ksym_a[ix].addr = addr;
+            /* System.map has no module column; realloc does not zero */
+            ksym_a[ix].module = NULL;
+            ix++;
+        }
+        else if (res == KSYM_FOUND_MISMATCH) {
+            if (ksym_mismatch_count++ < KSYM_MISMATCH_MAX_ALLOWED) {
+                /*
+                 * ia64 function pointer descriptors make this validation
+                 * next to useless. So only report the first
+                 * KSYM_MISMATCH_MAX_ALLOWED mismatches found.
+                 */
+                fprintf(stderr, "Warning: mismatch for \"%s\" between System.map"
+                                " and /proc/ksyms.\n", sp);
+            }
+        }
+    }
+
+    if (ksym_mismatch_count > KSYM_MISMATCH_MAX_ALLOWED) {
+        fprintf(stderr, "Warning: only reported first %d out of %d mismatches "
+                        "between System.map and /proc/ksyms.\n",
+                        KSYM_MISMATCH_MAX_ALLOWED, ksym_mismatch_count);
+    }
+
+    /* release unused ksym array entries */
+    if (ix > 0) {
+        ksym_tmp = (struct ksym *)realloc(ksym_a, ix * sizeof(struct ksym));
+        /* a failed shrink leaves the larger block intact, so keep it */
+        if (ksym_tmp != NULL)
+            ksym_a = ksym_tmp;
+    }
+    ksym_a_sz = ix;
+
+    qsort(ksym_a, ksym_a_sz, sizeof(struct ksym), ksym_compare_addr);
+
+#if PCP_DEBUG
+    if (pmDebug & DBG_TRACE_APPL2) {
+        fprintf(stderr, "symbols from ksyms + sysmap ...\n");
+        for (ix = 0; ix < ksym_a_sz; ix++) {
+            fprintf(stderr, "ksym[%d] " PRINTF_P_PFX "%p %s", ix, (void *)ksym_a[ix].addr, ksym_a[ix].name);
+            if (ksym_a[ix].module != NULL) fprintf(stderr, " [%s]", ksym_a[ix].module);
+            fprintf(stderr, "\n");
+        }
+    }
+#endif
+
+    fclose(fp);
+    free(bestpath);
+
+    return ksym_a_sz;
+
+fail:
+    /* pick up the error code before any other call can disturb it */
+    e = -oserror();
+    if (fp)
+        fclose(fp);
+    free(bestpath);
+    /*
+     * Keep the symbols gathered so far: expose only the filled entries
+     * and put them in address order, as find_index() requires.
+     */
+    ksym_a_sz = ix;
+    if (ksym_a_sz > 0)
+        qsort(ksym_a, ksym_a_sz, sizeof(struct ksym), ksym_compare_addr);
+    return e;
+}
+
+/*
+ * Build the kernel symbol table for the given kernel release string:
+ * /proc/ksyms is read first and System.map is only consulted when
+ * that produced at least one symbol.  The result of reading
+ * System.map is not reported to the caller.
+ */
+void
+read_ksym_sources(const char *release)
+{
+    __psint_t   end_addr;
+
+    if (read_ksyms(&end_addr) <= 0)
+        return;
+
+    read_sysmap(release, end_addr);
+}
diff --git a/src/pmdas/linux_proc/ksym.h b/src/pmdas/linux_proc/ksym.h
new file mode 100644
index 0000000..f328ca4
--- /dev/null
+++ b/src/pmdas/linux_proc/ksym.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2002
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * This code contributed by Mike Mason (mmlnx@us.ibm.com)
+ */
+#ifndef _KSYM_H
+#define _KSYM_H
+
+/*
+ * The include guard keeps struct ksym from being defined twice when
+ * this header is pulled in more than once.  __psint_t must already be
+ * declared by the including file; this header includes nothing itself.
+ */
+
+/* pack a major.minor.patch kernel version into a single integer */
+#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
+
+/* symbol table sizing, in entries: initial allocation and growth step */
+#define INIT_KSIZE 8192
+#define INCR_KSIZE 2048
+
+/* results of looking a symbol name up in the table */
+#define KSYM_FOUND_MISMATCH -1  /* name present, address differs */
+#define KSYM_NOT_FOUND 0        /* name not present */
+#define KSYM_FOUND 1            /* name present, address agrees */
+
+/* number of System.map vs /proc/ksyms mismatches reported one by one */
+#define KSYM_MISMATCH_MAX_ALLOWED 10
+
+/* one kernel symbol */
+struct ksym {
+    __psint_t addr;     /* symbol address */
+    char *name;         /* symbol name */
+    char *module;       /* owning module name, NULL if none */
+};
+
+/* symbol name for a wait channel address, see ksym.c */
+extern char *wchan(__psint_t);
+/* load the symbol table for the given kernel release string */
+extern void read_ksym_sources(const char *);
+
+#endif /* _KSYM_H */
+
diff --git a/src/pmdas/linux_proc/linux_proc_migrate.conf b/src/pmdas/linux_proc/linux_proc_migrate.conf
new file mode 100644
index 0000000..51190da
--- /dev/null
+++ b/src/pmdas/linux_proc/linux_proc_migrate.conf
@@ -0,0 +1,55 @@
+# Copyright 2012 Red Hat, Inc. All Rights Reserved
+#
+# pmlogrewrite configuration for migrating archives containing proc metrics
+# that were captured prior to the proc PMDA split-off from the Linux PMDA.
+#
+# Basically, the PMID domain changed from 60 (linux) to 3 (proc) but all
+# cluster and item numbers remain unchanged.
+#
+# Note that the CPU indom is not migrated, even though it is
+# used for cgroup.groups.cpuacct.[<group>.]usage_percpu and
+# cgroup.groups.cpuacct.usage_percpu, because these metrics use
+# the dynamic pmns. To migrate archives containing these metrics,
+# a script would be needed to generate the pmlogrewrite config based
+# on the metric names actually present in the source archive.
+
+#
+# Migrate instance domains
+indom 60.9 { indom -> 3.9 } # per-process indom
+indom 60.20 { indom -> 3.20 } # cgroup hierarchy indom
+indom 60.21 { indom -> 3.21 } # cgroup mount subsys indom
+
+#
+# Migrate the pmid domain for each cluster
+metric 60.8.* { pmid -> 3.*.* } # CLUSTER_PID_STAT
+metric 60.9.* { pmid -> 3.*.* } # CLUSTER_PID_STATM
+metric 60.13.* { pmid -> 3.*.* } # CLUSTER_PROC_RUNQ
+metric 60.24.* { pmid -> 3.*.* } # CLUSTER_PID_STATUS
+metric 60.31.* { pmid -> 3.*.* } # CLUSTER_PID_SCHEDSTAT
+metric 60.32.* { pmid -> 3.*.* } # CLUSTER_PID_IO
+metric 60.51.* { pmid -> 3.*.* } # CLUSTER_PID_FD
+metric 60.37.* { pmid -> 3.*.* } # CLUSTER_CGROUP_SUBSYS
+metric 60.38.* { pmid -> 3.*.* } # CLUSTER_CGROUP_MOUNTS
+metric 60.39.* { pmid -> 3.*.* } # CLUSTER_CPUSET_GROUPS
+metric 60.40.* { pmid -> 3.*.* } # CLUSTER_CPUSET_PROCS
+metric 60.41.* { pmid -> 3.*.* } # CLUSTER_CPUACCT_GROUPS
+metric 60.42.* { pmid -> 3.*.* } # CLUSTER_CPUACCT_PROCS
+metric 60.43.* { pmid -> 3.*.* } # CLUSTER_CPUSCHED_GROUPS
+metric 60.44.* { pmid -> 3.*.* } # CLUSTER_CPUSCHED_PROCS
+metric 60.45.* { pmid -> 3.*.* } # CLUSTER_MEMORY_GROUPS
+metric 60.46.* { pmid -> 3.*.* } # CLUSTER_MEMORY_PROCS
+metric 60.47.* { pmid -> 3.*.* } # CLUSTER_NET_CLS_GROUPS
+metric 60.48.* { pmid -> 3.*.* } # CLUSTER_NET_CLS_PROCS
+
+#
+# These two proc.io metrics were incorrectly classified
+#
+metric proc.io.rchar {
+ sem -> counter
+ units -> 1,0,0,BYTE,0,0
+}
+
+metric proc.io.wchar {
+ sem -> counter
+ units -> 1,0,0,BYTE,0,0
+}
diff --git a/src/pmdas/linux_proc/pmda.c b/src/pmdas/linux_proc/pmda.c
new file mode 100644
index 0000000..2d40a54
--- /dev/null
+++ b/src/pmdas/linux_proc/pmda.c
@@ -0,0 +1,1896 @@
+/*
+ * proc PMDA
+ *
+ * Copyright (c) 2000,2004,2007-2008 Silicon Graphics, Inc. All Rights Reserved.
+ * Portions Copyright (c) 2002 International Business Machines Corp.
+ * Portions Copyright (c) 2007-2011 Aconex. All Rights Reserved.
+ * Portions Copyright (c) 2012-2014 Red Hat.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "pmapi.h"
+#include "impl.h"
+#include "pmda.h"
+#include "domain.h"
+#include "contexts.h"
+
+#include <ctype.h>
+#include <unistd.h>
+#include <sys/vfs.h>
+#include <sys/stat.h>
+#include <sys/times.h>
+#include <sys/utsname.h>
+#include <utmp.h>
+#include <pwd.h>
+#include <grp.h>
+
+#include "../linux/convert.h"
+#include "clusters.h"
+#include "indom.h"
+
+#include "getinfo.h"
+#include "proc_pid.h"
+#include "proc_runq.h"
+#include "ksym.h"
+#include "cgroups.h"
+
+/*
+ * globals
+ * All file-scope state below is static (private to this file) except
+ * proc_statspath, which indom.h exports to the other source files.
+ */
+static int _isDSO = 1; /* for local contexts */
+static proc_pid_t proc_pid;
+static struct utsname kernel_uname;
+static proc_runq_t proc_runq;
+static int all_access; /* =1 no access checks */
+static int have_access; /* =1 recvd uid/gid */
+static size_t _pm_system_pagesize;
+static unsigned int threads; /* control.all.threads */
+/*
+ * NOTE(review): the help text shipped with this PMDA documents
+ * proc.control.perclient.cgroups; no control.all.cgroups entry is
+ * visible there - confirm which metric this variable backs.
+ */
+static char * cgroups; /* control.all.cgroups */
+
+/* defaults to the empty string, i.e. no prefix */
+char *proc_statspath = ""; /* optional path prefix for all stats files */
+
+/*
+ * The proc instance domain table is direct lookup and sparse.
+ * It is initialized in proc_init(), see below.
+ * It has NUM_INDOMS slots, one per indom serial number in indom.h.
+ */
+static pmdaIndom indomtab[NUM_INDOMS];
+
+/*
+ * all metrics supported in this PMDA - one table entry for each
+ */
+static pmdaMetric metrictab[] = {
+
+/*
+ * proc/<pid>/stat cluster
+ */
+
+/* proc.nprocs */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,99), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.pid */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,0), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.cmd */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,1), PM_TYPE_STRING, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.sname */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,2), PM_TYPE_STRING, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.ppid */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,3), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.pgrp */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,4), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.session */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,5), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.tty */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,6), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.tty_pgrp */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,7), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.flags */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,8), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.minflt */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,9), PM_TYPE_U32, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.psinfo.cmin_flt */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,10), PM_TYPE_U32, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.psinfo.maj_flt */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,11), PM_TYPE_U32, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.psinfo.cmaj_flt */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,12), PM_TYPE_U32, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.psinfo.utime */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,13), KERNEL_ULONG, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_MSEC,0) } },
+
+/* proc.psinfo.stime */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,14), KERNEL_ULONG, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_MSEC,0) } },
+
+/* proc.psinfo.cutime */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,15), KERNEL_ULONG, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_MSEC,0) } },
+
+/* proc.psinfo.cstime */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,16), KERNEL_ULONG, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_MSEC,0) } },
+
+/* proc.psinfo.priority */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,17), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.nice */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,18), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+#if 0
+/* invalid field */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,19), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+#endif
+
+/* proc.psinfo.it_real_value */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,20), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.start_time */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,21), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) } },
+
+/* proc.psinfo.vsize */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,22), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) } },
+
+/* proc.psinfo.rss */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,23), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) } },
+
+/* proc.psinfo.rss_rlim */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,24), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) } },
+
+/* proc.psinfo.start_code */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,25), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.end_code */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,26), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.start_stack */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,27), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.esp */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,28), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.eip */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,29), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.signal */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,30), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.blocked */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,31), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.sigignore */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,32), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.sigcatch */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,33), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.wchan */
+#if defined(HAVE_64BIT_PTR)
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,34), PM_TYPE_U64, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+#elif defined(HAVE_32BIT_PTR)
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,34), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+#else
+ error! unsupported pointer size
+#endif
+
+/* proc.psinfo.nswap */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,35), PM_TYPE_U32, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.psinfo.cnswap */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,36), PM_TYPE_U32, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.psinfo.exit_signal */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,37), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.processor -- added by Mike Mason <mmlnx@us.ibm.com> */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,38), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.psinfo.ttyname */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,39), PM_TYPE_STRING, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0) } },
+
+/* proc.psinfo.wchan_s -- added by Mike Mason <mmlnx@us.ibm.com> */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,40), PM_TYPE_STRING, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.psinfo.psargs -- modified by Mike Mason <mmlnx@us.ibm.com> */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STAT,41), PM_TYPE_STRING, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/*
+ * proc/<pid>/status cluster
+ * Cluster added by Mike Mason <mmlnx@us.ibm.com>
+ */
+
+/* proc.id.uid */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,0), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.euid */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,1), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.suid */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,2), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.fsuid */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,3), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.gid */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,4), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.egid */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,5), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.sgid */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,6), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.fsgid */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,7), PM_TYPE_U32, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.uid_nm */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,8), PM_TYPE_STRING, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.euid_nm */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,9), PM_TYPE_STRING, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.suid_nm */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,10), PM_TYPE_STRING, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.fsuid_nm */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,11), PM_TYPE_STRING, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.gid_nm */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,12), PM_TYPE_STRING, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.egid_nm */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,13), PM_TYPE_STRING, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.sgid_nm */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,14), PM_TYPE_STRING, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.id.fsgid_nm */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,15), PM_TYPE_STRING, PROC_INDOM, PM_SEM_DISCRETE,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.psinfo.signal_s */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,16), PM_TYPE_STRING, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.psinfo.blocked_s */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,17), PM_TYPE_STRING, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.psinfo.sigignore_s */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,18), PM_TYPE_STRING, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.psinfo.sigcatch_s */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,19), PM_TYPE_STRING, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.memory.vmsize */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,20), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
+
+/* proc.memory.vmlock */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,21), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
+
+/* proc.memory.vmrss */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,22), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
+
+/* proc.memory.vmdata */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,23), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
+
+/* proc.memory.vmstack */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,24), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
+
+/* proc.memory.vmexe */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,25), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
+
+/* proc.memory.vmlib */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,26), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
+
+/* proc.memory.vmswap */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,27), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
+
+/* proc.psinfo.threads */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATUS,28), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.psinfo.cgroups */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_CGROUP,0), PM_TYPE_STRING, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/* proc.psinfo.labels */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_LABEL,0), PM_TYPE_STRING, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+
+/*
+ * proc/<pid>/statm cluster
+ */
+
+/* proc.memory.size */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATM,0), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) } },
+
+/* proc.memory.rss */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATM,1), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) } },
+
+/* proc.memory.share */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATM,2), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) } },
+
+/* proc.memory.textrss */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATM,3), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) } },
+
+/* proc.memory.librss */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATM,4), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) } },
+
+/* proc.memory.datrss */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATM,5), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) } },
+
+/* proc.memory.dirty */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATM,6), PM_TYPE_U32, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) } },
+
+/* proc.memory.maps -- added by Mike Mason <mmlnx@us.ibm.com> */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_STATM,7), PM_TYPE_STRING, PROC_INDOM, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,0,0,0,0)}},
+
+/*
+ * proc/<pid>/schedstat cluster
+ */
+
+/* proc.schedstat.cpu_time */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_SCHEDSTAT,0), PM_TYPE_U64, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0)}},
+/* proc.schedstat.run_delay */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_SCHEDSTAT,1), PM_TYPE_U64, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0)}},
+/* proc.schedstat.pcount */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_SCHEDSTAT,2), KERNEL_ULONG, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE)}},
+
+/*
+ * proc/<pid>/io cluster
+ */
+/* proc.io.rchar */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_IO,0), PM_TYPE_U64, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0)}},
+/* proc.io.wchar */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_IO,1), PM_TYPE_U64, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0)}},
+/* proc.io.syscr */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_IO,2), PM_TYPE_U64, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE)}},
+/* proc.io.syscw */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_IO,3), PM_TYPE_U64, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE)}},
+/* proc.io.read_bytes */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_IO,4), PM_TYPE_U64, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0)}},
+/* proc.io.write_bytes */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_IO,5), PM_TYPE_U64, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0)}},
+/* proc.io.cancelled_write_bytes */
+ { NULL,
+ { PMDA_PMID(CLUSTER_PID_IO,6), PM_TYPE_U64, PROC_INDOM, PM_SEM_COUNTER,
+ PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0)}},
+
+/*
+ * proc.runq cluster
+ */
+
+/* proc.runq.runnable */
+ { &proc_runq.runnable,
+ { PMDA_PMID(CLUSTER_PROC_RUNQ, 0), PM_TYPE_32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.runq.blocked */
+ { &proc_runq.blocked,
+ { PMDA_PMID(CLUSTER_PROC_RUNQ, 1), PM_TYPE_32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.runq.sleeping */
+ { &proc_runq.sleeping,
+ { PMDA_PMID(CLUSTER_PROC_RUNQ, 2), PM_TYPE_32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.runq.stopped */
+ { &proc_runq.stopped,
+ { PMDA_PMID(CLUSTER_PROC_RUNQ, 3), PM_TYPE_32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.runq.swapped */
+ { &proc_runq.swapped,
+ { PMDA_PMID(CLUSTER_PROC_RUNQ, 4), PM_TYPE_32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.runq.defunct */
+ { &proc_runq.defunct,
+ { PMDA_PMID(CLUSTER_PROC_RUNQ, 5), PM_TYPE_32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.runq.unknown */
+ { &proc_runq.unknown,
+ { PMDA_PMID(CLUSTER_PROC_RUNQ, 6), PM_TYPE_32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/* proc.runq.kernel */
+ { &proc_runq.kernel,
+ { PMDA_PMID(CLUSTER_PROC_RUNQ, 7), PM_TYPE_32, PM_INDOM_NULL, PM_SEM_INSTANT,
+ PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/*
+ * control groups cluster
+ */
+ /* cgroups.subsys.hierarchy */
+ { NULL, {PMDA_PMID(CLUSTER_CGROUP_SUBSYS,0), PM_TYPE_U32,
+ CGROUP_SUBSYS_INDOM, PM_SEM_INSTANT, PMDA_PMUNITS(0,0,0,0,0,0) }, },
+
+ /* cgroups.subsys.count */
+ { NULL, {PMDA_PMID(CLUSTER_CGROUP_SUBSYS,1), PM_TYPE_U32,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroups.mounts.subsys */
+ { NULL, {PMDA_PMID(CLUSTER_CGROUP_MOUNTS,0), PM_TYPE_STRING,
+ CGROUP_MOUNTS_INDOM, PM_SEM_INSTANT, PMDA_PMUNITS(0,0,0,0,0,0) }, },
+
+ /* cgroups.mounts.count */
+ { NULL, {PMDA_PMID(CLUSTER_CGROUP_MOUNTS,1), PM_TYPE_U32,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.cpuset.[<group>.]cpus */
+ { NULL, {PMDA_PMID(CLUSTER_CPUSET_GROUPS,0), PM_TYPE_STRING,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(0,0,0,0,0,0) }, },
+
+ /* cgroup.groups.cpuset.[<group>.]mems */
+ { NULL, {PMDA_PMID(CLUSTER_CPUSET_GROUPS,0), PM_TYPE_STRING,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(0,0,0,0,0,0) }, },
+
+ /* cgroup.groups.cpuacct.[<group>.]stat.user */
+ { NULL, {PMDA_PMID(CLUSTER_CPUACCT_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_MSEC,0) }, },
+
+ /* cgroup.groups.cpuacct.[<group>.]stat.system */
+ { NULL, {PMDA_PMID(CLUSTER_CPUACCT_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_MSEC,0) }, },
+
+ /* cgroup.groups.cpuacct.[<group>.]usage */
+ { NULL, {PMDA_PMID(CLUSTER_CPUACCT_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0) }, },
+
+ /* cgroup.groups.cpuacct.[<group>.]usage_percpu */
+ { NULL, {PMDA_PMID(CLUSTER_CPUACCT_GROUPS,0), PM_TYPE_U64,
+ CPU_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0) }, },
+
+ /* cgroup.groups.cpusched.[<group>.]shares */
+ { NULL, {PMDA_PMID(CLUSTER_CPUSCHED_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,0,0,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.cache */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.rss */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.rss_huge */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.mapped_file */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.writeback */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.swap */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.pgpgin */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.pgpgout */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.pgfault */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.pgmajfault */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.inactive_anon */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.active_anon */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.inactive_file */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.active_file */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.unevictable */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_cache */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_rss */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_rss_huge */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_mapped_file */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_writeback */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_swap */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_pgpgin */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_pgpgout */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_pgfault */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_pgmajfault */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_inactive_anon */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_active_anon */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_inactive_file */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_active_file */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.total_unevictable */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.recent_rotated_anon */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.recent_rotated_file */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.recent_scanned_anon */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.memory.[<group>.]stat.recent_scanned_file */
+ { NULL, {PMDA_PMID(CLUSTER_MEMORY_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.netclass.[<group>.]classid */
+ { NULL, {PMDA_PMID(CLUSTER_NET_CLS_GROUPS,0), PM_TYPE_U64,
+ PM_INDOM_NULL, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_merged.read */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_merged.write */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_merged.sync */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_merged.async */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_merged.total */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_queued.read */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_queued.write */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_queued.sync */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_queued.async */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_queued.total */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_service_bytes.read */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_service_bytes.write */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_service_bytes.sync */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_service_bytes.async */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_service_bytes.total */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_serviced.read */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_serviced.write */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_serviced.sync */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_serviced.async */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_serviced.total */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_service_time.read */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_service_time.write */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_service_time.sync */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_service_time.async */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_service_time.total */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_wait_time.read */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_wait_time.write */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_wait_time.sync */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_wait_time.async */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]io_wait_time.total */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_NSEC,0) }, },
+
+ /* cgroup.groups.blkio.[<group>.]sectors */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
+
+ /* cgroup.groups.blkio.[<group>.]time */
+ { NULL, {PMDA_PMID(CLUSTER_BLKIO_GROUPS,0), PM_TYPE_U64,
+ DISK_INDOM, PM_SEM_COUNTER, PMDA_PMUNITS(0,1,0,0,PM_TIME_MSEC,0) }, },
+
+
+/*
+ * proc/<pid>/fd cluster
+ */
+
+ /* proc.fd.count */
+ { NULL, { PMDA_PMID(CLUSTER_PID_FD,0), PM_TYPE_U32,
+ PROC_INDOM, PM_SEM_INSTANT, PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) } },
+
+/*
+ * Metrics control cluster
+ */
+
+ /* proc.control.all.threads */
+ { &threads, { PMDA_PMID(CLUSTER_CONTROL, 1), PM_TYPE_U32,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(0,0,0,0,0,0) } },
+ /* proc.control.perclient.threads */
+ { NULL, { PMDA_PMID(CLUSTER_CONTROL, 2), PM_TYPE_U32,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(0,0,0,0,0,0) } },
+ /* proc.control.perclient.cgroups */
+ { NULL, { PMDA_PMID(CLUSTER_CONTROL, 3), PM_TYPE_STRING,
+ PM_INDOM_NULL, PM_SEM_INSTANT, PMDA_PMUNITS(0,0,0,0,0,0) } },
+};
+
+/*
+ * Translate an instance domain serial number into the pmInDom
+ * identifier stored in the corresponding indomtab[] slot.
+ * The serial is used directly as the table index, no range check
+ * is performed here.
+ */
+pmInDom
+proc_indom(int serial)
+{
+    pmInDom	indom = indomtab[serial].it_indom;
+
+    return indom;
+}
+
+/*
+ * Open a statistics file for reading.
+ *
+ * The pathname is built in the caller supplied buffer (of size bytes)
+ * by prefixing path with proc_statspath.  On return the buffer holds
+ * the (NUL-terminated) pathname that was constructed.
+ *
+ * Returns the stream from fopen(3), or NULL if the file could not be
+ * opened.  NULL is also returned, without calling fopen (and hence
+ * without updating errno), when there is no usable buffer or when the
+ * full pathname does not fit in it.
+ */
+FILE *
+proc_statsfile(const char *path, char *buffer, int size)
+{
+    int		len;
+
+    /*
+     * A non-positive size would index before the start of buffer below
+     * and would become a huge size_t when handed to snprintf.
+     */
+    if (buffer == NULL || size <= 0)
+	return NULL;
+
+    len = snprintf(buffer, size, "%s%s", proc_statspath, path);
+    buffer[size-1] = '\0';
+
+    /*
+     * Never open a truncated pathname - it names a different file to
+     * the one the caller asked for.  A negative result covers output
+     * errors and older snprintf implementations that report
+     * truncation that way.
+     */
+    if (len < 0 || len >= size)
+	return NULL;
+
+    return fopen(buffer, "r");
+}
+
+/*
+ * Refresh the data behind every cluster flagged in need_refresh[].
+ *
+ * need_refresh is indexed by cluster number, a non-zero slot means
+ * that cluster has been requested.  The refresh order is: cgroup
+ * per-CPU instances, cgroup subsystems and groups (rebuilding the
+ * dynamic metric table if refresh_cgroups() returns non-zero), the
+ * per-process data and finally the run queue counts.
+ */
+static void
+proc_refresh(pmdaExt *pmda, int *need_refresh)
+{
+    int		cgroups_wanted;
+    int		pids_wanted;
+    int		mtab_changed = 0;
+
+    if (need_refresh[CLUSTER_CPUACCT_GROUPS])
+	refresh_cgroup_cpus(INDOM(CPU_INDOM));
+
+    /* has any one of the control group clusters been requested? */
+    cgroups_wanted = need_refresh[CLUSTER_CGROUP_SUBSYS] ||
+		     need_refresh[CLUSTER_CGROUP_MOUNTS] ||
+		     need_refresh[CLUSTER_CPUSET_GROUPS] ||
+		     need_refresh[CLUSTER_CPUACCT_GROUPS] ||
+		     need_refresh[CLUSTER_CPUSCHED_GROUPS] ||
+		     need_refresh[CLUSTER_BLKIO_GROUPS] ||
+		     need_refresh[CLUSTER_NET_CLS_GROUPS] ||
+		     need_refresh[CLUSTER_MEMORY_GROUPS];
+    if (cgroups_wanted) {
+	refresh_cgroup_subsys(INDOM(CGROUP_SUBSYS_INDOM));
+	mtab_changed |= refresh_cgroups(pmda, NULL);
+    }
+
+    /* a non-zero result from refresh_cgroups means rebuild the table */
+    if (mtab_changed)
+	pmdaDynamicMetricTable(pmda);
+
+    /* has any one of the per-process clusters been requested? */
+    pids_wanted = need_refresh[CLUSTER_PID_STAT] ||
+		  need_refresh[CLUSTER_PID_STATM] ||
+		  need_refresh[CLUSTER_PID_STATUS] ||
+		  need_refresh[CLUSTER_PID_IO] ||
+		  need_refresh[CLUSTER_PID_LABEL] ||
+		  need_refresh[CLUSTER_PID_CGROUP] ||
+		  need_refresh[CLUSTER_PID_SCHEDSTAT] ||
+		  need_refresh[CLUSTER_PID_FD];
+    if (pids_wanted) {
+	/* threads and cgroups settings are resolved for this context */
+	refresh_proc_pid(&proc_pid,
+			 proc_ctx_threads(pmda->e_context, threads),
+			 proc_ctx_cgroups(pmda->e_context, cgroups));
+    }
+
+    if (need_refresh[CLUSTER_PROC_RUNQ])
+	refresh_proc_runq(&proc_runq);
+}
+
+/*
+ * Instance domain callback.
+ *
+ * Flags the clusters that back the requested instance domain, refreshes
+ * them via proc_refresh() and then hands the request on to
+ * pmdaInstance().  For the process instance domain a purely numeric
+ * instance name is first rewritten into the zero-padded pid format.
+ *
+ * Returns the pmdaInstance() status, or PM_ERR_PERMISSION when the
+ * process instance domain is requested and neither the calling context
+ * (proc_ctx_access) nor the all_access setting grants access.
+ */
+static int
+proc_instance(pmInDom indom, int inst, char *name, __pmInResult **result, pmdaExt *pmda)
+{
+    __pmInDom_int	*indomp = (__pmInDom_int *)&indom;
+    int			need_refresh[NUM_CLUSTERS] = { 0 };
+    char		newname[16];		/* see Note below */
+    int			sts;
+
+    switch (indomp->serial) {
+    case CPU_INDOM:
+	/*
+	 * Used by cgroup.groups.cpuacct.[<group>.]usage_percpu
+	 * and cgroup.groups.cpuacct.usage_percpu
+	 */
+	need_refresh[CLUSTER_CPUACCT_GROUPS]++;
+	break;
+    case DISK_INDOM:
+	need_refresh[CLUSTER_BLKIO_GROUPS]++;
+	break;
+    case PROC_INDOM:
+	need_refresh[CLUSTER_PID_STAT]++;
+	need_refresh[CLUSTER_PID_STATM]++;
+	need_refresh[CLUSTER_PID_STATUS]++;
+	need_refresh[CLUSTER_PID_LABEL]++;
+	need_refresh[CLUSTER_PID_CGROUP]++;
+	need_refresh[CLUSTER_PID_SCHEDSTAT]++;
+	need_refresh[CLUSTER_PID_IO]++;
+	need_refresh[CLUSTER_PID_FD]++;
+	break;
+    case CGROUP_SUBSYS_INDOM:
+	need_refresh[CLUSTER_CGROUP_SUBSYS]++;
+	break;
+    case CGROUP_MOUNTS_INDOM:
+	need_refresh[CLUSTER_CGROUP_MOUNTS]++;
+	break;
+    /* no default label : pmdaInstance will pick up errors */
+    }
+
+    if (indomp->serial == PROC_INDOM && inst == PM_IN_NULL && name != NULL) {
+	/*
+	 * For the proc indom, if the name is a pid (as a string), and it
+	 * contains only digits (i.e. it's not a full instance name) then
+	 * reformat it to be exactly six digits, with leading zeros.
+	 *
+	 * Note that although format %06 is used here and in proc_pid.c,
+	 * the pid could be longer than this (in which case there
+	 * are no leading zeroes).  The size of newname[] is chosen
+	 * to comfortably accommodate a 32-bit pid (Linux maximum),
+	 * or max value of 4294967295 (10 digits)
+	 */
+	char *p;
+	for (p = name; *p != '\0'; p++) {
+	    /* isdigit is only defined for unsigned char values (and EOF) */
+	    if (!isdigit((unsigned char)*p))
+		break;
+	}
+	/*
+	 * An empty name has no digits at all, so it is not a pid and is
+	 * passed through unchanged rather than becoming "000000".
+	 */
+	if (p != name && *p == '\0') {
+	    /* strtoul, unlike atoi, has defined behaviour on overflow */
+	    snprintf(newname, sizeof(newname), "%06lu", strtoul(name, NULL, 10));
+	    name = newname;
+	}
+    }
+
+    sts = PM_ERR_PERMISSION;
+    have_access = proc_ctx_access(pmda->e_context) || all_access;
+    /* only the process indom is access controlled */
+    if (have_access || indomp->serial != PROC_INDOM) {
+	proc_refresh(pmda, need_refresh);
+	sts = pmdaInstance(indom, inst, name, result, pmda);
+    }
+    /* always undo proc_ctx_access, whether or not the lookup was done */
+    have_access = proc_ctx_revert(pmda->e_context);
+
+    return sts;
+}
+
+/*
+ * callback provided to pmdaFetch
+ */
+
+static int
+proc_fetchCallBack(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom)
+{
+ __pmID_int *idp = (__pmID_int *)&(mdesc->m_desc.pmid);
+ int cluster = proc_pmid_cluster(mdesc->m_desc.pmid);
+ int sts;
+ unsigned long ul;
+ const char *cp;
+ char *f;
+ int *ip;
+ proc_pid_entry_t *entry;
+ void *fsp;
+ static long hz = -1;
+ char *tail;
+
+ if (hz == -1)
+ hz = sysconf(_SC_CLK_TCK);
+
+ if (mdesc->m_user != NULL) {
+ /*
+ * The metric value is extracted directly via the address specified
+ * in metrictab. Note: not all metrics support this - those that
+ * don't have NULL for the m_user field in their respective
+ * metrictab slot.
+ */
+
+ switch (mdesc->m_desc.type) {
+ case PM_TYPE_32:
+ atom->l = *(__int32_t *)mdesc->m_user;
+ break;
+ case PM_TYPE_U32:
+ atom->ul = *(__uint32_t *)mdesc->m_user;
+ break;
+ case PM_TYPE_64:
+ atom->ll = *(__int64_t *)mdesc->m_user;
+ break;
+ case PM_TYPE_U64:
+ atom->ull = *(__uint64_t *)mdesc->m_user;
+ break;
+ case PM_TYPE_FLOAT:
+ atom->f = *(float *)mdesc->m_user;
+ break;
+ case PM_TYPE_DOUBLE:
+ atom->d = *(double *)mdesc->m_user;
+ break;
+ case PM_TYPE_STRING:
+ cp = *(char **)mdesc->m_user;
+ atom->cp = (char *)(cp ? cp : "");
+ break;
+ default:
+ return 0;
+ }
+ }
+ else
+ switch (cluster) {
+
+ case CLUSTER_PID_STAT:
+ if (idp->item == 99) /* proc.nprocs */
+ atom->ul = proc_pid.indom->it_numinst;
+ else {
+ static char ttyname[MAXPATHLEN];
+
+ if (!have_access)
+ return PM_ERR_PERMISSION;
+ if ((entry = fetch_proc_pid_stat(inst, &proc_pid)) == NULL)
+ return PM_ERR_INST;
+
+ switch (idp->item) {
+
+
+ case PROC_PID_STAT_PID:
+ atom->ul = entry->id;
+ break;
+
+ case PROC_PID_STAT_TTYNAME:
+ if ((f = _pm_getfield(entry->stat_buf, PROC_PID_STAT_TTY)) == NULL)
+ atom->cp = "?";
+ else {
+ dev_t dev = (dev_t)atoi(f);
+ atom->cp = get_ttyname_info(inst, dev, ttyname);
+ }
+ break;
+
+ case PROC_PID_STAT_CMD:
+ if ((f = _pm_getfield(entry->stat_buf, idp->item)) == NULL)
+ return PM_ERR_INST;
+ atom->cp = f + 1;
+ atom->cp[strlen(atom->cp)-1] = '\0';
+ break;
+
+ case PROC_PID_STAT_PSARGS:
+ atom->cp = entry->name + 7;
+ break;
+
+ case PROC_PID_STAT_STATE:
+ /*
+ * string
+ */
+ if ((f = _pm_getfield(entry->stat_buf, idp->item)) == NULL)
+ return PM_ERR_INST;
+ atom->cp = f;
+ break;
+
+ case PROC_PID_STAT_VSIZE:
+ case PROC_PID_STAT_RSS_RLIM:
+ /*
+ * bytes converted to kbytes
+ */
+ if ((f = _pm_getfield(entry->stat_buf, idp->item)) == NULL)
+ return PM_ERR_INST;
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ atom->ul /= 1024;
+ break;
+
+ case PROC_PID_STAT_RSS:
+ /*
+ * pages converted to kbytes
+ */
+ if ((f = _pm_getfield(entry->stat_buf, idp->item)) == NULL)
+ return PM_ERR_INST;
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ atom->ul *= _pm_system_pagesize / 1024;
+ break;
+
+ case PROC_PID_STAT_UTIME:
+ case PROC_PID_STAT_STIME:
+ case PROC_PID_STAT_CUTIME:
+ case PROC_PID_STAT_CSTIME:
+ /*
+ * unsigned jiffies converted to unsigned milliseconds
+ */
+ if ((f = _pm_getfield(entry->stat_buf, idp->item)) == NULL)
+ return PM_ERR_INST;
+
+ ul = (__uint32_t)strtoul(f, &tail, 0);
+ _pm_assign_ulong(atom, 1000 * (double)ul / hz);
+ break;
+
+ case PROC_PID_STAT_PRIORITY:
+ case PROC_PID_STAT_NICE:
+ /*
+ * signed decimal int
+ */
+ if ((f = _pm_getfield(entry->stat_buf, idp->item)) == NULL)
+ return PM_ERR_INST;
+ atom->l = (__int32_t)strtol(f, &tail, 0);
+ break;
+
+ case PROC_PID_STAT_WCHAN:
+ if ((f = _pm_getfield(entry->stat_buf, idp->item)) == NULL)
+ return PM_ERR_INST;
+#if defined(HAVE_64BIT_PTR)
+ atom->ull = (__uint64_t)strtoull(f, &tail, 0);
+#else
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+#endif
+ break;
+
+ case PROC_PID_STAT_WCHAN_SYMBOL:
+ if (entry->wchan_buf) /* 2.6 kernel, /proc/<pid>/wchan */
+ atom->cp = entry->wchan_buf;
+ else { /* old school (2.4 kernels, at least) */
+ char *wc;
+ /*
+ * Convert address to symbol name if requested
+ * Added by Mike Mason <mmlnx@us.ibm.com>
+ */
+ f = _pm_getfield(entry->stat_buf, PROC_PID_STAT_WCHAN);
+ if (f == NULL)
+ return PM_ERR_INST;
+#if defined(HAVE_64BIT_PTR)
+ atom->ull = (__uint64_t)strtoull(f, &tail, 0);
+ if ((wc = wchan(atom->ull)))
+ atom->cp = wc;
+ else
+ atom->cp = atom->ull ? f : "";
+#else
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ if ((wc = wchan((__psint_t)atom->ul)))
+ atom->cp = wc;
+ else
+ atom->cp = atom->ul ? f : "";
+#endif
+ }
+ break;
+
+ default:
+ /*
+ * unsigned decimal int
+ */
+ if (idp->item < NR_PROC_PID_STAT) {
+ if ((f = _pm_getfield(entry->stat_buf, idp->item)) == NULL)
+ return PM_ERR_INST;
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ }
+ else
+ return PM_ERR_PMID;
+ break;
+ }
+ }
+ break;
+
+ case CLUSTER_PID_STATM:
+ if (!have_access)
+ return PM_ERR_PERMISSION;
+ if (idp->item == PROC_PID_STATM_MAPS) { /* proc.memory.maps */
+ if ((entry = fetch_proc_pid_maps(inst, &proc_pid)) == NULL)
+ return PM_ERR_INST;
+ atom->cp = entry->maps_buf;
+ } else {
+ if ((entry = fetch_proc_pid_statm(inst, &proc_pid)) == NULL)
+ return PM_ERR_INST;
+
+ if (idp->item <= PROC_PID_STATM_DIRTY) {
+ /* unsigned int */
+ if ((f = _pm_getfield(entry->statm_buf, idp->item)) == NULL)
+ return PM_ERR_INST;
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ atom->ul *= _pm_system_pagesize / 1024;
+ }
+ else
+ return PM_ERR_PMID;
+ }
+ break;
+
+ case CLUSTER_PID_SCHEDSTAT:
+ if (!have_access)
+ return PM_ERR_PERMISSION;
+ if ((entry = fetch_proc_pid_schedstat(inst, &proc_pid)) == NULL)
+ return (oserror() == ENOENT) ? PM_ERR_APPVERSION : PM_ERR_INST;
+
+ if (idp->item < NR_PROC_PID_SCHED) {
+ if ((f = _pm_getfield(entry->schedstat_buf, idp->item)) == NULL)
+ return PM_ERR_INST;
+ if (idp->item == PROC_PID_SCHED_PCOUNT &&
+ mdesc->m_desc.type == PM_TYPE_U32)
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ else
+#if defined(HAVE_64BIT_PTR)
+ atom->ull = (__uint64_t)strtoull(f, &tail, 0);
+#else
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+#endif
+ }
+ else
+ return PM_ERR_PMID;
+ break;
+
+ case CLUSTER_PID_IO:
+ if (!have_access)
+ return PM_ERR_PERMISSION;
+ if ((entry = fetch_proc_pid_io(inst, &proc_pid)) == NULL)
+ return (oserror() == ENOENT) ? PM_ERR_APPVERSION : PM_ERR_INST;
+
+ switch (idp->item) {
+
+ case PROC_PID_IO_RCHAR:
+ if ((f = _pm_getfield(entry->io_lines.rchar, 1)) == NULL)
+ atom->ull = 0;
+ else
+ atom->ull = (__uint64_t)strtoull(f, &tail, 0);
+ break;
+ case PROC_PID_IO_WCHAR:
+ if ((f = _pm_getfield(entry->io_lines.wchar, 1)) == NULL)
+ atom->ull = 0;
+ else
+ atom->ull = (__uint64_t)strtoull(f, &tail, 0);
+ break;
+ case PROC_PID_IO_SYSCR:
+ if ((f = _pm_getfield(entry->io_lines.syscr, 1)) == NULL)
+ atom->ull = 0;
+ else
+ atom->ull = (__uint64_t)strtoull(f, &tail, 0);
+ break;
+ case PROC_PID_IO_SYSCW:
+ if ((f = _pm_getfield(entry->io_lines.syscw, 1)) == NULL)
+ atom->ull = 0;
+ else
+ atom->ull = (__uint64_t)strtoull(f, &tail, 0);
+ break;
+ case PROC_PID_IO_READ_BYTES:
+ if ((f = _pm_getfield(entry->io_lines.readb, 1)) == NULL)
+ atom->ull = 0;
+ else
+ atom->ull = (__uint64_t)strtoull(f, &tail, 0);
+ break;
+ case PROC_PID_IO_WRITE_BYTES:
+ if ((f = _pm_getfield(entry->io_lines.writeb, 1)) == NULL)
+ atom->ull = 0;
+ else
+ atom->ull = (__uint64_t)strtoull(f, &tail, 0);
+ break;
+ case PROC_PID_IO_CANCELLED_BYTES:
+ if ((f = _pm_getfield(entry->io_lines.cancel, 1)) == NULL)
+ atom->ull = 0;
+ else
+ atom->ull = (__uint64_t)strtoull(f, &tail, 0);
+ break;
+
+ default:
+ return PM_ERR_PMID;
+ }
+ break;
+
+ /*
+ * Cluster added by Mike Mason <mmlnx@us.ibm.com>
+ */
+ case CLUSTER_PID_STATUS:
+ if (!have_access)
+ return PM_ERR_PERMISSION;
+ if ((entry = fetch_proc_pid_status(inst, &proc_pid)) == NULL)
+ return PM_ERR_INST;
+
+ switch (idp->item) {
+
+ case PROC_PID_STATUS_UID:
+ case PROC_PID_STATUS_EUID:
+ case PROC_PID_STATUS_SUID:
+ case PROC_PID_STATUS_FSUID:
+ case PROC_PID_STATUS_UID_NM:
+ case PROC_PID_STATUS_EUID_NM:
+ case PROC_PID_STATUS_SUID_NM:
+ case PROC_PID_STATUS_FSUID_NM:
+ {
+ struct passwd *pwe;
+
+ if ((f = _pm_getfield(entry->status_lines.uid, (idp->item % 4) + 1)) == NULL)
+ return PM_ERR_INST;
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ if (idp->item > PROC_PID_STATUS_FSUID) {
+ if ((pwe = getpwuid((uid_t)atom->ul)) != NULL)
+ atom->cp = pwe->pw_name;
+ else
+ atom->cp = "UNKNOWN";
+ }
+ }
+ break;
+
+ case PROC_PID_STATUS_GID:
+ case PROC_PID_STATUS_EGID:
+ case PROC_PID_STATUS_SGID:
+ case PROC_PID_STATUS_FSGID:
+ case PROC_PID_STATUS_GID_NM:
+ case PROC_PID_STATUS_EGID_NM:
+ case PROC_PID_STATUS_SGID_NM:
+ case PROC_PID_STATUS_FSGID_NM:
+ {
+ struct group *gre;
+
+ if ((f = _pm_getfield(entry->status_lines.gid, (idp->item % 4) + 1)) == NULL)
+ return PM_ERR_INST;
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ if (idp->item > PROC_PID_STATUS_FSGID) {
+ if ((gre = getgrgid((gid_t)atom->ul)) != NULL) {
+ atom->cp = gre->gr_name;
+ } else {
+ atom->cp = "UNKNOWN";
+ }
+ }
+ }
+ break;
+
+ case PROC_PID_STATUS_SIGNAL:
+ if ((atom->cp = _pm_getfield(entry->status_lines.sigpnd, 1)) == NULL)
+ return PM_ERR_INST;
+ break;
+
+ case PROC_PID_STATUS_BLOCKED:
+ if ((atom->cp = _pm_getfield(entry->status_lines.sigblk, 1)) == NULL)
+ return PM_ERR_INST;
+ break;
+
+ case PROC_PID_STATUS_SIGCATCH:
+ if ((atom->cp = _pm_getfield(entry->status_lines.sigcgt, 1)) == NULL)
+ return PM_ERR_INST;
+ break;
+
+ case PROC_PID_STATUS_SIGIGNORE:
+ if ((atom->cp = _pm_getfield(entry->status_lines.sigign, 1)) == NULL)
+ return PM_ERR_INST;
+ break;
+
+ case PROC_PID_STATUS_VMSIZE:
+ if ((f = _pm_getfield(entry->status_lines.vmsize, 1)) == NULL)
+ atom->ul = 0;
+ else
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ break;
+
+ case PROC_PID_STATUS_VMLOCK:
+ if ((f = _pm_getfield(entry->status_lines.vmlck, 1)) == NULL)
+ atom->ul = 0;
+ else
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ break;
+
+ case PROC_PID_STATUS_VMRSS:
+ if ((f = _pm_getfield(entry->status_lines.vmrss, 1)) == NULL)
+ atom->ul = 0;
+ else
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ break;
+
+ case PROC_PID_STATUS_VMDATA:
+ if ((f = _pm_getfield(entry->status_lines.vmdata, 1)) == NULL)
+ atom->ul = 0;
+ else
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ break;
+
+ case PROC_PID_STATUS_VMSTACK:
+ if ((f = _pm_getfield(entry->status_lines.vmstk, 1)) == NULL)
+ atom->ul = 0;
+ else
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ break;
+
+ case PROC_PID_STATUS_VMEXE:
+ if ((f = _pm_getfield(entry->status_lines.vmexe, 1)) == NULL)
+ atom->ul = 0;
+ else
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ break;
+
+ case PROC_PID_STATUS_VMLIB:
+ if ((f = _pm_getfield(entry->status_lines.vmlib, 1)) == NULL)
+ atom->ul = 0;
+ else
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ break;
+
+ case PROC_PID_STATUS_VMSWAP:
+ if ((f = _pm_getfield(entry->status_lines.vmswap, 1)) == NULL)
+ atom->ul = 0;
+ else
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ break;
+
+ case PROC_PID_STATUS_THREADS:
+ if ((f = _pm_getfield(entry->status_lines.threads, 1)) == NULL)
+ atom->ul = 0;
+ else
+ atom->ul = (__uint32_t)strtoul(f, &tail, 0);
+ break;
+
+ default:
+ return PM_ERR_PMID;
+ }
+ break;
+
+ case CLUSTER_CGROUP_SUBSYS:
+ switch (idp->item) {
+ case 0: /* cgroup.subsys.hierarchy */
+ sts = pmdaCacheLookup(INDOM(CGROUP_SUBSYS_INDOM), inst, NULL, (void **)&ip);
+ if (sts < 0)
+ return sts;
+ if (sts != PMDA_CACHE_ACTIVE)
+ return PM_ERR_INST;
+ atom->ul = *ip;
+ break;
+
+ case 1: /* cgroup.subsys.count */
+ atom->ul = pmdaCacheOp(INDOM(CGROUP_SUBSYS_INDOM), PMDA_CACHE_SIZE_ACTIVE);
+ break;
+ }
+ break;
+
+ case CLUSTER_CGROUP_MOUNTS:
+ switch (idp->item) {
+ case 0: /* cgroup.mounts.subsys */
+ sts = pmdaCacheLookup(INDOM(CGROUP_MOUNTS_INDOM), inst, NULL, &fsp);
+ if (sts < 0)
+ return sts;
+ if (sts != PMDA_CACHE_ACTIVE)
+ return PM_ERR_INST;
+ atom->cp = cgroup_find_subsys(INDOM(CGROUP_SUBSYS_INDOM), fsp);
+ break;
+
+ case 1: /* cgroup.mounts.count */
+ atom->ul = pmdaCacheOp(INDOM(CGROUP_MOUNTS_INDOM), PMDA_CACHE_SIZE_ACTIVE);
+ break;
+ }
+ break;
+
+ case CLUSTER_CPUSET_GROUPS:
+ case CLUSTER_CPUACCT_GROUPS:
+ case CLUSTER_CPUSCHED_GROUPS:
+ case CLUSTER_MEMORY_GROUPS:
+ case CLUSTER_NET_CLS_GROUPS:
+ case CLUSTER_BLKIO_GROUPS:
+ return cgroup_group_fetch(mdesc->m_desc.pmid, inst, atom);
+
+ case CLUSTER_PID_FD:
+ if (!have_access)
+ return PM_ERR_PERMISSION;
+ if (idp->item > PROC_PID_FD_COUNT)
+ return PM_ERR_PMID;
+ if ((entry = fetch_proc_pid_fd(inst, &proc_pid)) == NULL)
+ return PM_ERR_INST;
+ atom->ul = entry->fd_count;
+ break;
+
+ case CLUSTER_PID_CGROUP:
+ if (!have_access)
+ return PM_ERR_PERMISSION;
+ if (idp->item > PROC_PID_CGROUP)
+ return PM_ERR_PMID;
+ if ((entry = fetch_proc_pid_cgroup(inst, &proc_pid)) == NULL) {
+ if (oserror() == ENOENT) return PM_ERR_APPVERSION;
+ if (oserror() != ENODATA) return PM_ERR_INST;
+ atom->cp = "";
+ } else {
+ atom->cp = proc_strings_lookup(entry->cgroup_id);
+ }
+ break;
+
+ case CLUSTER_PID_LABEL:
+ if (!have_access)
+ return PM_ERR_PERMISSION;
+ if (idp->item > PROC_PID_LABEL)
+ return PM_ERR_PMID;
+ if ((entry = fetch_proc_pid_label(inst, &proc_pid)) == NULL) {
+ if (oserror() == ENOENT) return PM_ERR_APPVERSION;
+ if (oserror() != ENODATA) return PM_ERR_INST;
+ atom->cp = "";
+ } else {
+ atom->cp = proc_strings_lookup(entry->label_id);
+ }
+ break;
+
+ case CLUSTER_CONTROL:
+ switch (idp->item) {
+ /* case 1: not reached -- proc.control.all.threads is direct */
+ case 2: /* proc.control.perclient.threads */
+ atom->ul = proc_ctx_threads(pmdaGetContext(), threads);
+ break;
+ case 3: /* proc.control.perclient.cgroups */
+ cp = proc_ctx_cgroups(pmdaGetContext(), cgroups);
+ atom->cp = (char *)(cp ? cp : "");
+ break;
+ default:
+ return PM_ERR_PMID;
+ }
+ break;
+
+ default: /* unknown cluster */
+ return PM_ERR_PMID;
+ }
+
+ return PMDA_FETCH_STATIC;
+}
+
+/*
+ * PMDA fetch entry point.
+ *
+ * Counts how many of the requested metrics fall into each known cluster,
+ * asks proc_refresh() to refresh on that basis, and then hands the request
+ * to pmdaFetch().  The file-level have_access flag is set up for the
+ * duration of the fetch from proc_ctx_access() (or the all_access
+ * override) and afterwards takes the value returned by proc_ctx_revert().
+ *
+ * Returns whatever pmdaFetch() returned.
+ */
+static int
+proc_fetch(int numpmid, pmID pmidlist[], pmResult **resp, pmdaExt *pmda)
+{
+    int wanted[NUM_CLUSTERS] = { 0 };
+    int idx, result;
+
+    /* tally the requested metrics per cluster, ignoring unknown clusters */
+    for (idx = 0; idx < numpmid; idx++) {
+        int c = proc_pmid_cluster(pmidlist[idx]);
+
+        if (c < MIN_CLUSTER || c >= NUM_CLUSTERS)
+            continue;
+        wanted[c]++;
+    }
+
+    have_access = proc_ctx_access(pmda->e_context) || all_access;
+    proc_refresh(pmda, wanted);
+    result = pmdaFetch(numpmid, pmidlist, resp, pmda);
+    have_access = proc_ctx_revert(pmda->e_context);
+
+    return result;
+}
+
+/*
+ * PMDA store entry point.
+ *
+ * Only metrics in CLUSTER_CONTROL may be stored, and each must carry
+ * exactly one value:
+ *   item 1  proc.control.all.threads       (0 or 1, needs have_access)
+ *   item 2  proc.control.perclient.threads
+ *   item 3  proc.control.perclient.cgroups
+ * Anything else is rejected with PM_ERR_PERMISSION.  Processing stops at
+ * the first value set that yields a negative status.
+ *
+ * Returns the status of the last value set processed (0 when the result
+ * holds no value sets).
+ */
+static int
+proc_store(pmResult *result, pmdaExt *pmda)
+{
+    int n, sts = 0;
+
+    have_access = proc_ctx_access(pmda->e_context) || all_access;
+
+    for (n = 0; n < result->numpmid && sts >= 0; n++) {
+        pmValueSet *vsp = result->vset[n];
+        __pmID_int *idp = (__pmID_int *)&(vsp->pmid);
+        pmAtomValue av;
+
+        if (idp->cluster != CLUSTER_CONTROL) {
+            sts = PM_ERR_PERMISSION;
+            continue;
+        }
+        if (vsp->numval != 1) {
+            sts = PM_ERR_INST;
+            continue;
+        }
+
+        if (idp->item == 1) {
+            /* proc.control.all.threads */
+            if (!have_access) {
+                sts = PM_ERR_PERMISSION;
+            } else {
+                sts = pmExtractValue(vsp->valfmt, &vsp->vlist[0],
+                                     PM_TYPE_U32, &av, PM_TYPE_U32);
+                if (sts >= 0) {
+                    /* only zero or one allowed */
+                    if (av.ul <= 1)
+                        threads = av.ul;
+                    else
+                        sts = PM_ERR_CONV;
+                }
+            }
+        } else if (idp->item == 2) {
+            /* proc.control.perclient.threads */
+            sts = pmExtractValue(vsp->valfmt, &vsp->vlist[0],
+                                 PM_TYPE_U32, &av, PM_TYPE_U32);
+            if (sts >= 0)
+                sts = proc_ctx_set_threads(pmda->e_context, av.ul);
+        } else if (idp->item == 3) {
+            /* proc.control.perclient.cgroups */
+            sts = pmExtractValue(vsp->valfmt, &vsp->vlist[0],
+                                 PM_TYPE_STRING, &av, PM_TYPE_STRING);
+            if (sts >= 0) {
+                sts = proc_ctx_set_cgroups(pmda->e_context, av.cp);
+                /* the extracted string is released here only on failure */
+                if (sts < 0)
+                    free(av.cp);
+            }
+        } else {
+            sts = PM_ERR_PERMISSION;
+        }
+    }
+
+    have_access = proc_ctx_revert(pmda->e_context);
+    return sts;
+}
+
+/*
+ * Help-text callback.  For PMID help text the dynamic metric lookup is
+ * tried first; any result other than -ENOENT is returned as is.  In all
+ * remaining cases the request falls through to pmdaText().
+ */
+static int
+proc_text(int ident, int type, char **buf, pmdaExt *pmda)
+{
+    int dynamic;
+
+    if ((type & PM_TEXT_PMID) != PM_TEXT_PMID)
+        return pmdaText(ident, type, buf, pmda);
+
+    dynamic = pmdaDynamicLookupText(ident, type, buf, pmda);
+    if (dynamic == -ENOENT)
+        return pmdaText(ident, type, buf, pmda);
+    return dynamic;
+}
+
+/*
+ * Name-to-PMID callback: find the dynamic namespace tree responsible for
+ * name and resolve the name within it.  Returns PM_ERR_NAME when no tree
+ * matches, otherwise the result of pmdaTreePMID().
+ */
+static int
+proc_pmid(const char *name, pmID *pmid, pmdaExt *pmda)
+{
+    pmdaNameSpace *ns;
+
+    ns = pmdaDynamicLookupName(pmda, name);
+    if (!ns)
+        return PM_ERR_NAME;
+
+    if ((pmDebug & DBG_TRACE_APPL2) != 0) {
+        fprintf(stderr, "proc_pmid: name=%s tree:\n", name);
+        __pmDumpNameNode(stderr, ns->root, 1);
+    }
+
+    return pmdaTreePMID(ns, name, pmid);
+}
+
+/*
+ * PMID-to-name callback: find the dynamic namespace tree responsible for
+ * pmid and collect the matching names from it.  Returns PM_ERR_PMID when
+ * no tree matches, otherwise the result of pmdaTreeName().
+ */
+static int
+proc_name(pmID pmid, char ***nameset, pmdaExt *pmda)
+{
+    pmdaNameSpace *ns;
+
+    ns = pmdaDynamicLookupPMID(pmda, pmid);
+    if (!ns)
+        return PM_ERR_PMID;
+
+    if ((pmDebug & DBG_TRACE_APPL2) != 0) {
+        fprintf(stderr, "proc_name: pmid=%s tree:\n", pmIDStr(pmid));
+        __pmDumpNameNode(stderr, ns->root, 1);
+    }
+
+    return pmdaTreeName(ns, pmid, nameset);
+}
+
+/*
+ * Namespace-children callback: find the dynamic namespace tree responsible
+ * for name and list the children of that node.  Returns PM_ERR_NAME when
+ * no tree matches, otherwise the result of pmdaTreeChildren().
+ */
+static int
+proc_children(const char *name, int flag, char ***kids, int **sts, pmdaExt *pmda)
+{
+    pmdaNameSpace *ns;
+
+    ns = pmdaDynamicLookupName(pmda, name);
+    if (!ns)
+        return PM_ERR_NAME;
+
+    if ((pmDebug & DBG_TRACE_APPL2) != 0) {
+        fprintf(stderr, "proc_children: name=%s flag=%d tree:\n", name, flag);
+        __pmDumpNameNode(stderr, ns->root, 1);
+    }
+
+    return pmdaTreeChildren(ns, name, flag, kids, sts);
+}
+
+/*
+ * Helper routines for accessing a generic static string dictionary
+ */
+
+/*
+ * Map a string dictionary index back to its string.  Returns the cached
+ * name when the cache entry for index is active, otherwise "".
+ */
+char *
+proc_strings_lookup(int index)
+{
+    char *name;
+    int state;
+
+    state = pmdaCacheLookup(INDOM(STRINGS_INDOM), index, &name, NULL);
+    if (state != PMDA_CACHE_ACTIVE)
+        return "";
+    return name;
+}
+
+/*
+ * Add buf to the string dictionary and return the result of the
+ * underlying pmdaCacheStore(PMDA_CACHE_ADD) call.
+ */
+int
+proc_strings_insert(const char *buf)
+{
+    return pmdaCacheStore(INDOM(STRINGS_INDOM), PMDA_CACHE_ADD, buf, NULL);
+}
+
+/*
+ * Initialise the agent (both daemon and DSO).
+ */
+
+/*
+ * proc_init - set up the dispatch table and instance domains.
+ *
+ * dp is the PMDA interface structure to fill in.  When running as a DSO
+ * (_isDSO set) the help file path is built here and pmdaDSO() is called;
+ * main() does the equivalent via pmdaDaemon() before calling this routine.
+ * Nothing further is done if dp->status is non-zero after that step.
+ */
+void
+__PMDA_INIT_CALL
+proc_init(pmdaInterface *dp)
+{
+ int nindoms = sizeof(indomtab)/sizeof(indomtab[0]);
+ int nmetrics = sizeof(metrictab)/sizeof(metrictab[0]);
+ char *envpath;
+
+ _pm_system_pagesize = getpagesize();
+ /* PROC_STATSPATH in the environment overrides the prefix used for /proc paths */
+ if ((envpath = getenv("PROC_STATSPATH")) != NULL)
+ proc_statspath = envpath;
+
+ if (_isDSO) {
+ char helppath[MAXPATHLEN];
+ int sep = __pmPathSeparator();
+ snprintf(helppath, sizeof(helppath), "%s%c" "proc" "%c" "help",
+ pmGetConfig("PCP_PMDAS_DIR"), sep, sep);
+ pmdaDSO(dp, PMDA_INTERFACE_6, "proc DSO", helppath);
+ }
+
+ /* bail out if the interface is already in an error state */
+ if (dp->status != 0)
+ return;
+ /* NOTE(review): presumably requests per-connection attribute PDUs for proc_ctx_attrs -- confirm */
+ dp->comm.flags |= PDU_FLAG_AUTH;
+
+ /* install this agent's callbacks in the version 6 dispatch table */
+ dp->version.six.instance = proc_instance;
+ dp->version.six.store = proc_store;
+ dp->version.six.fetch = proc_fetch;
+ dp->version.six.text = proc_text;
+ dp->version.six.pmid = proc_pmid;
+ dp->version.six.name = proc_name;
+ dp->version.six.children = proc_children;
+ dp->version.six.attribute = proc_ctx_attrs;
+ pmdaSetEndContextCallBack(dp, proc_ctx_end);
+ pmdaSetFetchCallBack(dp, proc_fetchCallBack);
+
+ /*
+ * Initialize the instance domain table.
+ */
+ indomtab[CPU_INDOM].it_indom = CPU_INDOM;
+ indomtab[DISK_INDOM].it_indom = DISK_INDOM;
+ indomtab[DEVT_INDOM].it_indom = DEVT_INDOM;
+ indomtab[PROC_INDOM].it_indom = PROC_INDOM;
+ indomtab[STRINGS_INDOM].it_indom = STRINGS_INDOM;
+ indomtab[CGROUP_SUBSYS_INDOM].it_indom = CGROUP_SUBSYS_INDOM;
+ indomtab[CGROUP_MOUNTS_INDOM].it_indom = CGROUP_MOUNTS_INDOM;
+
+ /* the per-process code works directly on the PROC_INDOM table slot */
+ proc_pid.indom = &indomtab[PROC_INDOM];
+
+ /*
+ * Read System.map and /proc/ksyms. Used to translate wait channel
+ * addresses to symbol names.
+ * Added by Mike Mason <mmlnx@us.ibm.com>
+ */
+ read_ksym_sources(kernel_uname.release);
+
+ /* cgroup and per-context setup happen before pmdaInit() is called */
+ cgroup_init(metrictab, nmetrics);
+ proc_ctx_init();
+
+ pmdaSetFlags(dp, PMDA_EXT_FLAG_HASHED);
+ pmdaInit(dp, indomtab, nindoms, metrictab, nmetrics);
+
+ /* string metrics use the pmdaCache API for value indexing */
+ pmdaCacheOp(INDOM(STRINGS_INDOM), PMDA_CACHE_STRINGS);
+
+ /* cgroup metrics use the pmdaCache API for indom indexing */
+ pmdaCacheOp(INDOM(CPU_INDOM), PMDA_CACHE_CULL);
+ pmdaCacheOp(INDOM(DISK_INDOM), PMDA_CACHE_CULL);
+ pmdaCacheOp(INDOM(CGROUP_SUBSYS_INDOM), PMDA_CACHE_CULL);
+ pmdaCacheOp(INDOM(CGROUP_MOUNTS_INDOM), PMDA_CACHE_CULL);
+}
+
+/*
+ * Command line options accepted by the daemon.  The agent-specific ones
+ * are -A (no access checks), -L (include threads) and -r NAME (restrict
+ * to a cgroup); main() handles those three itself.
+ */
+pmLongOptions longopts[] = {
+ PMDA_OPTIONS_HEADER("Options"),
+ PMOPT_DEBUG,
+ { "no-access-checks", 0, 'A', 0, "no access checks will be performed (insecure, beware!)" },
+ PMDAOPT_DOMAIN,
+ PMDAOPT_LOGFILE,
+ { "with-threads", 0, 'L', 0, "include threads in the all-processes instance domain" },
+ { "from-cgroup", 1, 'r', "NAME", "restrict monitoring to processes in the named cgroup" },
+ PMDAOPT_USERNAME,
+ PMOPT_HELP,
+ PMDA_OPTIONS_END
+};
+
+/*
+ * Option parsing state handed to pmdaGetOptions(); the short option
+ * string lists the same letters as the longopts table above.
+ */
+pmdaOptions opts = {
+ .short_options = "AD:d:l:Lr:U:?",
+ .long_options = longopts,
+};
+
+/*
+ * Daemon entry point: set up the dispatch structure, parse the command
+ * line, open the log, switch to the requested user (default "root"),
+ * initialise the agent and then run the PMDA main loop.
+ */
+int
+main(int argc, char **argv)
+{
+    int opt;
+    int sep = __pmPathSeparator();
+    pmdaInterface dispatch;
+    char helppath[MAXPATHLEN];
+    char *username = "root";
+
+    _isDSO = 0;
+    __pmSetProgname(argv[0]);
+    snprintf(helppath, sizeof(helppath), "%s%c" "proc" "%c" "help",
+             pmGetConfig("PCP_PMDAS_DIR"), sep, sep);
+    pmdaDaemon(&dispatch, PMDA_INTERFACE_6, pmProgname, PROC, "proc.log", helppath);
+
+    /* only the agent-specific options need handling here */
+    for (;;) {
+        opt = pmdaGetOptions(argc, argv, &opts, &dispatch);
+        if (opt == EOF)
+            break;
+        if (opt == 'A')
+            all_access = 1;
+        else if (opt == 'L')
+            threads = 1;
+        else if (opt == 'r')
+            cgroups = opts.optarg;
+    }
+
+    if (opts.errors) {
+        pmdaUsageMessage(&opts);
+        exit(1);
+    }
+    if (opts.username)
+        username = opts.username;
+
+    pmdaOpenLog(&dispatch);
+    __pmSetProcessIdentity(username);
+
+    proc_init(&dispatch);
+    pmdaConnect(&dispatch);
+    pmdaMain(&dispatch);
+    exit(0);
+}
diff --git a/src/pmdas/linux_proc/proc_pid.c b/src/pmdas/linux_proc/proc_pid.c
new file mode 100644
index 0000000..152d96c
--- /dev/null
+++ b/src/pmdas/linux_proc/proc_pid.c
@@ -0,0 +1,957 @@
+/*
+ * Linux proc/<pid>/{stat,statm,status,...} Clusters
+ *
+ * Copyright (c) 2013 Red Hat.
+ * Copyright (c) 2000,2004,2006 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2010 Aconex. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "pmapi.h"
+#include "impl.h"
+#include "pmda.h"
+#include <ctype.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include "proc_pid.h"
+#include "indom.h"
+
+/*
+ * The pid list built by the most recent refresh_proc_pid() call: the pid
+ * array itself (pids), the number of entries in use (count), the
+ * allocated capacity (size) and whether threads were requested (threads).
+ */
+static proc_pid_list_t pids;
+
+/*
+ * qsort() comparator ordering two ints (pids) ascending: negative when
+ * the first is smaller, zero when equal, positive when it is larger.
+ */
+static int
+compare_pid(const void *pa, const void *pb)
+{
+    const int *lhs = (const int *)pa;
+    const int *rhs = (const int *)pb;
+
+    return *lhs - *rhs;
+}
+
+/*
+ * Append one pid to the file-level pid list, growing the array in steps
+ * of 64 entries when it is full.
+ *
+ * If the array cannot be grown the list is reset to empty (as before),
+ * but the old array is now released rather than leaked: the realloc()
+ * result is only stored once it is known to be non-NULL.
+ */
+static void
+pidlist_append_pid(int pid)
+{
+    if (pids.count >= pids.size) {
+        int *grown;
+
+        pids.size += 64;
+        grown = (int *)realloc(pids.pids, pids.size * sizeof(int));
+        if (grown == NULL) {
+            perror("pidlist_append: out of memory");
+            /* realloc() left the old block untouched, so free it */
+            free(pids.pids);
+            pids.pids = NULL;
+            pids.size = pids.count = 0;
+            return; /* soldier on bravely */
+        }
+        pids.pids = grown;
+    }
+    pids.pids[pids.count++] = pid;
+}
+
+/*
+ * Append a pid given as a decimal string (a /proc directory entry name).
+ */
+static void
+pidlist_append(const char *pidname)
+{
+    int pid = atoi(pidname);
+
+    pidlist_append_pid(pid);
+}
+
+/*
+ * Append the threads of process pid (the numeric entries found under
+ * <proc_statspath>/proc/<pid>/task) to the pid list, skipping the entry
+ * whose name equals pid itself.  A task directory that cannot be opened
+ * is silently ignored.
+ *
+ * The path is built with a bounded snprintf() because proc_statspath can
+ * come from the environment and is of unknown length.
+ */
+static void
+tasklist_append(const char *pid)
+{
+    DIR *taskdirp;
+    struct dirent *tdp;
+    char taskpath[MAXPATHLEN];
+
+    snprintf(taskpath, sizeof(taskpath), "%s/proc/%s/task", proc_statspath, pid);
+    if ((taskdirp = opendir(taskpath)) == NULL)
+        return;
+
+    while ((tdp = readdir(taskdirp)) != NULL) {
+        /* isdigit() needs an unsigned char value, plain char may be negative */
+        if (!isdigit((unsigned char)tdp->d_name[0]) || strcmp(pid, tdp->d_name) == 0)
+            continue;
+        pidlist_append(tdp->d_name);
+    }
+    closedir(taskdirp);
+}
+
+/*
+ * Build the pid list from a cgroup instead of from /proc.
+ *
+ * Reads pids from the cgroup's "tasks" file when threads are wanted and
+ * from its "cgroup.procs" file otherwise, appending each one to the pid
+ * list.  A file that cannot be opened simply contributes no pids.
+ *
+ * Always returns 0.
+ */
+static int
+refresh_cgroup_pidlist(int want_threads, const char *cgroup)
+{
+    char path[MAXPATHLEN];
+    FILE *fp;
+    int pid;
+
+    /*
+     * We're running in cgroups mode where a subset of the processes is
+     * going to be returned based on the cgroup specified earlier via a
+     * store into the proc.control.{all,perclient}.cgroups metric.
+     *
+     * Use the "cgroup.procs" or "tasks" file depending on want_threads.
+     * Note that both these files are already sorted, ascending numeric.
+     */
+    if (!want_threads)
+        snprintf(path, sizeof(path), "%s%s/cgroup.procs", proc_statspath, cgroup);
+    else
+        snprintf(path, sizeof(path), "%s%s/tasks", proc_statspath, cgroup);
+
+    fp = fopen(path, "r");
+    if (fp == NULL)
+        return 0;
+
+    while (fscanf(fp, "%d\n", &pid) == 1)
+        pidlist_append_pid(pid);
+    fclose(fp);
+
+    return 0;
+}
+
+/*
+ * Build the pid list from every numeric entry under <proc_statspath>/proc,
+ * adding each process's threads as well when want_threads is set, and
+ * sort the result into ascending order.
+ *
+ * Returns 0 on success or -oserror() if the directory cannot be opened.
+ */
+static int
+refresh_global_pidlist(int want_threads)
+{
+    char path[MAXPATHLEN];
+    struct dirent *dp;
+    DIR *dirp;
+
+    snprintf(path, sizeof(path), "%s/proc", proc_statspath);
+    dirp = opendir(path);
+    if (dirp == NULL)
+        return -oserror();
+
+    /* note: readdir on /proc ignores threads */
+    for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) {
+        if (!isdigit((int)dp->d_name[0]))
+            continue;
+        pidlist_append(dp->d_name);
+        if (want_threads)
+            tasklist_append(dp->d_name);
+    }
+    closedir(dirp);
+
+    qsort(pids.pids, pids.count, sizeof(int), compare_pid);
+    return 0;
+}
+
+/*
+ * Rebuild the process instance domain and the pid hash table from the
+ * current pid list.
+ *
+ * Every hash entry is first marked invalid.  Each pid in the list is then
+ * looked up; pids not yet in the hash get a new entry whose name is
+ * "NNNNNN cmdline", or "NNNNNN (name)" taken from the status file when
+ * cmdline is empty, or "NNNNNN <exiting>" when neither can be read.
+ * Entries seen are marked valid and recorded in the indom's instance set.
+ * Finally every entry still invalid is unlinked from the hash and freed
+ * together with its buffers.
+ *
+ * Allocation failures are reported with perror() and handled by leaving
+ * the affected pid (or, if the instance set cannot be grown, the whole
+ * indom) as it was, instead of dereferencing a NULL pointer.
+ */
+static void
+refresh_proc_pidlist(proc_pid_t *proc_pid)
+{
+    int i;
+    int ninst = 0;
+    int fd;
+    char *p;
+    char buf[MAXPATHLEN];
+    __pmHashNode *node, *next, *prev;
+    proc_pid_entry_t *ep;
+    pmdaIndom *indomp = proc_pid->indom;
+
+    if (indomp->it_numinst < pids.count) {
+        pmdaInstid *set;
+
+        set = (pmdaInstid *)realloc(indomp->it_set,
+                                    pids.count * sizeof(pmdaInstid));
+        if (set == NULL) {
+            /* keep the previous instance set and hash untouched */
+            perror("refresh_proc_pidlist: out of memory");
+            return;
+        }
+        indomp->it_set = set;
+    }
+
+    /*
+     * invalidate all entries so we can harvest pids that have exited
+     */
+    for (i = 0; i < proc_pid->pidhash.hsize; i++) {
+        for (node = proc_pid->pidhash.hash[i]; node != NULL; node = node->next) {
+            ep = (proc_pid_entry_t *)node->data;
+            ep->flags = 0;
+        }
+    }
+
+    /*
+     * walk pid list and add new pids to the hash table,
+     * marking entries valid as we go ...
+     */
+    for (i = 0; i < pids.count; i++) {
+        node = __pmHashSearch(pids.pids[i], &proc_pid->pidhash);
+        if (node == NULL) {
+            int k = 0;
+
+            ep = (proc_pid_entry_t *)calloc(1, sizeof(proc_pid_entry_t));
+            if (ep == NULL) {
+                perror("refresh_proc_pidlist: out of memory");
+                continue;
+            }
+
+            ep->id = pids.pids[i];
+
+            /*
+             * NOTE(review): the name layout below assumes the "%06d "
+             * prefix is exactly 7 characters, i.e. pids below 1000000.
+             */
+            snprintf(buf, sizeof(buf), "%s/proc/%d/cmdline", proc_statspath, pids.pids[i]);
+            if ((fd = open(buf, O_RDONLY)) >= 0) {
+                sprintf(buf, "%06d ", pids.pids[i]);
+                if ((k = read(fd, buf+7, sizeof(buf)-8)) > 0) {
+                    p = buf + k + 7;
+                    *p-- = '\0';
+                    /* Skip trailing nils, i.e. don't replace them */
+                    while (buf+7 < p) {
+                        if (*p-- != '\0') {
+                            break;
+                        }
+                    }
+                    /* Remove NULL terminators from cmdline string array */
+                    /* Suggested by Mike Mason <mmlnx@us.ibm.com> */
+                    while (buf+7 < p) {
+                        if (*p == '\0') *p = ' ';
+                        p--;
+                    }
+                }
+                close(fd);
+            }
+
+            if (k == 0) {
+                /*
+                 * If a process is swapped out, /proc/<pid>/cmdline
+                 * returns an empty string so we have to get it
+                 * from /proc/<pid>/status or /proc/<pid>/stat
+                 */
+                snprintf(buf, sizeof(buf), "%s/proc/%d/status", proc_statspath, pids.pids[i]);
+                if ((fd = open(buf, O_RDONLY)) >= 0) {
+                    /* We engage in a bit of a hanky-panky here:
+                     * the string should look like "123456 (name)",
+                     * we get it from /proc/XX/status as "Name: name\n...",
+                     * to fit the 6 digits of PID and opening parenthesis,
+                     * save 2 bytes at the start of the buffer.
+                     * And don't forget to leave 2 bytes for the trailing
+                     * parenthesis and the nil. Here is
+                     * an example of what we're trying to achieve:
+                     * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+                     * |  |  | N| a| m| e| :|\t| i| n| i| t|\n| S|...
+                     * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+                     * | 0| 0| 0| 0| 0| 1|  | (| i| n| i| t| )|\0|...
+                     * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+ */
+                    if ((k = read(fd, buf+2, sizeof(buf)-4)) > 0) {
+                        int bc;
+
+                        /* read() does not terminate the data; strchr() needs it */
+                        buf[2+k] = '\0';
+                        if ((p = strchr(buf+2, '\n')) == NULL)
+                            p = buf+2+k;        /* end of the data read */
+                        p[0] = ')';
+                        p[1] = '\0';
+                        bc = sprintf(buf, "%06d ", pids.pids[i]);
+                        buf[bc] = '(';
+                    }
+                    close(fd);
+                }
+            }
+
+            if (k <= 0) {
+                /* hmm .. must be exiting */
+                sprintf(buf, "%06d <exiting>", pids.pids[i]);
+            }
+
+            if ((ep->name = strdup(buf)) == NULL) {
+                perror("refresh_proc_pidlist: out of memory");
+                free(ep);
+                continue;
+            }
+
+            if (__pmHashAdd(pids.pids[i], (void *)ep, &proc_pid->pidhash) < 0) {
+                /* not in the hash, so it must not appear in the indom either */
+                free(ep->name);
+                free(ep);
+                continue;
+            }
+        }
+        else
+            ep = (proc_pid_entry_t *)node->data;
+
+        /* mark pid as still existing */
+        ep->flags |= PROC_PID_FLAG_VALID;
+
+        /* refresh the indom pointer */
+        indomp->it_set[ninst].i_inst = ep->id;
+        indomp->it_set[ninst].i_name = ep->name;
+        ninst++;
+    }
+    /* only pids that actually have an entry are exported as instances */
+    indomp->it_numinst = ninst;
+
+    /*
+     * harvest exited pids from the pid hash table
+     */
+    for (i = 0; i < proc_pid->pidhash.hsize; i++) {
+        prev = NULL;
+        for (node = proc_pid->pidhash.hash[i]; node != NULL; node = next) {
+            next = node->next;
+            ep = (proc_pid_entry_t *)node->data;
+            if (ep->flags & PROC_PID_FLAG_VALID) {
+                prev = node;
+                continue;
+            }
+
+            /* free(NULL) is a no-op, so no per-buffer NULL checks needed */
+            free(ep->name);
+            free(ep->stat_buf);
+            free(ep->status_buf);
+            free(ep->statm_buf);
+            free(ep->maps_buf);
+            free(ep->schedstat_buf);
+            free(ep->io_buf);
+            free(ep->wchan_buf);
+
+            if (prev == NULL)
+                proc_pid->pidhash.hash[i] = next;
+            else
+                prev->next = next;
+            free(ep);
+            free(node);
+        }
+    }
+}
+
+/*
+ * Refresh the pid list and the process instance domain.
+ *
+ * proc_pid  per-process state (hash table and indom) to update
+ * threads   non-zero to include threads in the pid list
+ * cgroups   when non-NULL and non-empty, restrict the list to the pids
+ *           of this cgroup; otherwise scan all of /proc
+ *
+ * Returns 0 on success or the negative status from building the pid list.
+ */
+int
+refresh_proc_pid(proc_pid_t *proc_pid, int threads, const char *cgroups)
+{
+    int sts;
+
+    pids.count = 0;
+    pids.threads = threads;
+
+    sts = (cgroups && cgroups[0] != '\0') ?
+                refresh_cgroup_pidlist(threads, cgroups) :
+                refresh_global_pidlist(threads);
+    if (sts < 0)
+        return sts;
+
+#if PCP_DEBUG
+    /* report the number of pids found; sts is always 0 at this point */
+    if (pmDebug & DBG_TRACE_LIBPMDA)
+        fprintf(stderr,
+                "refresh_proc_pid: %d pids (threads=%d, cgroups=\"%s\")\n",
+                (int)pids.count, threads, cgroups ? cgroups : "");
+#endif
+
+    refresh_proc_pidlist(proc_pid);
+    return 0;
+}
+
+
+/*
+ * Open a proc file, taking into account that we may want thread info
+ * rather than process information.
+ *
+ * We make (ab)use of some obscure Linux procfs mechanisms here!
+ * Even though readdir(/proc) does not contain tasks, we can still open
+ * taskid directory files; on top of that, the tasks sub-directory in a
+ * task group has all (peer) tasks in that group, even for "children".
+ */
+/*
+ * Open the per-process file named base for entry ep, read-only.
+ *
+ * When the pid list was built with threads, the task path
+ * <proc_statspath>/proc/<id>/task/<id>/<base> is tried first, falling back
+ * to <proc_statspath>/proc/<id>/<base>.  Returns the open(2) result.
+ *
+ * The path buffer is MAXPATHLEN bytes and filled with snprintf() because
+ * proc_statspath can come from the environment and is of unknown length.
+ */
+static int
+proc_open(const char *base, proc_pid_entry_t *ep)
+{
+    int fd;
+    char buf[MAXPATHLEN];
+
+    if (pids.threads) {
+        snprintf(buf, sizeof(buf), "%s/proc/%d/task/%d/%s",
+                 proc_statspath, ep->id, ep->id, base);
+        if ((fd = open(buf, O_RDONLY)) >= 0)
+            return fd;
+        /* fallback to /proc path if task path open fails */
+    }
+    snprintf(buf, sizeof(buf), "%s/proc/%d/%s", proc_statspath, ep->id, base);
+    return open(buf, O_RDONLY);
+}
+
+/*
+ * Open the per-process directory named base for entry ep.
+ *
+ * When the pid list was built with threads, the task path
+ * <proc_statspath>/proc/<id>/task/<id>/<base> is tried first, falling back
+ * to <proc_statspath>/proc/<id>/<base>.  Returns the opendir(3) result.
+ *
+ * The path buffer is MAXPATHLEN bytes and filled with snprintf() because
+ * proc_statspath can come from the environment and is of unknown length.
+ */
+static DIR *
+proc_opendir(const char *base, proc_pid_entry_t *ep)
+{
+    DIR *dir;
+    char buf[MAXPATHLEN];
+
+    if (pids.threads) {
+        snprintf(buf, sizeof(buf), "%s/proc/%d/task/%d/%s",
+                 proc_statspath, ep->id, ep->id, base);
+        if ((dir = opendir(buf)) != NULL)
+            return dir;
+        /* fallback to /proc path if task path opendir fails */
+    }
+    snprintf(buf, sizeof(buf), "%s/proc/%d/%s", proc_statspath, ep->id, base);
+    return opendir(buf);
+}
+
+/*
+ * fetch a proc/<pid>/stat entry for pid
+ */
+/*
+ * Return the hash entry for pid id with its stat (and, where available,
+ * wchan) buffers filled in.
+ *
+ * Each file is read at most once per refresh: the corresponding FETCHED
+ * flag is set after the first attempt, successful or not.  The stat text
+ * is copied into ep->stat_buf with its final byte replaced by a nil; the
+ * wchan text is copied into ep->wchan_buf and nil-terminated.  Failure to
+ * open wchan is ignored.
+ *
+ * Returns NULL when id is not in the hash, or when a read or allocation
+ * failed during this call; otherwise the entry.
+ */
+proc_pid_entry_t *
+fetch_proc_pid_stat(int id, proc_pid_t *proc_pid)
+{
+    int fd;
+    int sts = 0;
+    int n;
+    __pmHashNode *node = __pmHashSearch(id, &proc_pid->pidhash);
+    proc_pid_entry_t *ep;
+    char buf[1024];
+    char *grown;
+
+    if (node == NULL) {
+#if PCP_DEBUG
+        if ((pmDebug & (DBG_TRACE_LIBPMDA|DBG_TRACE_DESPERATE)) == (DBG_TRACE_LIBPMDA|DBG_TRACE_DESPERATE)) {
+            char ibuf[1024];
+            fprintf(stderr, "fetch_proc_pid_stat: __pmHashSearch(%d, hash[%s]) -> NULL\n", id, pmInDomStr_r(proc_pid->indom->it_indom, ibuf, sizeof(ibuf)));
+        }
+#endif
+        return NULL;
+    }
+    ep = (proc_pid_entry_t *)node->data;
+
+    if (!(ep->flags & PROC_PID_FLAG_STAT_FETCHED)) {
+        if ((fd = proc_open("stat", ep)) < 0) {
+            sts = -oserror();
+#if PCP_DEBUG
+            if ((pmDebug & (DBG_TRACE_LIBPMDA|DBG_TRACE_DESPERATE)) == (DBG_TRACE_LIBPMDA|DBG_TRACE_DESPERATE)) {
+                char ibuf[1024];
+                char ebuf[1024];
+                fprintf(stderr, "fetch_proc_pid_stat: proc_open(\"stat\", ...) failed: id=%d, indom=%s, sts=%s\n", id, pmInDomStr_r(proc_pid->indom->it_indom, ibuf, sizeof(ibuf)), pmErrStr_r(sts, ebuf, sizeof(ebuf)));
+            }
+#endif
+        }
+        else if ((n = read(fd, buf, sizeof(buf))) < 0) {
+            sts = -oserror();
+#if PCP_DEBUG
+            if ((pmDebug & (DBG_TRACE_LIBPMDA|DBG_TRACE_DESPERATE)) == (DBG_TRACE_LIBPMDA|DBG_TRACE_DESPERATE)) {
+                char ibuf[1024];
+                char ebuf[1024];
+                fprintf(stderr, "fetch_proc_pid_stat: read \"stat\" failed: id=%d, indom=%s, sts=%s\n", id, pmInDomStr_r(proc_pid->indom->it_indom, ibuf, sizeof(ibuf)), pmErrStr_r(sts, ebuf, sizeof(ebuf)));
+            }
+#endif
+        }
+        else if (n == 0) {
+            /* eh? */
+            sts = -1;
+#if PCP_DEBUG
+            if ((pmDebug & (DBG_TRACE_LIBPMDA|DBG_TRACE_DESPERATE)) == (DBG_TRACE_LIBPMDA|DBG_TRACE_DESPERATE)) {
+                char ibuf[1024];
+                fprintf(stderr, "fetch_proc_pid_stat: read \"stat\" EOF?: id=%d, indom=%s\n", id, pmInDomStr_r(proc_pid->indom->it_indom, ibuf, sizeof(ibuf)));
+            }
+#endif
+        }
+        else {
+            if (ep->stat_buflen <= n) {
+                /* only adopt the new block once realloc() has succeeded */
+                grown = (char *)realloc(ep->stat_buf, n);
+                if (grown != NULL) {
+                    ep->stat_buf = grown;
+                    ep->stat_buflen = n;
+                }
+                else
+                    sts = -1;
+            }
+            if (sts == 0) {
+                memcpy(ep->stat_buf, buf, n);
+                ep->stat_buf[n-1] = '\0';
+            }
+        }
+        if (fd >= 0)
+            close(fd);
+        ep->flags |= PROC_PID_FLAG_STAT_FETCHED;
+    }
+
+    if (!(ep->flags & PROC_PID_FLAG_WCHAN_FETCHED)) {
+        if ((fd = proc_open("wchan", ep)) < 0) {
+            /* ignore failure here, backwards compat */
+            ;
+        }
+        else if ((n = read(fd, buf, sizeof(buf)-1)) < 0) {
+            sts = -oserror();
+#if PCP_DEBUG
+            if ((pmDebug & (DBG_TRACE_LIBPMDA|DBG_TRACE_DESPERATE)) == (DBG_TRACE_LIBPMDA|DBG_TRACE_DESPERATE)) {
+                char ibuf[1024];
+                char ebuf[1024];
+                fprintf(stderr, "fetch_proc_pid_stat: read \"wchan\" failed: id=%d, indom=%s, sts=%s\n", id, pmInDomStr_r(proc_pid->indom->it_indom, ibuf, sizeof(ibuf)), pmErrStr_r(sts, ebuf, sizeof(ebuf)));
+            }
+#endif
+        }
+        else if (n == 0) {
+            /* wchan is empty, nothing to add here */
+            ;
+        }
+        else {
+            int grew = 1;
+
+            n++;    /* no terminating null (from kernel) */
+            if (ep->wchan_buflen <= n) {
+                grown = (char *)realloc(ep->wchan_buf, n);
+                if (grown != NULL) {
+                    ep->wchan_buf = grown;
+                    ep->wchan_buflen = n;
+                }
+                else {
+                    grew = 0;
+                    sts = -1;
+                }
+            }
+            if (grew) {
+                memcpy(ep->wchan_buf, buf, n-1);
+                ep->wchan_buf[n-1] = '\0';
+            }
+        }
+        if (fd >= 0)
+            close(fd);
+        ep->flags |= PROC_PID_FLAG_WCHAN_FETCHED;
+    }
+
+    if (sts < 0)
+        return NULL;
+    return ep;
+}
+
+/*
+ * fetch a proc/<pid>/status entry for pid
+ * Added by Mike Mason <mmlnx@us.ibm.com>
+ */
+/*
+ * Step to the line following the one line points at.  Returns NULL when
+ * line is NULL or holds no further newline, so callers can never walk
+ * past the end of the buffer.
+ */
+static char *
+status_next_line(char *line)
+{
+    char *nl = (line != NULL) ? strchr(line, '\n') : NULL;
+
+    return (nl != NULL) ? nl + 1 : NULL;
+}
+
+/*
+ * Return the hash entry for pid id with its status buffer read and the
+ * status_lines pointers set to the individual lines of interest (uid, gid,
+ * the Vm* memory lines, threads and the signal masks).
+ *
+ * The file is read at most once per refresh: PROC_PID_FLAG_STATUS_FETCHED
+ * is set after the first attempt, successful or not.  Lines that are not
+ * present (the memory lines don't exist for kernel threads) are left as
+ * NULL pointers.
+ *
+ * Returns NULL when id is not in the hash, when the file cannot be read,
+ * or when it contains no "Uid:" line; otherwise the entry.
+ */
+proc_pid_entry_t *
+fetch_proc_pid_status(int id, proc_pid_t *proc_pid)
+{
+    int sts = 0;
+    __pmHashNode *node = __pmHashSearch(id, &proc_pid->pidhash);
+    proc_pid_entry_t *ep;
+
+    if (node == NULL)
+        return NULL;
+    ep = (proc_pid_entry_t *)node->data;
+
+    if (!(ep->flags & PROC_PID_FLAG_STATUS_FETCHED)) {
+        int fd;
+        int n;
+        char buf[1024];
+        char *curline;
+
+        if ((fd = proc_open("status", ep)) < 0)
+            sts = -oserror();
+        else if ((n = read(fd, buf, sizeof(buf))) < 0)
+            sts = -oserror();
+        else if (n == 0)
+            sts = -1;
+        else {
+            if (ep->status_buflen < n) {
+                /* only adopt the new block once realloc() has succeeded */
+                char *grown = (char *)realloc(ep->status_buf, n);
+
+                if (grown != NULL) {
+                    ep->status_buf = grown;
+                    ep->status_buflen = n;
+                }
+                else
+                    sts = -1;
+            }
+            if (sts == 0 && ep->status_buf == NULL)
+                sts = -1;
+            if (sts == 0) {
+                memcpy(ep->status_buf, buf, n);
+                ep->status_buf[n-1] = '\0';
+            }
+        }
+
+        if (sts == 0) {
+            /* drop line pointers left over from an earlier refresh */
+            memset(&ep->status_lines, 0, sizeof(ep->status_lines));
+
+            /* assign pointers to individual lines in buffer */
+            curline = ep->status_buf;
+            while (curline != NULL && strncmp(curline, "Uid:", 4) != 0)
+                curline = status_next_line(curline);
+
+            if (curline == NULL) {
+                /* truncated or unexpected file: no Uid line at all */
+                sts = -1;
+            }
+            else {
+                /* user & group IDs; strsep() copes with a NULL cursor */
+                ep->status_lines.uid = strsep(&curline, "\n");
+                ep->status_lines.gid = strsep(&curline, "\n");
+            }
+
+            while (sts == 0 && curline != NULL) {
+                if (strncmp(curline, "VmSize:", 7) == 0) {
+                    /* memory info - these lines don't exist for kernel threads */
+                    ep->status_lines.vmsize = strsep(&curline, "\n");
+                    ep->status_lines.vmlck = strsep(&curline, "\n");
+                    if (curline != NULL && strncmp(curline, "VmRSS:", 6) != 0)
+                        curline = status_next_line(curline);    /* Have VmPin: ? */
+                    if (curline != NULL && strncmp(curline, "VmRSS:", 6) != 0)
+                        curline = status_next_line(curline);    /* Have VmHWM: ? */
+                    ep->status_lines.vmrss = strsep(&curline, "\n");
+                    ep->status_lines.vmdata = strsep(&curline, "\n");
+                    ep->status_lines.vmstk = strsep(&curline, "\n");
+                    ep->status_lines.vmexe = strsep(&curline, "\n");
+                    ep->status_lines.vmlib = strsep(&curline, "\n");
+                    curline = status_next_line(curline);        /* skip VmPTE */
+                    ep->status_lines.vmswap = strsep(&curline, "\n");
+                    ep->status_lines.threads = strsep(&curline, "\n");
+                }
+                else if (strncmp(curline, "SigPnd:", 7) == 0) {
+                    /* signal masks */
+                    ep->status_lines.sigpnd = strsep(&curline, "\n");
+                    ep->status_lines.sigblk = strsep(&curline, "\n");
+                    ep->status_lines.sigign = strsep(&curline, "\n");
+                    ep->status_lines.sigcgt = strsep(&curline, "\n");
+                    break;      /* we're done */
+                }
+                else {
+                    curline = status_next_line(curline);
+                }
+            }
+        }
+        if (fd >= 0)
+            close(fd);
+        ep->flags |= PROC_PID_FLAG_STATUS_FETCHED;
+    }
+
+    return (sts < 0) ? NULL : ep;
+}
+
+/*
+ * fetch a proc/<pid>/statm entry for pid
+ *
+ * Looks up the entry for "id" in proc_pid->pidhash and, unless the
+ * STATM_FETCHED flag is already set, reads the statm file into
+ * ep->statm_buf.  The last byte read is replaced by a NUL so the
+ * buffer can be handled as a string.
+ *
+ * Returns the entry, or NULL if the pid is unknown, the file could
+ * not be read or the buffer could not be allocated.
+ */
+proc_pid_entry_t *
+fetch_proc_pid_statm(int id, proc_pid_t *proc_pid)
+{
+    int sts = 0;
+    __pmHashNode *node = __pmHashSearch(id, &proc_pid->pidhash);
+    proc_pid_entry_t *ep;
+
+    if (node == NULL)
+        return NULL;
+    ep = (proc_pid_entry_t *)node->data;
+
+    if (!(ep->flags & PROC_PID_FLAG_STATM_FETCHED)) {
+        char buf[1024];
+        int fd, n;
+
+        if ((fd = proc_open("statm", ep)) < 0)
+            sts = -oserror();
+        else if ((n = read(fd, buf, sizeof(buf))) < 0)
+            sts = -oserror();
+        else if (n == 0)
+            /* eh? */
+            sts = -1;
+        else {
+            if (ep->statm_buf == NULL || ep->statm_buflen < n) {
+                /* grow via a temporary so a failure keeps the old buffer */
+                char *tmp = (char *)realloc(ep->statm_buf, n);
+
+                if (tmp == NULL)
+                    sts = -1;
+                else {
+                    ep->statm_buf = tmp;
+                    ep->statm_buflen = n;
+                }
+            }
+            if (sts == 0) {
+                memcpy(ep->statm_buf, buf, n);
+                ep->statm_buf[n-1] = '\0';
+            }
+        }
+
+        if (fd >= 0)
+            close(fd);
+        ep->flags |= PROC_PID_FLAG_STATM_FETCHED;
+    }
+
+    return (sts < 0) ? NULL : ep;
+}
+
+
+/*
+ * fetch a proc/<pid>/maps entry for pid
+ * WARNING: This can be very large! Only ask for it if you really need it.
+ * Added by Mike Mason <mmlnx@us.ibm.com>
+ *
+ * Looks up the entry for "id" in proc_pid->pidhash and, unless the
+ * MAPS_FETCHED flag is already set, reads the whole maps file into
+ * ep->maps_buf (grown as needed) and NUL terminates it directly after
+ * the data just read.  An empty maps file yields a zero length string.
+ *
+ * Returns the entry, or NULL if the pid is unknown, the file could
+ * not be opened or memory could not be allocated.
+ */
+proc_pid_entry_t *
+fetch_proc_pid_maps(int id, proc_pid_t *proc_pid)
+{
+    int sts = 0;
+    __pmHashNode *node = __pmHashSearch(id, &proc_pid->pidhash);
+    proc_pid_entry_t *ep;
+
+    if (node == NULL)
+        return NULL;
+    ep = (proc_pid_entry_t *)node->data;
+
+    if (!(ep->flags & PROC_PID_FLAG_MAPS_FETCHED)) {
+        int fd;
+
+        if ((fd = proc_open("maps", ep)) < 0)
+            sts = -oserror();
+        else {
+            char buf[1024];
+            char *tmp;
+            int n, len = 0;
+
+            /* a failed read simply ends the loop: keep what was read */
+            while ((n = read(fd, buf, sizeof(buf))) > 0) {
+                /* always keep one spare byte for the terminator */
+                if (ep->maps_buf == NULL || ep->maps_buflen <= len + n) {
+                    tmp = (char *)realloc(ep->maps_buf, len + n + 1);
+                    if (tmp == NULL) {
+                        sts = -1;
+                        break;
+                    }
+                    ep->maps_buf = tmp;
+                    ep->maps_buflen = len + n + 1;
+                }
+                memcpy(ep->maps_buf + len, buf, n);
+                len += n;
+            }
+            /* If there are no maps, make maps_buf point to a zero length string. */
+            if (sts == 0 && ep->maps_buf == NULL) {
+                if ((tmp = (char *)malloc(1)) == NULL)
+                    sts = -1;
+                else {
+                    ep->maps_buf = tmp;
+                    ep->maps_buflen = 1;
+                }
+            }
+            if (sts == 0) {
+                /*
+                 * terminate at the end of this fetch's data, not at the
+                 * end of the (possibly larger, reused) allocation
+                 */
+                ep->maps_buf[len] = '\0';
+                ep->flags |= PROC_PID_FLAG_MAPS_FETCHED;
+            }
+            close(fd);
+        }
+    }
+
+    return (sts < 0) ? NULL : ep;
+}
+
+/*
+ * fetch a proc/<pid>/schedstat entry for pid
+ *
+ * Looks up the entry for "id" in proc_pid->pidhash and, unless the
+ * SCHEDSTAT_FETCHED flag is already set, reads the schedstat file
+ * into ep->schedstat_buf.  The last byte read is replaced by a NUL so
+ * the buffer can be handled as a string.
+ *
+ * Returns the entry, or NULL if the pid is unknown, the file could
+ * not be read or the buffer could not be allocated.
+ */
+proc_pid_entry_t *
+fetch_proc_pid_schedstat(int id, proc_pid_t *proc_pid)
+{
+    int sts = 0;
+    __pmHashNode *node = __pmHashSearch(id, &proc_pid->pidhash);
+    proc_pid_entry_t *ep;
+
+    if (node == NULL)
+        return NULL;
+    ep = (proc_pid_entry_t *)node->data;
+
+    if (!(ep->flags & PROC_PID_FLAG_SCHEDSTAT_FETCHED)) {
+        int fd, n;
+        char buf[1024];
+
+        if ((fd = proc_open("schedstat", ep)) < 0)
+            sts = -oserror();
+        else if ((n = read(fd, buf, sizeof(buf))) < 0)
+            sts = -oserror();
+        else if (n == 0)
+            /* eh? */
+            sts = -1;
+        else {
+            if (ep->schedstat_buf == NULL || ep->schedstat_buflen < n) {
+                /* grow via a temporary so a failure keeps the old buffer */
+                char *tmp = (char *)realloc(ep->schedstat_buf, n);
+
+                if (tmp == NULL)
+                    sts = -1;
+                else {
+                    ep->schedstat_buf = tmp;
+                    ep->schedstat_buflen = n;
+                }
+            }
+            if (sts == 0) {
+                memcpy(ep->schedstat_buf, buf, n);
+                ep->schedstat_buf[n-1] = '\0';
+            }
+        }
+        if (fd >= 0) {
+            close(fd);
+        }
+        ep->flags |= PROC_PID_FLAG_SCHEDSTAT_FETCHED;
+    }
+
+    return (sts < 0) ? NULL : ep;
+}
+
+/*
+ * fetch a proc/<pid>/io entry for pid
+ *
+ * Depends on kernel built with CONFIG_TASK_IO_ACCOUNTING=y
+ * which means the following must also be set:
+ * CONFIG_TASKSTATS=y
+ * CONFIG_TASK_DELAY_ACCT=y
+ * CONFIG_TASK_XACCT=y
+ *
+ * The file is copied into ep->io_buf and the first seven lines are
+ * handed out, in file order, to the members of ep->io_lines.  The
+ * IO_FETCHED flag is only raised once that has succeeded.
+ */
+proc_pid_entry_t *
+fetch_proc_pid_io(int id, proc_pid_t *proc_pid)
+{
+    __pmHashNode *node = __pmHashSearch(id, &proc_pid->pidhash);
+    proc_pid_entry_t *ep;
+    int sts = 0;
+
+    if (node == NULL)
+        return NULL;
+    ep = (proc_pid_entry_t *)node->data;
+
+    if (!(ep->flags & PROC_PID_FLAG_IO_FETCHED)) {
+        char raw[1024];
+        int nread;
+        int fd = proc_open("io", ep);
+
+        if (fd < 0) {
+            sts = -oserror();
+        }
+        else {
+            nread = read(fd, raw, sizeof(raw));
+            if (nread < 0) {
+                sts = -oserror();
+            }
+            else if (nread == 0) {
+                sts = -1;
+            }
+            else {
+                if (ep->io_buflen < nread) {
+                    ep->io_buflen = nread;
+                    ep->io_buf = (char *)realloc(ep->io_buf, nread);
+                }
+
+                if (ep->io_buf != NULL) {
+                    memcpy(ep->io_buf, raw, nread);
+                    ep->io_buf[nread-1] = '\0';
+                }
+                else {
+                    sts = -1;
+                }
+            }
+        }
+
+        if (sts == 0) {
+            /* one slot per line, in the order the lines are consumed */
+            char **slots[] = {
+                &ep->io_lines.rchar,
+                &ep->io_lines.wchar,
+                &ep->io_lines.syscr,
+                &ep->io_lines.syscw,
+                &ep->io_lines.readb,
+                &ep->io_lines.writeb,
+                &ep->io_lines.cancel,
+            };
+            char *cursor = ep->io_buf;
+            unsigned int i;
+
+            for (i = 0; i < sizeof(slots) / sizeof(slots[0]); i++)
+                *slots[i] = strsep(&cursor, "\n");
+            ep->flags |= PROC_PID_FLAG_IO_FETCHED;
+        }
+        if (fd >= 0)
+            close(fd);
+    }
+
+    return (sts < 0) ? NULL : ep;
+}
+
+/*
+ * fetch a proc/<pid>/fd entry for pid
+ *
+ * Counts the directory entries under the process "fd" directory and
+ * stores the total, less two, in ep->fd_count.  Returns NULL when the
+ * pid is not in the hash or the directory cannot be opened.
+ */
+proc_pid_entry_t *
+fetch_proc_pid_fd(int id, proc_pid_t *proc_pid)
+{
+    __pmHashNode *node = __pmHashSearch(id, &proc_pid->pidhash);
+    proc_pid_entry_t *ep;
+    uint32_t entries;
+    DIR *fddir;
+
+    if (node == NULL)
+        return NULL;
+    ep = (proc_pid_entry_t *)node->data;
+
+    /* already counted since the flags were last reset */
+    if (ep->flags & PROC_PID_FLAG_FD_FETCHED)
+        return ep;
+
+    fddir = proc_opendir("fd", ep);
+    if (fddir == NULL) {
+#if PCP_DEBUG
+        if (pmDebug & DBG_TRACE_LIBPMDA)
+            fprintf(stderr, "failed to open fd path for pid %d\n", ep->id);
+#endif
+        return NULL;
+    }
+
+    for (entries = 0; readdir(fddir) != NULL; entries++)
+        ;
+    closedir(fddir);
+
+    ep->fd_count = entries - 2; /* discount the "." and ".." entries */
+    ep->flags |= PROC_PID_FLAG_FD_FETCHED;
+    return ep;
+}
+
+/*
+ * From the kernel format for a single process cgroup set:
+ * 2:cpu:/
+ * 1:cpuset:/
+ *
+ * Produce the same one-line format string that "ps" uses:
+ * "cpu:/;cpuset:/"
+ *
+ * buf holds len bytes of input (scanning also stops at a NUL); fmt
+ * receives the result.  For every newline-terminated line the text
+ * after its first ':' is appended to fmt, with ';' between entries.
+ * A trailing line without a newline is not copied.
+ */
+static void
+proc_cgroup_reformat(char *buf, int len, char *fmt)
+{
+    char *out = fmt;    /* always addresses fmt's terminating NUL */
+    char *name = NULL;  /* text following the first ':' of this line */
+    int i;
+
+    *out = '\0';
+    for (i = 0; i < len && buf[i] != '\0'; i++) {
+        char *cur = &buf[i];
+        int seglen;
+
+        if (name == NULL) {
+            /* still hunting for the first ':' on this line */
+            if (*cur == ':')
+                name = cur + 1;
+            continue;
+        }
+        if (*cur != '\n')
+            continue;
+
+        /* complete line in hand: [name, cur) is the part to keep */
+        if (out != fmt)
+            *out++ = ';';
+        seglen = cur - name;
+        memcpy(out, name, seglen);
+        out += seglen;
+        *out = '\0';
+        name = NULL;
+    }
+}
+
+/*
+ * fetch a proc/<pid>/cgroup entry for pid
+ *
+ * The file contents are flattened by proc_cgroup_reformat() and the
+ * resulting string is passed to proc_strings_insert(); the value it
+ * returns is kept in ep->cgroup_id.
+ */
+proc_pid_entry_t *
+fetch_proc_pid_cgroup(int id, proc_pid_t *proc_pid)
+{
+    __pmHashNode *node = __pmHashSearch(id, &proc_pid->pidhash);
+    proc_pid_entry_t *ep;
+    char raw[1024];
+    char flat[1024];
+    int nread, fd;
+    int sts = 0;
+
+    if (node == NULL)
+        return NULL;
+    ep = (proc_pid_entry_t *)node->data;
+
+    /* nothing to do if this pid was already handled */
+    if (ep->flags & PROC_PID_FLAG_CGROUP_FETCHED)
+        return ep;
+
+    fd = proc_open("cgroup", ep);
+    if (fd < 0) {
+        sts = -oserror();
+    }
+    else {
+        nread = read(fd, raw, sizeof(raw));
+        if (nread < 0) {
+            sts = -oserror();
+        }
+        else if (nread == 0) {
+            setoserror(ENODATA);
+            sts = -1;
+        }
+        else {
+            /* reformat the buffer to match "ps" output format, then hash */
+            proc_cgroup_reformat(raw, nread, flat);
+            ep->cgroup_id = proc_strings_insert(flat);
+        }
+        close(fd);
+    }
+    ep->flags |= PROC_PID_FLAG_CGROUP_FETCHED;
+
+    return (sts < 0) ? NULL : ep;
+}
+
+/*
+ * fetch a proc/<pid>/attr/current entry for pid
+ *
+ * Looks up the entry for "id" in proc_pid->pidhash and, unless the
+ * LABEL_FETCHED flag is already set, reads the attribute file, NUL
+ * terminates exactly what was read and passes that string to
+ * proc_strings_insert(); the value it returns is kept in ep->label_id.
+ *
+ * Returns the entry, or NULL if the pid is unknown or the file could
+ * not be read (an empty file sets ENODATA).
+ */
+proc_pid_entry_t *
+fetch_proc_pid_label(int id, proc_pid_t *proc_pid)
+{
+    __pmHashNode *node = __pmHashSearch(id, &proc_pid->pidhash);
+    proc_pid_entry_t *ep;
+    int sts = 0;
+
+    if (node == NULL)
+        return NULL;
+    ep = (proc_pid_entry_t *)node->data;
+
+    if (!(ep->flags & PROC_PID_FLAG_LABEL_FETCHED)) {
+        char buf[1024];
+        int n, fd;
+
+        if ((fd = proc_open("attr/current", ep)) < 0)
+            sts = -oserror();
+        /* leave room for the terminator that read() does not supply */
+        else if ((n = read(fd, buf, sizeof(buf) - 1)) < 0)
+            sts = -oserror();
+        else {
+            if (n == 0) {
+                setoserror(ENODATA);
+                sts = -1;
+            } else {
+                /*
+                 * buffer matches "ps" output format, direct hash;
+                 * terminate at the end of the data so that no
+                 * uninitialised stack bytes become part of the label
+                 */
+                buf[n] = '\0';
+                ep->label_id = proc_strings_insert(buf);
+            }
+        }
+        if (fd >= 0)
+            close(fd);
+        ep->flags |= PROC_PID_FLAG_LABEL_FETCHED;
+    }
+
+    return (sts < 0) ? NULL : ep;
+}
+
+/*
+ * Extract the ith (space separated) field from a char buffer.
+ * The first field starts at zero.
+ * BEWARE: return copy is in a static buffer.
+ *
+ * Returns NULL if buf is NULL or the static buffer cannot be grown.
+ * Asking for a field beyond the last one yields an empty string.
+ */
+char *
+_pm_getfield(char *buf, int field)
+{
+    static int retbuflen = 0;
+    static char *retbuf = NULL;
+    char *p;
+    int i;
+
+    if (buf == NULL)
+        return NULL;
+
+    /* isspace() needs an unsigned char value, plain char may be negative */
+    for (p = buf, i = 0; i < field; i++) {
+        /* skip to the next space */
+        while (*p && !isspace((unsigned char)*p))
+            p++;
+
+        /* skip to the next word */
+        while (*p && isspace((unsigned char)*p))
+            p++;
+    }
+
+    /* measure the field: it ends at whitespace or end of string */
+    for (i = 0; p[i] != '\0' && !isspace((unsigned char)p[i]); i++)
+        ;
+
+    if (i >= retbuflen) {
+        /* grow via a temporary so a failure keeps the old buffer */
+        char *tmp = (char *)realloc(retbuf, i + 4);
+
+        if (tmp == NULL)
+            return NULL;
+        retbuf = tmp;
+        retbuflen = i + 4;
+    }
+
+    /* return a null terminated copy of the field */
+    memcpy(retbuf, p, i);
+    retbuf[i] = '\0';
+
+    return retbuf;
+}
diff --git a/src/pmdas/linux_proc/proc_pid.h b/src/pmdas/linux_proc/proc_pid.h
new file mode 100644
index 0000000..8835157
--- /dev/null
+++ b/src/pmdas/linux_proc/proc_pid.h
@@ -0,0 +1,289 @@
+/*
+ * Linux /proc/<pid>/... Clusters
+ *
+ * Copyright (c) 2013 Red Hat.
+ * Copyright (c) 2000,2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _PROC_PID_H
+#define _PROC_PID_H
+
+/*
+ * /proc/<pid>/stat metrics
+ */
+#define PROC_PID_STAT_PID 0
+#define PROC_PID_STAT_CMD 1
+#define PROC_PID_STAT_STATE 2
+#define PROC_PID_STAT_PPID 3
+#define PROC_PID_STAT_PGRP 4
+#define PROC_PID_STAT_SESSION 5
+#define PROC_PID_STAT_TTY 6
+#define PROC_PID_STAT_TTY_PGRP 7
+#define PROC_PID_STAT_FLAGS 8
+#define PROC_PID_STAT_MINFLT 9
+#define PROC_PID_STAT_CMIN_FLT 10
+#define PROC_PID_STAT_MAJ_FLT 11
+#define PROC_PID_STAT_CMAJ_FLT 12
+#define PROC_PID_STAT_UTIME 13
+#define PROC_PID_STAT_STIME 14
+#define PROC_PID_STAT_CUTIME 15
+#define PROC_PID_STAT_CSTIME 16
+#define PROC_PID_STAT_PRIORITY 17
+#define PROC_PID_STAT_NICE 18
+#define PROC_PID_STAT_REMOVED 19
+#define PROC_PID_STAT_IT_REAL_VALUE 20
+#define PROC_PID_STAT_START_TIME 21
+#define PROC_PID_STAT_VSIZE 22
+#define PROC_PID_STAT_RSS 23
+#define PROC_PID_STAT_RSS_RLIM 24
+#define PROC_PID_STAT_START_CODE 25
+#define PROC_PID_STAT_END_CODE 26
+#define PROC_PID_STAT_START_STACK 27
+#define PROC_PID_STAT_ESP 28
+#define PROC_PID_STAT_EIP 29
+#define PROC_PID_STAT_SIGNAL 30
+#define PROC_PID_STAT_BLOCKED 31
+#define PROC_PID_STAT_SIGIGNORE 32
+#define PROC_PID_STAT_SIGCATCH 33
+#define PROC_PID_STAT_WCHAN 34
+#define PROC_PID_STAT_NSWAP 35
+#define PROC_PID_STAT_CNSWAP 36
+#define PROC_PID_STAT_EXIT_SIGNAL 37
+#define PROC_PID_STAT_PROCESSOR 38
+#define PROC_PID_STAT_TTYNAME 39
+#define PROC_PID_STAT_WCHAN_SYMBOL 40
+#define PROC_PID_STAT_PSARGS 41
+
+/* number of fields in proc_pid_stat_entry_t */
+#define NR_PROC_PID_STAT 42
+
+/*
+ * metrics in /proc/<pid>/status
+ * Added by Mike Mason <mmlnx@us.ibm.com>
+ */
+#define PROC_PID_STATUS_UID 0
+#define PROC_PID_STATUS_EUID 1
+#define PROC_PID_STATUS_SUID 2
+#define PROC_PID_STATUS_FSUID 3
+#define PROC_PID_STATUS_GID 4
+#define PROC_PID_STATUS_EGID 5
+#define PROC_PID_STATUS_SGID 6
+#define PROC_PID_STATUS_FSGID 7
+#define PROC_PID_STATUS_UID_NM 8
+#define PROC_PID_STATUS_EUID_NM 9
+#define PROC_PID_STATUS_SUID_NM 10
+#define PROC_PID_STATUS_FSUID_NM 11
+#define PROC_PID_STATUS_GID_NM 12
+#define PROC_PID_STATUS_EGID_NM 13
+#define PROC_PID_STATUS_SGID_NM 14
+#define PROC_PID_STATUS_FSGID_NM 15
+#define PROC_PID_STATUS_SIGNAL 16
+#define PROC_PID_STATUS_BLOCKED 17
+#define PROC_PID_STATUS_SIGIGNORE 18
+#define PROC_PID_STATUS_SIGCATCH 19
+#define PROC_PID_STATUS_VMSIZE 20
+#define PROC_PID_STATUS_VMLOCK 21
+#define PROC_PID_STATUS_VMRSS 22
+#define PROC_PID_STATUS_VMDATA 23
+#define PROC_PID_STATUS_VMSTACK 24
+#define PROC_PID_STATUS_VMEXE 25
+#define PROC_PID_STATUS_VMLIB 26
+#define PROC_PID_STATUS_VMSWAP 27
+#define PROC_PID_STATUS_THREADS 28
+
+/*
+ * number of metrics from /proc/<pid>/status
+ * NOTE(review): the PROC_PID_STATUS_* indices defined above run from
+ * 0 to 28 (29 values), which does not agree with the 27 given here --
+ * confirm how this count is used before relying on it.
+ */
+#define NR_PROC_PID_STATUS 27
+
+/*
+ * metrics in /proc/<pid>/statm & /proc/<pid>/maps
+ */
+#define PROC_PID_STATM_SIZE 0
+#define PROC_PID_STATM_RSS 1
+#define PROC_PID_STATM_SHARE 2
+#define PROC_PID_STATM_TEXTRS 3
+#define PROC_PID_STATM_LIBRS 4
+#define PROC_PID_STATM_DATRS 5
+#define PROC_PID_STATM_DIRTY 6
+#define PROC_PID_STATM_MAPS 7
+
+/* number of fields in proc_pid_statm_entry_t */
+#define NR_PROC_PID_STATM 8
+
+/*
+ * metrics in /proc/<pid>/schedstat
+ */
+#define PROC_PID_SCHED_CPUTIME 0
+#define PROC_PID_SCHED_RUNDELAY 1
+#define PROC_PID_SCHED_PCOUNT 2
+#define NR_PROC_PID_SCHED 3
+
+/*
+ * metrics in /proc/<pid>/io
+ */
+#define PROC_PID_IO_RCHAR 0
+#define PROC_PID_IO_WCHAR 1
+#define PROC_PID_IO_SYSCR 2
+#define PROC_PID_IO_SYSCW 3
+#define PROC_PID_IO_READ_BYTES 4
+#define PROC_PID_IO_WRITE_BYTES 5
+#define PROC_PID_IO_CANCELLED_BYTES 6
+
+/*
+ * metrics in /proc/<pid>/fd
+ */
+#define PROC_PID_FD_COUNT 0
+
+
+/*
+ * metrics in /proc/<pid>/cgroup
+ */
+#define PROC_PID_CGROUP 0
+
+/*
+ * metrics in /proc/<pid>/attr/current
+ */
+#define PROC_PID_LABEL 0
+
+/*
+ * Pointers to individual lines of a process status file.  The members
+ * are assigned by fetch_proc_pid_status() and point into the entry's
+ * status_buf; they are not separate copies.
+ */
+typedef struct { /* /proc/<pid>/status */
+ char *uid;
+ char *gid;
+ char *sigpnd;
+ char *sigblk;
+ char *sigign;
+ char *sigcgt;
+ char *vmsize;
+ char *vmlck;
+ char *vmrss;
+ char *vmdata;
+ char *vmstk;
+ char *vmexe;
+ char *vmlib;
+ char *vmswap;
+ char *threads;
+} status_lines_t;
+
+/*
+ * Pointers to the first seven lines of a process io file, in file
+ * order.  The members are assigned by fetch_proc_pid_io() and point
+ * into the entry's io_buf; they are not separate copies.
+ */
+typedef struct { /* /proc/<pid>/io */
+ char *rchar;
+ char *wchar;
+ char *syscr;
+ char *syscw;
+ char *readb;
+ char *writeb;
+ char *cancel;
+} io_lines_t;
+
+/*
+ * Bit values for proc_pid_entry_t.flags.  Each *_FETCHED bit is tested
+ * and then set by the matching fetch_proc_pid_*() routine, so the
+ * corresponding /proc file is not re-read while the bit remains set.
+ */
+enum {
+ PROC_PID_FLAG_VALID = 1<<0,
+ PROC_PID_FLAG_STAT_FETCHED = 1<<1,
+ PROC_PID_FLAG_STATM_FETCHED = 1<<2,
+ PROC_PID_FLAG_MAPS_FETCHED = 1<<3,
+ PROC_PID_FLAG_STATUS_FETCHED = 1<<4,
+ PROC_PID_FLAG_SCHEDSTAT_FETCHED = 1<<5,
+ PROC_PID_FLAG_IO_FETCHED = 1<<6,
+ PROC_PID_FLAG_WCHAN_FETCHED = 1<<7,
+ PROC_PID_FLAG_FD_FETCHED = 1<<8,
+ PROC_PID_FLAG_CGROUP_FETCHED = 1<<9,
+ PROC_PID_FLAG_LABEL_FETCHED = 1<<10,
+};
+
+/*
+ * Per-process entry stored as the data of a pidhash node.  Each
+ * <name>_buf / <name>_buflen pair is a heap buffer, and the size last
+ * requested for it, that the matching fetch_proc_pid_*() routine fills
+ * from the corresponding /proc/<pid> file.
+ */
+typedef struct {
+ int id; /* pid, hash key and internal instance id */
+ int flags; /* combinations of PROC_PID_FLAG_* values */
+ char *name; /* external instance name (<pid> cmdline) */
+
+ /* /proc/<pid>/stat cluster */
+ int stat_buflen;
+ char *stat_buf;
+
+ /* /proc/<pid>/statm and /proc/<pid>/maps cluster */
+ int statm_buflen;
+ char *statm_buf;
+ int maps_buflen;
+ char *maps_buf;
+
+ /* /proc/<pid>/status cluster */
+ int status_buflen;
+ char *status_buf;
+ status_lines_t status_lines;
+
+ /* /proc/<pid>/schedstat cluster */
+ int schedstat_buflen;
+ char *schedstat_buf;
+
+ /* /proc/<pid>/io cluster */
+ int io_buflen;
+ char *io_buf;
+ io_lines_t io_lines;
+
+ /* /proc/<pid>/wchan cluster */
+ int wchan_buflen;
+ char *wchan_buf;
+
+ /* /proc/<pid>/fd cluster */
+ /* NOTE(review): fetch_proc_pid_fd() only writes fd_count; fd_buflen and fd_buf look unused -- confirm */
+ int fd_buflen;
+ uint32_t fd_count;
+ char *fd_buf;
+
+ /* /proc/<pid>/cgroup cluster */
+ /* value returned by proc_strings_insert() for the reformatted cgroup string */
+ int cgroup_id;
+
+ /* /proc/<pid>/attr/current cluster */
+ /* value returned by proc_strings_insert() for the label string */
+ int label_id;
+} proc_pid_entry_t;
+
+typedef struct {
+ __pmHashCtl pidhash; /* hash table for current pids */
+ pmdaIndom *indom; /* instance domain table */
+} proc_pid_t;
+
+typedef struct {
+ int count; /* number of processes in the list */
+ int size; /* size of the buffer (pids) allocated */
+ int *pids; /* array of process identifiers */
+ int threads; /* /proc/PID/{xxx,task/PID/xxx} flag */
+} proc_pid_list_t;
+
+/* refresh the proc indom, reset all "fetched" flags */
+extern int refresh_proc_pid(proc_pid_t *, int, const char *);
+
+/* fetch a proc/<pid>/stat entry for pid */
+extern proc_pid_entry_t *fetch_proc_pid_stat(int, proc_pid_t *);
+
+/* fetch a proc/<pid>/statm entry for pid */
+extern proc_pid_entry_t *fetch_proc_pid_statm(int, proc_pid_t *);
+
+/* fetch a proc/<pid>/status entry for pid */
+extern proc_pid_entry_t *fetch_proc_pid_status(int, proc_pid_t *);
+
+/* fetch a proc/<pid>/maps entry for pid */
+extern proc_pid_entry_t *fetch_proc_pid_maps(int, proc_pid_t *);
+
+/* fetch a proc/<pid>/schedstat entry for pid */
+extern proc_pid_entry_t *fetch_proc_pid_schedstat(int, proc_pid_t *);
+
+/* fetch a proc/<pid>/io entry for pid */
+extern proc_pid_entry_t *fetch_proc_pid_io(int, proc_pid_t *);
+
+/* fetch a proc/<pid>/fd entry for pid */
+extern proc_pid_entry_t *fetch_proc_pid_fd(int, proc_pid_t *);
+
+/* fetch a proc/<pid>/cgroup entry for pid */
+extern proc_pid_entry_t *fetch_proc_pid_cgroup(int, proc_pid_t *);
+
+/* fetch a proc/<pid>/attr/current entry for pid */
+extern proc_pid_entry_t *fetch_proc_pid_label(int, proc_pid_t *);
+
+/* extract the ith space separated field from a buffer */
+extern char *_pm_getfield(char *, int);
+
+#endif /* _PROC_PID_H */
diff --git a/src/pmdas/linux_proc/proc_runq.c b/src/pmdas/linux_proc/proc_runq.c
new file mode 100644
index 0000000..07b68dc
--- /dev/null
+++ b/src/pmdas/linux_proc/proc_runq.c
@@ -0,0 +1,123 @@
+/*
+ * Linux /proc/runq metrics cluster
+ *
+ * Copyright (c) 2000,2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "pmapi.h"
+#include "impl.h"
+#include "pmda.h"
+#include <ctype.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include "proc_pid.h"
+#include "proc_runq.h"
+
+/*
+ * Rebuild the run queue summary by classifying every numeric entry
+ * under /proc from its "stat" file.
+ *
+ * All counters in proc_runq are zeroed first.  A process is counted
+ * once, in the first category that applies: unknown (stat unreadable
+ * or unparseable), defunct (state 'Z'), kernel (vsize is "0"), swapped
+ * (rss is "0"), otherwise by state letter R/S/T/D, with any other
+ * letter counted as unknown.
+ *
+ * Returns 0 on success, or -oserror() if /proc cannot be opened.
+ *
+ * NOTE(review): fields are located by counting whitespace, so a
+ * command name that itself contains whitespace would presumably shift
+ * the state/vsize/rss fields -- confirm whether that matters here.
+ */
+int
+refresh_proc_runq(proc_runq_t *proc_runq)
+{
+    int sz;
+    int fd;
+    char *p;
+    int sname;
+    DIR *dir;
+    struct dirent *d;
+    char fullpath[MAXPATHLEN];
+    char buf[4096];
+
+    memset(proc_runq, 0, sizeof(proc_runq_t));
+    if ((dir = opendir("/proc")) == NULL)
+        return -oserror();
+
+    while ((d = readdir(dir)) != NULL) {
+        if (!isdigit((unsigned char)d->d_name[0]))
+            continue;
+        snprintf(fullpath, sizeof(fullpath), "/proc/%s/stat", d->d_name);
+        if ((fd = open(fullpath, O_RDONLY)) < 0)
+            continue;
+        /* leave room for the terminator that read() does not supply */
+        sz = read(fd, buf, sizeof(buf) - 1);
+        close(fd);
+        if (sz <= 0) {
+            proc_runq->unknown++;
+            continue;
+        }
+        /*
+         * terminate at the end of this read: buf is reused for every
+         * process, so anything beyond sz belongs to an earlier one
+         */
+        buf[sz] = '\0';
+
+        /*
+         * defunct (state name is 'Z')
+         */
+        if ((p = _pm_getfield(buf, PROC_PID_STAT_STATE)) == NULL) {
+            proc_runq->unknown++;
+            continue;
+        }
+        if ((sname = *p) == 'Z') {
+            proc_runq->defunct++;
+            continue;
+        }
+
+        /*
+         * kernel process (not defunct and virtual size is zero)
+         */
+        if ((p = _pm_getfield(buf, PROC_PID_STAT_VSIZE)) == NULL) {
+            proc_runq->unknown++;
+            continue;
+        }
+        if (strcmp(p, "0") == 0) {
+            proc_runq->kernel++;
+            continue;
+        }
+
+        /*
+         * swapped (resident set size is zero)
+         */
+        if ((p = _pm_getfield(buf, PROC_PID_STAT_RSS)) == NULL) {
+            proc_runq->unknown++;
+            continue;
+        }
+        if (strcmp(p, "0") == 0) {
+            proc_runq->swapped++;
+            continue;
+        }
+
+        /*
+         * All other states
+         */
+        switch (sname) {
+        case 'R':
+            proc_runq->runnable++;
+            break;
+        case 'S':
+            proc_runq->sleeping++;
+            break;
+        case 'T':
+            proc_runq->stopped++;
+            break;
+        case 'D':
+            proc_runq->blocked++;
+            break;
+        /* case 'Z':
+            break; -- already counted above */
+        default:
+            fprintf(stderr, "UNKNOWN %c : %s\n", sname, buf);
+            proc_runq->unknown++;
+            break;
+        }
+    }
+    closedir(dir);
+
+#if PCP_DEBUG
+    if (pmDebug & DBG_TRACE_LIBPMDA) {
+        fprintf(stderr, "refresh_runq: runnable=%d sleeping=%d stopped=%d blocked=%d unknown=%d\n",
+                proc_runq->runnable, proc_runq->sleeping, proc_runq->stopped,
+                proc_runq->blocked, proc_runq->unknown);
+    }
+#endif
+
+    return 0;
+}
diff --git a/src/pmdas/linux_proc/proc_runq.h b/src/pmdas/linux_proc/proc_runq.h
new file mode 100644
index 0000000..9739208
--- /dev/null
+++ b/src/pmdas/linux_proc/proc_runq.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2000,2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _PROC_RUNQ_H
+#define _PROC_RUNQ_H
+
+/*
+ * Process counts by category, as filled in by refresh_proc_runq().
+ * That routine zeroes the structure and then adds each process it
+ * classifies to exactly one of these counters.
+ */
+typedef struct {
+ int runnable;
+ int blocked;
+ int sleeping;
+ int stopped;
+ int swapped;
+ int kernel;
+ int defunct;
+ int unknown;
+} proc_runq_t;
+
+extern int refresh_proc_runq(proc_runq_t *);
+
+#endif /* _PROC_RUNQ_H */
diff --git a/src/pmdas/linux_proc/root b/src/pmdas/linux_proc/root
new file mode 100644
index 0000000..5f26a89
--- /dev/null
+++ b/src/pmdas/linux_proc/root
@@ -0,0 +1,6 @@
+/*
+ * fake "root" for validating the local PMNS subtree
+ *
+ * Pulls in the namespace file that lives alongside this one
+ * (root_proc), which supplies the cgroup and proc subtrees.
+ */
+
+#include <stdpmid>
+#include "root_proc"
diff --git a/src/pmdas/linux_proc/root_proc b/src/pmdas/linux_proc/root_proc
new file mode 100644
index 0000000..91b8654
--- /dev/null
+++ b/src/pmdas/linux_proc/root_proc
@@ -0,0 +1,181 @@
+/*
+ * Metrics for the Linux proc PMDA
+ *
+ * Note:
+ * names and pmids migrated from the Linux PMDA, with the domain
+ * number changed from LINUX (60) to PROC (3)
+ */
+
+#ifndef PROC
+#define PROC 3
+#endif
+
+root {
+ cgroup
+ proc
+}
+
+cgroup {
+ subsys
+ mounts
+ groups PROC:*:*
+}
+
+cgroup.subsys {
+ hierarchy PROC:37:0
+ count PROC:37:1
+}
+
+cgroup.mounts {
+ subsys PROC:38:0
+ count PROC:38:1
+}
+
+proc {
+ nprocs PROC:8:99
+ psinfo
+ memory
+ runq
+ id
+ io
+ schedstat
+ fd
+ control
+}
+
+proc.psinfo {
+ pid PROC:8:0
+ cmd PROC:8:1
+ sname PROC:8:2
+ ppid PROC:8:3
+ pgrp PROC:8:4
+ session PROC:8:5
+ tty PROC:8:6
+ tty_pgrp PROC:8:7
+ flags PROC:8:8
+ minflt PROC:8:9
+ cmin_flt PROC:8:10
+ maj_flt PROC:8:11
+ cmaj_flt PROC:8:12
+ utime PROC:8:13
+ stime PROC:8:14
+ cutime PROC:8:15
+ cstime PROC:8:16
+ priority PROC:8:17
+ nice PROC:8:18
+ /* not valid in 2.2.1 PROC:8:19 */
+ it_real_value PROC:8:20
+ start_time PROC:8:21
+ vsize PROC:8:22
+ rss PROC:8:23
+ rss_rlim PROC:8:24
+ start_code PROC:8:25
+ end_code PROC:8:26
+ start_stack PROC:8:27
+ esp PROC:8:28
+ eip PROC:8:29
+ signal PROC:8:30
+ blocked PROC:8:31
+ sigignore PROC:8:32
+ sigcatch PROC:8:33
+ wchan PROC:8:34
+ nswap PROC:8:35
+ cnswap PROC:8:36
+ exit_signal PROC:8:37
+ processor PROC:8:38
+ ttyname PROC:8:39
+ wchan_s PROC:8:40
+ psargs PROC:8:41
+ signal_s PROC:24:16
+ blocked_s PROC:24:17
+ sigignore_s PROC:24:18
+ sigcatch_s PROC:24:19
+ threads PROC:24:28
+ cgroups PROC:11:0
+ labels PROC:12:0
+}
+
+proc.id {
+ uid PROC:24:0
+ euid PROC:24:1
+ suid PROC:24:2
+ fsuid PROC:24:3
+ gid PROC:24:4
+ egid PROC:24:5
+ sgid PROC:24:6
+ fsgid PROC:24:7
+ uid_nm PROC:24:8
+ euid_nm PROC:24:9
+ suid_nm PROC:24:10
+ fsuid_nm PROC:24:11
+ gid_nm PROC:24:12
+ egid_nm PROC:24:13
+ sgid_nm PROC:24:14
+ fsgid_nm PROC:24:15
+}
+
+proc.memory {
+ size PROC:9:0
+ rss PROC:9:1
+ share PROC:9:2
+ textrss PROC:9:3
+ librss PROC:9:4
+ datrss PROC:9:5
+ dirty PROC:9:6
+ maps PROC:9:7
+ vmsize PROC:24:20
+ vmlock PROC:24:21
+ vmrss PROC:24:22
+ vmdata PROC:24:23
+ vmstack PROC:24:24
+ vmexe PROC:24:25
+ vmlib PROC:24:26
+ vmswap PROC:24:27
+}
+
+proc.runq {
+ runnable PROC:13:0
+ blocked PROC:13:1
+ sleeping PROC:13:2
+ stopped PROC:13:3
+ swapped PROC:13:4
+ defunct PROC:13:5
+ unknown PROC:13:6
+ kernel PROC:13:7
+}
+
+proc.io {
+ rchar PROC:32:0
+ wchar PROC:32:1
+ syscr PROC:32:2
+ syscw PROC:32:3
+ read_bytes PROC:32:4
+ write_bytes PROC:32:5
+ cancelled_write_bytes PROC:32:6
+}
+
+proc.schedstat {
+ cpu_time PROC:31:0
+ run_delay PROC:31:1
+ pcount PROC:31:2
+}
+
+proc.fd {
+ count PROC:51:0
+}
+
+proc.control {
+ all
+ perclient
+}
+
+proc.control.all {
+ threads PROC:10:1
+}
+
+proc.control.perclient {
+ threads PROC:10:2
+ cgroups PROC:10:3
+}
+
+#undef PROC