summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason King <jason.king@joyent.com>2018-10-19 21:13:06 -0500
committerJason King <jason.king@joyent.com>2020-12-08 19:13:03 +0000
commit88880f2384ccc4a2382a0fd56138ff5a6862866f (patch)
tree0f598e8d6cc6e10c04026af5ecee92a5a8afaa20
parente751b029b28ab40ab0a552ff16815ed0be38d284 (diff)
downloadillumos-joyent-88880f2384ccc4a2382a0fd56138ff5a6862866f.tar.gz
More comments
-rw-r--r--usr/src/cmd/intrd/intrd.c53
-rw-r--r--usr/src/cmd/intrd/intrd.h30
-rw-r--r--usr/src/cmd/intrd/intrd_kstat.c109
3 files changed, 133 insertions, 59 deletions
diff --git a/usr/src/cmd/intrd/intrd.c b/usr/src/cmd/intrd/intrd.c
index 4a7faf360a..fa0ef14e45 100644
--- a/usr/src/cmd/intrd/intrd.c
+++ b/usr/src/cmd/intrd/intrd.c
@@ -47,6 +47,10 @@ static void delta_save(stats_t **, size_t, stats_t *, uint_t);
static load_t *calc_load(stats_t *);
static void load_free(load_t *);
+uint_t cfg_interval = 10;
+uint_t cfg_retry_interval = 1;
+uint_t cfg_idle_interval = 45;
+
uint_t max_cpu;
#ifdef DEBUG
@@ -264,8 +268,17 @@ loop(const config_t *restrict cfg, kstat_ctl_t *restrict kcp)
for (;; sleep(interval)) {
stats_free(stats[gen]);
- if ((stats[gen] = stats_get(cfg, kcp, interval)) == NULL)
+
+ /*
+ * If there was a temporary error, retry sooner than a
+ * regular interval.
+ */
+ if ((stats[gen] = stats_get(cfg, kcp, interval)) == NULL) {
+ interval = cfg_retry_interval;
continue;
+ }
+
+ interval = cfg_interval;
delta = stats_delta(stats[gen], stats[gen ^ 1]);
gen ^= 1;
@@ -282,6 +295,7 @@ loop(const config_t *restrict cfg, kstat_ctl_t *restrict kcp)
stats_dump(sum);
+ /* XXX: temporary just to show the data */
{
stats_t *st = (sum != NULL) ? sum : delta;
load_t *load = calc_load(st);
@@ -292,6 +306,10 @@ loop(const config_t *restrict cfg, kstat_ctl_t *restrict kcp)
}
}
+/*
+ * Add newdelta to the front of deltas, and remove any entries in delta
+ * from more than statslen seconds ago.
+ */
static void
delta_save(stats_t **deltas, size_t n, stats_t *newdelta, uint_t statslen)
{
@@ -395,6 +413,39 @@ load_free(load_t *ld)
free(ld);
}
+/*
+ * Like nicenum, but assumes the value is * 10^(-9) units
+ */
+void
+nanonicenum(uint64_t val, char *buf, size_t buflen)
+{
+ static const char units[] = "num KMGTPE";
+ static const size_t index_max = 9;
+ uint64_t divisor = 1;
+ int index = 0;
+ char u;
+
+ while (index < index_max) {
+ uint64_t newdiv = divisor * 1024;
+
+ if (val < newdiv)
+ break;
+ divisor = newdiv;
+ index++;
+ }
+ u = units[index];
+
+ if (val % divisor == 0) {
+ (void) snprintf(buf, buflen, "%llu%c", val / divisor, u);
+ } else {
+ for (int i = 2; i >= 0; i--) {
+ if (snprintf(buf, buflen, "%.*f%c", i,
+ (double)val / divisor, u) <= 5)
+ return;
+ }
+ }
+}
+
char *
xstrdup(const char *s)
{
diff --git a/usr/src/cmd/intrd/intrd.h b/usr/src/cmd/intrd/intrd.h
index c4bf5d96c4..3bc00d2ec3 100644
--- a/usr/src/cmd/intrd/intrd.h
+++ b/usr/src/cmd/intrd/intrd.h
@@ -40,6 +40,9 @@ typedef struct config {
double cfg_mindelta;
} config_t;
+/*
+ * An interrupt vector, corresponding to the data from a pci_intrs kstat.
+ */
typedef struct ivec {
list_node_t ivec_node;
hrtime_t ivec_snaptime;
@@ -57,6 +60,10 @@ typedef struct ivec {
char ivec_type[16]; /* sizeof kstat_named_t.value.c */
} ivec_t;
+/*
+ * The stats corresponding to the cpu:sys kstat, as well as the locality
+ * group this CPU is in, and all the interrupts assigned to this CPU.
+ */
typedef struct cpustat {
hrtime_t cs_snaptime;
int cs_cpuid;
@@ -70,6 +77,9 @@ typedef struct cpustat {
size_t cs_nivecs;
} cpustat_t;
+/*
+ * The locality group data.
+ */
typedef struct cpugrp {
lgrp_id_t cg_id;
lgrp_id_t cg_parent;
@@ -101,6 +111,14 @@ typedef struct stats {
} stats_t;
#define STATS_CPU(_st, _id) (_st)->sts_cpu[(_id)]
+/*
+ * The calculations used to evaluate the interrupt load on the system are
+ * both the ratio of time spent servicing device interrupts compared to the
+ * total time, as well as the average interrupt time per cpu. In addition,
+ * we keep a reference to ivec consuming the most time. A load_t is created
+ * per cpu, as well as per locality group. The load_t's for lgrps are
+ * aggregated over all the cpus in a given lgrp.
+ */
typedef struct load {
uint64_t ld_total;
uint64_t ld_intrtotal;
@@ -115,6 +133,9 @@ typedef struct load {
((LOAD_BIGINT_LOAD(_l1) > LOAD_BIGINT_LOAD(_l2)) ? \
(_l1)->ld_bigint : (_l2)->ld_bigint)
+extern uint_t cfg_interval;
+extern uint_t cfg_retry_interval;
+extern uint_t cfg_idle_interval;
extern uint_t max_cpu;
typedef enum intrd_walk_ret {
@@ -129,18 +150,9 @@ intrd_walk_ret_t cpu_iter(stats_t *, cpu_itercb_t, void *);
stats_t *stats_get(const config_t *restrict, kstat_ctl_t *restrict, uint_t);
stats_t *stats_delta(const stats_t *, const stats_t *);
stats_t *stats_sum(stats_t * const*, size_t, size_t *);
-stats_t *stats_dup(const stats_t *);
void stats_free(stats_t *);
void stats_dump(const stats_t *);
-cpustat_t *cpustat_new(void);
-cpustat_t *cpustat_dup(const cpustat_t *);
-void cpustat_free(cpustat_t *);
-
-ivec_t *ivec_new(void);
-ivec_t *ivec_dup(const ivec_t *);
-void ivec_free(ivec_t *);
-
char *xstrdup(const char *);
void *xcalloc(size_t, size_t);
void *xreallocarray(void *, size_t, size_t);
diff --git a/usr/src/cmd/intrd/intrd_kstat.c b/usr/src/cmd/intrd/intrd_kstat.c
index ba737eee60..c6e68e6ef2 100644
--- a/usr/src/cmd/intrd/intrd_kstat.c
+++ b/usr/src/cmd/intrd/intrd_kstat.c
@@ -56,7 +56,22 @@ static boolean_t ivec_shared_intr(const ivec_t *, const ivec_t *);
static boolean_t ivec_shared_msi(const ivec_t *, const ivec_t *);
static stats_t *stats_new(void);
+static stats_t *stats_dup(const stats_t*);
+static cpustat_t *cpustat_new(void);
+static cpustat_t *cpustat_dup(const cpustat_t *);
+static void cpustat_free(cpustat_t *);
+
+static ivec_t *ivec_new(void);
+static ivec_t *ivec_dup(const ivec_t *);
+static void ivec_free(ivec_t *);
+
+/*
+ * Get a kstat snapshot and assemble it into a stats_t. If the fraction
+ * of the time spent creating the stats_t compared to the polling interval
+ * (interval) is > cfg_tooslow, we return NULL. If we encounter a non-fatal
+ * error reading the kstats, we asll return NULL.
+ */
stats_t *
stats_get(const config_t *restrict cfg, kstat_ctl_t *restrict kcp,
uint_t interval)
@@ -90,6 +105,7 @@ stats_get(const config_t *restrict cfg, kstat_ctl_t *restrict kcp,
}
if (getstat_tooslow(sts, interval, cfg->cfg_tooslow)) {
+ errno = ETIME;
goto fail;
}
@@ -105,7 +121,6 @@ stats_get(const config_t *restrict cfg, kstat_ctl_t *restrict kcp,
return (sts);
fail:
- printf("%s fail\n", __func__);
stats_free(sts);
return (NULL);
}
@@ -189,11 +204,6 @@ fail:
return (B_FALSE);
}
-/*
- * Combine ivec_t's for any shared interrupts into a single consolidated
- * entry (since they have to move together). On X86, also group MSI
- * interrupts for the same device (for similar reasons).
- */
static int
ivec_cmp(const void *a, const void *b)
{
@@ -218,6 +228,11 @@ ivec_cmp(const void *a, const void *b)
return (0);
}
+/*
+ * Combine ivec_t's for any shared interrupts into a single consolidated
+ * entry (since they have to move together). On X86, also group MSI
+ * interrupts for the same device (for similar reasons).
+ */
static intrd_walk_ret_t
consolidate_ivec_cb(stats_t *stp, cpustat_t *cs, void *arg)
{
@@ -229,6 +244,11 @@ consolidate_ivec_cb(stats_t *stp, cpustat_t *cs, void *arg)
ivec_t *temp[cs->cs_nivecs];
size_t n, i, j;
+ /*
+ * We create an array of pointers to all the ivec_t's on this cpu,
+ * then sort them so that any potential shared interrupts will
+ * be adjacent in the array.
+ */
n = 0;
for (iv = list_head(ivlist); iv != NULL; iv = list_next(ivlist, iv))
temp[n++] = iv;
@@ -236,6 +256,10 @@ consolidate_ivec_cb(stats_t *stp, cpustat_t *cs, void *arg)
qsort(temp, n, sizeof (ivec_t *), ivec_cmp);
+ /*
+ * For each ivec, look at the next ivec in the array and consolidate
+ * each successive ivec as long as they are shared.
+ */
for (i = 0; i < n; i = j) {
iv = temp[i];
@@ -268,6 +292,9 @@ consolidate_ivecs(stats_t *stp)
static boolean_t
ivec_shared_intr(const ivec_t *i1, const ivec_t *i2)
{
+ /*
+ * XXX This needs to be revisited
+ */
#if 0
if (i1->ivec_ino != i2->ivec_ino)
return (B_FALSE);
@@ -292,6 +319,7 @@ ivec_shared_msi(const ivec_t *i1, const ivec_t *i2)
#endif
}
+#if 0
static int
ivec_cmp_msi(const void *a, const void *b)
{
@@ -315,6 +343,7 @@ ivec_cmp_msi(const void *a, const void *b)
return (0);
return ((l->ivec_instance < r->ivec_instance) ? -1 : 1);
}
+#endif
static intrd_walk_ret_t
get_cpu(kstat_ctl_t *restrict kcp, kstat_t *restrict ksp, void *restrict arg)
@@ -394,6 +423,9 @@ get_cpu(kstat_ctl_t *restrict kcp, kstat_t *restrict ksp, void *restrict arg)
static intrd_walk_ret_t
get_ivecs(kstat_ctl_t *restrict kcp, kstat_t *restrict ksp, void *restrict arg)
{
+ /*
+ * Also cache the field indexes for the pci_intrs kstat.
+ */
static int cookie = -1;
static int cpu = -1;
static int buspath = -1;
@@ -484,8 +516,8 @@ get_ivecs(kstat_ctl_t *restrict kcp, kstat_t *restrict ksp, void *restrict arg)
}
/*
- * Determine if the amount of time spent collecting our stats, as well as set
- * the min and max timestamp of all the stats collected in stp.
+ * Determine if the ratio of time spent creating stp compared to the polling
+ * interval is > tooslow.
*/
static boolean_t
getstat_tooslow(stats_t *stp, uint_t interval, double tooslow)
@@ -499,7 +531,7 @@ getstat_tooslow(stats_t *stp, uint_t interval, double tooslow)
diff = stp->sts_maxtime - stp->sts_mintime;
nanonicenum(diff, numbuf, sizeof (numbuf));
- portion = (double)diff / (double)(interval * NANOSEC);
+ portion = (double)diff / (double)((uint64_t)interval * NANOSEC);
syslog(LOG_DEBUG,
"spent %.1f%% of the polling interval collecting stats "
@@ -574,6 +606,10 @@ stats_delta_cb(stats_t *stp, cpustat_t *cs, void *arg)
return (INTRD_WALK_NEXT);
}
+/*
+ * If a change in the system configuration (different cpus online, new/deleted
+ * interrupts, etc.) return B_TRUE, otherwise return B_FALSE.
+ */
static boolean_t
stats_differ(const stats_t *s1, const stats_t *s2)
{
@@ -617,6 +653,9 @@ stats_differ(const stats_t *s1, const stats_t *s2)
return (B_FALSE);
}
+/*
+ * Compute what is effectively s1 - prev.
+ */
stats_t *
stats_delta(const stats_t *restrict st, const stats_t *restrict prev)
{
@@ -684,6 +723,11 @@ stats_sum_cb(stats_t *sum, cpustat_t *cs, void *arg)
return (INTRD_WALK_NEXT);
}
+/*
+ * Given a collection of n deltas, combine them together to represent a larger
+ * time interval, skipping any that don't represent the same configuration.
+ * Sets *total to the number of deltas that were used.
+ */
stats_t *
stats_sum(stats_t * const *restrict deltas, size_t n, size_t *restrict total)
{
@@ -733,7 +777,7 @@ stlgrp_copy(const cpugrp_t *src, cpugrp_t *dst)
dst->cg_nchildren = src->cg_nchildren;
}
-stats_t *
+static stats_t *
stats_dup(const stats_t *src)
{
stats_t *stp;
@@ -755,39 +799,6 @@ stats_dup(const stats_t *src)
return (stp);
}
-/*
- * Like nicenum, but assumes the value is * 10^(-9) units
- */
-void
-nanonicenum(uint64_t val, char *buf, size_t buflen)
-{
- static const char units[] = "num KMGTPE";
- static const size_t index_max = 9;
- uint64_t divisor = 1;
- int index = 0;
- char u;
-
- while (index < index_max) {
- uint64_t newdiv = divisor * 1024;
-
- if (val < newdiv)
- break;
- divisor = newdiv;
- index++;
- }
- u = units[index];
-
- if (val % divisor == 0) {
- (void) snprintf(buf, buflen, "%llu%c", val / divisor, u);
- } else {
- for (int i = 2; i >= 0; i--) {
- if (snprintf(buf, buflen, "%.*f%c", i,
- (double)val / divisor, u) <= 5)
- return;
- }
- }
-}
-
static intrd_walk_ret_t
stats_dump_cb(stats_t *stp, cpustat_t *cs, void *dummy __unused)
{
@@ -896,7 +907,7 @@ stats_free(stats_t *stp)
umem_cache_free(stats_cache, stp);
}
-ivec_t *
+static ivec_t *
ivec_dup(const ivec_t *iv)
{
ivec_t *newiv = ivec_new();
@@ -918,13 +929,13 @@ ivec_dup(const ivec_t *iv)
return (newiv);
}
-ivec_t *
+static ivec_t *
ivec_new(void)
{
return (umem_cache_alloc(ivec_cache, UMEM_NOFAIL));
}
-void
+static void
ivec_free(ivec_t *iv)
{
if (iv == NULL)
@@ -942,13 +953,13 @@ ivec_free(ivec_t *iv)
umem_cache_free(ivec_cache, iv);
}
-cpustat_t *
+static cpustat_t *
cpustat_new(void)
{
return (umem_cache_alloc(cpustat_cache, UMEM_NOFAIL));
}
-void
+static void
cpustat_free(cpustat_t *cs)
{
if (cs == NULL)
@@ -971,7 +982,7 @@ cpustat_free(cpustat_t *cs)
umem_cache_free(cpustat_cache, cs);
}
-cpustat_t *
+static cpustat_t *
cpustat_dup(const cpustat_t *src)
{
cpustat_t *cs = cpustat_new();