More comments

author: Jason King <jason.king@joyent.com> 2018-10-19 21:13:06 -0500
committer: Jason King <jason.king@joyent.com> 2020-12-08 19:13:03 +0000
commit: 88880f2384ccc4a2382a0fd56138ff5a6862866f (patch)
tree: 0f598e8d6cc6e10c04026af5ecee92a5a8afaa20
parent: e751b029b28ab40ab0a552ff16815ed0be38d284 (diff)
download: illumos-joyent-88880f2384ccc4a2382a0fd56138ff5a6862866f.tar.gz
3 files changed, 133 insertions, 59 deletions
diff --git a/usr/src/cmd/intrd/intrd.c b/usr/src/cmd/intrd/intrd.c
index 4a7faf360a..fa0ef14e45 100644
--- a/usr/src/cmd/intrd/intrd.c
+++ b/usr/src/cmd/intrd/intrd.c
@@ -47,6 +47,10 @@ static void delta_save(stats_t **, size_t, stats_t *, uint_t);
 static load_t *calc_load(stats_t *);
 static void load_free(load_t *);
 
+uint_t cfg_interval = 10;
+uint_t cfg_retry_interval = 1;
+uint_t cfg_idle_interval = 45;
+
 uint_t max_cpu;
 
 #ifdef DEBUG
@@ -264,8 +268,17 @@ loop(const config_t *restrict cfg, kstat_ctl_t *restrict kcp)
 
 	for (;; sleep(interval)) {
 		stats_free(stats[gen]);
-		if ((stats[gen] = stats_get(cfg, kcp, interval)) == NULL)
+
+		/*
+		 * If there was a temporary error, retry sooner than a
+		 * regular interval.
+		 */
+		if ((stats[gen] = stats_get(cfg, kcp, interval)) == NULL) {
+			interval = cfg_retry_interval;
 			continue;
+		}
+
+		interval = cfg_interval;
 
 		delta = stats_delta(stats[gen], stats[gen ^ 1]);
 		gen ^= 1;
@@ -282,6 +295,7 @@ loop(const config_t *restrict cfg, kstat_ctl_t *restrict kcp)
 
 		stats_dump(sum);
 
+		/*  XXX: temporary just to show the data */
 		{
 			stats_t *st = (sum != NULL) ? sum : delta;
 			load_t *load = calc_load(st);
@@ -292,6 +306,10 @@ loop(const config_t *restrict cfg, kstat_ctl_t *restrict kcp)
 	}
 }
 
+/*
+ * Add newdelta to the front of deltas, and remove any entries in deltas
+ * from more than statslen seconds ago.
+ */
 static void
 delta_save(stats_t **deltas, size_t n, stats_t *newdelta, uint_t statslen)
 {
@@ -395,6 +413,39 @@ load_free(load_t *ld)
 	free(ld);
 }
 
+/*
+ * Like nicenum, but assumes the value is * 10^(-9) units
+ */
+void
+nanonicenum(uint64_t val, char *buf, size_t buflen)
+{
+	static const char units[] = "num KMGTPE";
+	static const size_t index_max = 9;
+	uint64_t divisor = 1;
+	int index = 0;
+	char u;
+
+	while (index < index_max) {
+		uint64_t newdiv = divisor * 1024;
+
+		if (val < newdiv)
+			break;
+		divisor = newdiv;
+		index++;
+	}
+	u = units[index];
+
+	if (val % divisor == 0) {
+		(void) snprintf(buf, buflen, "%llu%c", val / divisor, u);
+	} else {
+		for (int i = 2; i >= 0; i--) {
+			if (snprintf(buf, buflen, "%.*f%c", i,
+			    (double)val / divisor, u) <= 5)
+				return;
+		}
+	}
+}
+
 char *
 xstrdup(const char *s)
 {
diff --git a/usr/src/cmd/intrd/intrd.h b/usr/src/cmd/intrd/intrd.h
index c4bf5d96c4..3bc00d2ec3 100644
--- a/usr/src/cmd/intrd/intrd.h
+++ b/usr/src/cmd/intrd/intrd.h
@@ -40,6 +40,9 @@ typedef struct config {
 	double	cfg_mindelta;
 } config_t;
 
+/*
+ * An interrupt vector, corresponding to the data from a pci_intrs kstat.
+ */
 typedef struct ivec {
 	list_node_t	ivec_node;
 	hrtime_t	ivec_snaptime;
@@ -57,6 +60,10 @@ typedef struct ivec {
 	char		ivec_type[16];	/* sizeof kstat_named_t.value.c */
 } ivec_t;
 
+/*
+ * The stats corresponding to the cpu:sys kstat, as well as the locality
+ * group this CPU is in, and all the interrupts assigned to this CPU.
+ */
 typedef struct cpustat {
 	hrtime_t	cs_snaptime;
 	int		cs_cpuid;
@@ -70,6 +77,9 @@ typedef struct cpustat {
 	size_t		cs_nivecs;
 } cpustat_t;
 
+/*
+ * The locality group data.
+ */
 typedef struct cpugrp {
 	lgrp_id_t	cg_id;
 	lgrp_id_t	cg_parent;
@@ -101,6 +111,14 @@ typedef struct stats {
 } stats_t;
 #define STATS_CPU(_st, _id) (_st)->sts_cpu[(_id)]
 
+/*
+ * The calculations used to evaluate the interrupt load on the system are
+ * both the ratio of time spent servicing device interrupts compared to the
+ * total time, as well as the average interrupt time per cpu.  In addition,
+ * we keep a reference to ivec consuming the most time.  A load_t is created
+ * per cpu, as well as per locality group.  The load_t's for lgrps are
+ * aggregated over all the cpus in a given lgrp.
+ */
 typedef struct load {
 	uint64_t	ld_total;
 	uint64_t	ld_intrtotal;
@@ -115,6 +133,9 @@ typedef struct load {
     ((LOAD_BIGINT_LOAD(_l1) > LOAD_BIGINT_LOAD(_l2)) ? \
      (_l1)->ld_bigint : (_l2)->ld_bigint)
 
+extern uint_t cfg_interval;
+extern uint_t cfg_retry_interval;
+extern uint_t cfg_idle_interval;
 extern uint_t max_cpu;
 
 typedef enum intrd_walk_ret {
@@ -129,18 +150,9 @@ intrd_walk_ret_t cpu_iter(stats_t *, cpu_itercb_t, void *);
 stats_t *stats_get(const config_t *restrict, kstat_ctl_t *restrict, uint_t);
 stats_t *stats_delta(const stats_t *, const stats_t *);
 stats_t *stats_sum(stats_t * const*, size_t, size_t *);
-stats_t *stats_dup(const stats_t *);
 void stats_free(stats_t *);
 void stats_dump(const stats_t *);
 
-cpustat_t *cpustat_new(void);
-cpustat_t *cpustat_dup(const cpustat_t *);
-void cpustat_free(cpustat_t *);
-
-ivec_t *ivec_new(void);
-ivec_t *ivec_dup(const ivec_t *);
-void ivec_free(ivec_t *);
-
 char *xstrdup(const char *);
 void *xcalloc(size_t, size_t);
 void *xreallocarray(void *, size_t, size_t);
diff --git a/usr/src/cmd/intrd/intrd_kstat.c b/usr/src/cmd/intrd/intrd_kstat.c
index ba737eee60..c6e68e6ef2 100644
--- a/usr/src/cmd/intrd/intrd_kstat.c
+++ b/usr/src/cmd/intrd/intrd_kstat.c
@@ -56,7 +56,22 @@ static boolean_t ivec_shared_intr(const ivec_t *, const ivec_t *);
 static boolean_t ivec_shared_msi(const ivec_t *, const ivec_t *);
 
 static stats_t *stats_new(void);
+static stats_t *stats_dup(const stats_t*);
 
+static cpustat_t *cpustat_new(void);
+static cpustat_t *cpustat_dup(const cpustat_t *);
+static void cpustat_free(cpustat_t *);
+
+static ivec_t *ivec_new(void);
+static ivec_t *ivec_dup(const ivec_t *);
+static void ivec_free(ivec_t *);
+
+/*
+ * Get a kstat snapshot and assemble it into a stats_t.  If the fraction
+ * of the time spent creating the stats_t compared to the polling interval
+ * (interval) is > cfg_tooslow, we return NULL.  If we encounter a non-fatal
+ * error reading the kstats, we also return NULL.
+ */
 stats_t *
 stats_get(const config_t *restrict cfg, kstat_ctl_t *restrict kcp,
     uint_t interval)
@@ -90,6 +105,7 @@ stats_get(const config_t *restrict cfg, kstat_ctl_t *restrict kcp,
 	}
 
 	if (getstat_tooslow(sts, interval, cfg->cfg_tooslow)) {
+		errno = ETIME;
 		goto fail;
 	}
 
@@ -105,7 +121,6 @@ stats_get(const config_t *restrict cfg, kstat_ctl_t *restrict kcp,
 	return (sts);
 
 fail:
-	printf("%s fail\n", __func__);
 	stats_free(sts);
 	return (NULL);
 }
@@ -189,11 +204,6 @@ fail:
 	return (B_FALSE);
 }
 
-/*
- * Combine ivec_t's for any shared interrupts into a single consolidated
- * entry (since they have to move together).  On X86, also group MSI
- * interrupts for the same device (for similar reasons).
- */
 static int
 ivec_cmp(const void *a, const void *b)
 {
@@ -218,6 +228,11 @@ ivec_cmp(const void *a, const void *b)
 	return (0);
 }
 
+/*
+ * Combine ivec_t's for any shared interrupts into a single consolidated
+ * entry (since they have to move together).  On X86, also group MSI
+ * interrupts for the same device (for similar reasons).
+ */
 static intrd_walk_ret_t
 consolidate_ivec_cb(stats_t *stp, cpustat_t *cs, void *arg)
 {
@@ -229,6 +244,11 @@ consolidate_ivec_cb(stats_t *stp, cpustat_t *cs, void *arg)
 	ivec_t *temp[cs->cs_nivecs];
 	size_t n, i, j;
 
+	/*
+	 * We create an array of pointers to all the ivec_t's on this cpu,
+	 * then sort them so that any potential shared interrupts will
+	 * be adjacent in the array.
+	 */
 	n = 0;
 	for (iv = list_head(ivlist); iv != NULL; iv = list_next(ivlist, iv))
 		temp[n++] = iv;
@@ -236,6 +256,10 @@ consolidate_ivec_cb(stats_t *stp, cpustat_t *cs, void *arg)
 
 	qsort(temp, n, sizeof (ivec_t *), ivec_cmp);
 
+	/*
+	 * For each ivec, look at the next ivec in the array and consolidate
+	 * each successive ivec as long as they are shared.
+	 */
 	for (i = 0; i < n; i = j) {
 		iv = temp[i];
 
@@ -268,6 +292,9 @@ consolidate_ivecs(stats_t *stp)
 static boolean_t
 ivec_shared_intr(const ivec_t *i1, const ivec_t *i2)
 {
+	/*
+	 * XXX This needs to be revisited
+	 */
 #if 0
 	if (i1->ivec_ino != i2->ivec_ino)
 		return (B_FALSE);
@@ -292,6 +319,7 @@ ivec_shared_msi(const ivec_t *i1, const ivec_t *i2)
 #endif
 }
 
+#if 0
 static int
 ivec_cmp_msi(const void *a, const void *b)
 {
@@ -315,6 +343,7 @@ ivec_cmp_msi(const void *a, const void *b)
 		return (0);
 	return ((l->ivec_instance < r->ivec_instance) ? -1 : 1);
 }
+#endif
 
 static intrd_walk_ret_t
 get_cpu(kstat_ctl_t *restrict kcp, kstat_t *restrict ksp, void *restrict arg)
@@ -394,6 +423,9 @@ get_cpu(kstat_ctl_t *restrict kcp, kstat_t *restrict ksp, void *restrict arg)
 static intrd_walk_ret_t
 get_ivecs(kstat_ctl_t *restrict kcp, kstat_t *restrict ksp, void *restrict arg)
 {
+	/*
+	 * Also cache the field indexes for the pci_intrs kstat.
+	 */
 	static int cookie = -1;
 	static int cpu = -1;
 	static int buspath = -1;
@@ -484,8 +516,8 @@ get_ivecs(kstat_ctl_t *restrict kcp, kstat_t *restrict ksp, void *restrict arg)
 }
 
 /*
- * Determine if the amount of time spent collecting our stats, as well as set
- * the min and max timestamp of all the stats collected in stp.
+ * Determine if the ratio of time spent creating stp compared to the polling
+ * interval is > tooslow.
  */
 static boolean_t
 getstat_tooslow(stats_t *stp, uint_t interval, double tooslow)
@@ -499,7 +531,7 @@ getstat_tooslow(stats_t *stp, uint_t interval, double tooslow)
 	diff = stp->sts_maxtime - stp->sts_mintime;
 	nanonicenum(diff, numbuf, sizeof (numbuf));
 
-	portion = (double)diff / (double)(interval * NANOSEC);
+	portion = (double)diff / (double)((uint64_t)interval * NANOSEC);
 
 	syslog(LOG_DEBUG,
 	    "spent %.1f%% of the polling interval collecting stats "
@@ -574,6 +606,10 @@ stats_delta_cb(stats_t *stp, cpustat_t *cs, void *arg)
 	return (INTRD_WALK_NEXT);
 }
 
+/*
+ * If s1 and s2 reflect different system configurations (different cpus
+ * online, new/deleted interrupts, etc.) return B_TRUE, otherwise B_FALSE.
+ */
 static boolean_t
 stats_differ(const stats_t *s1, const stats_t *s2)
 {
@@ -617,6 +653,9 @@ stats_differ(const stats_t *s1, const stats_t *s2)
 	return (B_FALSE);
 }
 
+/*
+ * Compute what is effectively st - prev.
+ */
 stats_t *
 stats_delta(const stats_t *restrict st, const stats_t *restrict prev)
 {
@@ -684,6 +723,11 @@ stats_sum_cb(stats_t *sum, cpustat_t *cs, void *arg)
 	return (INTRD_WALK_NEXT);
 }
 
+/*
+ * Given a collection of n deltas, combine them together to represent a larger
+ * time interval, skipping any that don't represent the same configuration.
+ * Sets *total to the number of deltas that were used.
+ */
 stats_t *
 stats_sum(stats_t * const *restrict deltas, size_t n, size_t *restrict total)
 {
@@ -733,7 +777,7 @@ stlgrp_copy(const cpugrp_t *src, cpugrp_t *dst)
 	dst->cg_nchildren = src->cg_nchildren;
 }
 
-stats_t *
+static stats_t *
 stats_dup(const stats_t *src)
 {
 	stats_t *stp;
@@ -755,39 +799,6 @@ stats_dup(const stats_t *src)
 	return (stp);
 }
 
-/*
- * Like nicenum, but assumes the value is * 10^(-9) units
- */
-void
-nanonicenum(uint64_t val, char *buf, size_t buflen)
-{
-	static const char units[] = "num KMGTPE";
-	static const size_t index_max = 9;
-	uint64_t divisor = 1;
-	int index = 0;
-	char u;
-
-	while (index < index_max) {
-		uint64_t newdiv = divisor * 1024;
-
-		if (val < newdiv)
-			break;
-		divisor = newdiv;
-		index++;
-	}
-	u = units[index];
-
-	if (val % divisor == 0) {
-		(void) snprintf(buf, buflen, "%llu%c", val / divisor, u);
-	} else {
-		for (int i = 2; i >= 0; i--) {
-			if (snprintf(buf, buflen, "%.*f%c", i,
-			    (double)val / divisor, u) <= 5)
-				return;
-		}
-	}
-}
-
 static intrd_walk_ret_t
 stats_dump_cb(stats_t *stp, cpustat_t *cs, void *dummy __unused)
 {
@@ -896,7 +907,7 @@ stats_free(stats_t *stp)
 	umem_cache_free(stats_cache, stp);
 }
 
-ivec_t *
+static ivec_t *
 ivec_dup(const ivec_t *iv)
 {
 	ivec_t *newiv = ivec_new();
@@ -918,13 +929,13 @@ ivec_dup(const ivec_t *iv)
 	return (newiv);
 }
 
-ivec_t *
+static ivec_t *
 ivec_new(void)
 {
 	return (umem_cache_alloc(ivec_cache, UMEM_NOFAIL));
 }
 
-void
+static void
 ivec_free(ivec_t *iv)
 {
 	if (iv == NULL)
@@ -942,13 +953,13 @@ ivec_free(ivec_t *iv)
 	umem_cache_free(ivec_cache, iv);
 }
 
-cpustat_t *
+static cpustat_t *
 cpustat_new(void)
 {
 	return (umem_cache_alloc(cpustat_cache, UMEM_NOFAIL));
 }
 
-void
+static void
 cpustat_free(cpustat_t *cs)
 {
 	if (cs == NULL)
@@ -971,7 +982,7 @@ cpustat_free(cpustat_t *cs)
 	umem_cache_free(cpustat_cache, cs);
 }
 
-cpustat_t *
+static cpustat_t *
 cpustat_dup(const cpustat_t *src)
 {
 	cpustat_t *cs = cpustat_new();
author	Jason King <jason.king@joyent.com>	2018-10-19 21:13:06 -0500
committer	Jason King <jason.king@joyent.com>	2020-12-08 19:13:03 +0000
commit	88880f2384ccc4a2382a0fd56138ff5a6862866f (patch)
tree	0f598e8d6cc6e10c04026af5ecee92a5a8afaa20
parent	e751b029b28ab40ab0a552ff16815ed0be38d284 (diff)
download	illumos-joyent-88880f2384ccc4a2382a0fd56138ff5a6862866f.tar.gz