summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/sys/cpu_uarray.h
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/sys/cpu_uarray.h')
-rw-r--r--usr/src/uts/common/sys/cpu_uarray.h81
1 files changed, 81 insertions, 0 deletions
diff --git a/usr/src/uts/common/sys/cpu_uarray.h b/usr/src/uts/common/sys/cpu_uarray.h
new file mode 100644
index 0000000000..9cad772597
--- /dev/null
+++ b/usr/src/uts/common/sys/cpu_uarray.h
@@ -0,0 +1,81 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2018, Joyent, Inc.
+ */
+
+/*
+ * Use a cpu_uarray_t for an array of uint64_t values that are written on a
+ * per-CPU basis. We align each CPU's values on a 128-byte boundary (so two
+ * cachelines). It's not clear why, but this can have a significant effect on
+ * multi-socket, relatively current Intel systems running certain benchmarks.
+ *
+ * So the layout is like this, for example:
+ *
+ * 0: STAT1 for CPU 0
+ * 8: STAT2 for CPU 0
+ * 16: STAT3 for CPU 0
+ * 24: padding (through byte 127)
+ * 128: STAT1 for CPU 1
+ * 136: STAT2 for CPU 1
+ * ...
+ *
+ * At collection time, cpu_uarray_sum() can be used to sum the given value index
+ * across all CPUs, or cpu_uarray_sum_all() sums all stats across all CPUs.
+ * The summation is done such that it saturates at UINT64_MAX.
+ */
+
+#ifndef _SYS_CPU_UARRAY_H
+#define _SYS_CPU_UARRAY_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/*
+ * Trying to include sysmacros.h for P2ROUNDUP() here is just too painful.
+ *
+ * CUA_ROUNDUP(x, align) rounds x up to a multiple of align; the negate-and-mask
+ * form is only correct when align is a power of two.
+ *
+ * CUA_ALIGN is the per-CPU alignment in bytes. CUA_CPU_STRIDE(nr_items) is
+ * nr_items rounded up to a whole number of CUA_ALIGN-byte chunks, expressed in
+ * uint64_t elements (CUA_ALIGN / sizeof (uint64_t) elements per chunk).
+ *
+ * CUA_INDEX(nr_items, c, i) is the flat index of item i for CPU c, and
+ * CPU_UARRAY_VAL() expands to the matching cu_vals[] element of cua, taking
+ * the item count from cua->cu_nr_items. Neither macro bounds-checks c or i,
+ * and CPU_UARRAY_VAL() evaluates cua more than once.
+ */
+#define CUA_ROUNDUP(x, align) (-(-(x) & -(align)))
+#define CUA_ALIGN (128)
+#define CUA_CPU_STRIDE(nr_items) \
+ CUA_ROUNDUP((nr_items), CUA_ALIGN / sizeof (uint64_t))
+#define CUA_INDEX(nr_items, c, i) (((c) * CUA_CPU_STRIDE(nr_items)) + (i))
+
+#define CPU_UARRAY_VAL(cua, cpu_index, stat_index) \
+ ((cua)->cu_vals[CUA_INDEX((cua)->cu_nr_items, cpu_index, stat_index)])
+
+typedef struct { /* item count, padding, then the per-CPU values */
+ uint64_t cu_nr_items; /* number of values kept per CPU */
+ char cu_pad[CUA_ALIGN - sizeof (uint64_t)]; /* puts cu_vals at CUA_ALIGN */
+#ifdef __lint
+ volatile uint64_t cu_vals[1]; /* lint-only stand-in for the array below */
+#else
+ volatile uint64_t cu_vals[]; /* indexed through CPU_UARRAY_VAL() */
+#endif
+} cpu_uarray_t __aligned(CUA_ALIGN);
+
+extern cpu_uarray_t *cpu_uarray_zalloc(size_t, int); /* presumably (nr_items, kmem flag); TODO confirm */
+extern void cpu_uarray_free(cpu_uarray_t *);
+extern uint64_t cpu_uarray_sum(cpu_uarray_t *, size_t); /* one value index, all CPUs; saturates */
+extern uint64_t cpu_uarray_sum_all(cpu_uarray_t *); /* all values, all CPUs; saturates */
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CPU_UARRAY_H */