summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/sys
diff options
context:
space:
mode:
author John Levon <john.levon@joyent.com> 2018-11-21 10:02:46 +0000
committer Richard Lowe <richlowe@richlowe.net> 2018-11-21 20:07:44 +0000
commit 2918c4a32d09a835c1eba8b0b02fe1dcb7a83175 (patch)
tree 2a30c95e297083f3a778b96ebc3aa87034625ed3 /usr/src/uts/common/sys
parent 8e9dfb97a84bbf797669c3ee2aabcc48b9ee13ff (diff)
download illumos-gate-2918c4a32d09a835c1eba8b0b02fe1dcb7a83175.tar.gz
9936 atomic ops in syscall_mstate() induce significant overhead
9942 zone secflags are not initialized correctly
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Andy Fiddaman <andy@omniosce.org>
Reviewed by: Toomas Soome <tsoome@me.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
Diffstat (limited to 'usr/src/uts/common/sys')
-rw-r--r-- usr/src/uts/common/sys/Makefile 3
-rw-r--r-- usr/src/uts/common/sys/cpu_uarray.h 81
-rw-r--r-- usr/src/uts/common/sys/sysmacros.h 13
-rw-r--r-- usr/src/uts/common/sys/zone.h 46
4 files changed, 119 insertions, 24 deletions
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 66433e9181..8d26a71342 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -21,7 +21,7 @@
#
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2014, Joyent, Inc. All rights reserved.
+# Copyright (c) 2018, Joyent, Inc.
# Copyright 2013 Garrett D'Amore <garrett@damore.org>
# Copyright 2013 Saso Kiselkov. All rights reserved.
# Copyright 2015 Igor Kozhukhov <ikozhukhov@gmail.com>
@@ -138,6 +138,7 @@ CHKHDRS= \
cpc_impl.h \
cpc_pcbe.h \
cpr.h \
+ cpu_uarray.h \
cpupart.h \
cpuvar.h \
crc32.h \
diff --git a/usr/src/uts/common/sys/cpu_uarray.h b/usr/src/uts/common/sys/cpu_uarray.h
new file mode 100644
index 0000000000..9cad772597
--- /dev/null
+++ b/usr/src/uts/common/sys/cpu_uarray.h
@@ -0,0 +1,81 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2018, Joyent, Inc.
+ */
+
+/*
+ * Use a cpu_uarray_t for an array of uint64_t values that are written on a
+ * per-CPU basis. We align each CPU on a 128-byte boundary (so two cachelines).
+ * It's not clear why, but this can have a significant effect in multi-socket
+ * systems running certain benchmarks on a relatively current Intel system.
+ *
+ * So the layout is like this, for example:
+ *
+ * 0: STAT1 for CPU 0
+ * 8: STAT2 for CPU 0
+ * 16: STAT3 for CPU 0
+ * 24: padding
+ * 128: STAT1 for CPU 1
+ * 136: STAT2 for CPU 1
+ * ...
+ *
+ * At collection time, cpu_uarray_sum() can be used to sum the given value index
+ * across all CPUs, or cpu_uarray_sum_all() sums all stats across all CPUs.
+ * The summation is done such that it saturates at UINT64_MAX.
+ */
+
+#ifndef _SYS_CPU_UARRAY_H
+#define _SYS_CPU_UARRAY_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/*
+ * Helper macros for locating a value inside a cpu_uarray_t.
+ *
+ * CUA_ROUNDUP(x, align) rounds x up to the next multiple of align, which must
+ * be a power of two. It stands in for P2ROUNDUP(): trying to include
+ * sysmacros.h for P2ROUNDUP() here is just too painful.
+ *
+ * CUA_ALIGN is the per-CPU alignment in bytes. CUA_CPU_STRIDE(nr_items) is the
+ * number of uint64_t slots reserved for each CPU: nr_items rounded up to a
+ * multiple of CUA_ALIGN / sizeof (uint64_t). CUA_INDEX() converts a
+ * (CPU index, item index) pair into an index into the flat value array.
+ *
+ * CPU_UARRAY_VAL() expands to the cu_vals[] element holding item stat_index
+ * for CPU cpu_index. Note that it evaluates its cua argument twice, and that
+ * none of these macros range-check their arguments.
+ */
+#define CUA_ROUNDUP(x, align) (-(-(x) & -(align)))
+#define CUA_ALIGN (128)
+#define CUA_CPU_STRIDE(nr_items) \
+ CUA_ROUNDUP((nr_items), CUA_ALIGN / sizeof (uint64_t))
+#define CUA_INDEX(nr_items, c, i) (((c) * CUA_CPU_STRIDE(nr_items)) + (i))
+
+#define CPU_UARRAY_VAL(cua, cpu_index, stat_index) \
+ ((cua)->cu_vals[CUA_INDEX((cua)->cu_nr_items, cpu_index, stat_index)])
+
+typedef struct {
+ uint64_t cu_nr_items; /* items per CPU; CPU_UARRAY_VAL() derives the stride from it */
+ char cu_pad[CUA_ALIGN - sizeof (uint64_t)]; /* pads the header out to CUA_ALIGN bytes */
+#ifdef __lint
+ volatile uint64_t cu_vals[1]; /* fixed-size stand-in used only when __lint is defined */
+#else
+ volatile uint64_t cu_vals[]; /* per-CPU values; index via CPU_UARRAY_VAL() */
+#endif
+} cpu_uarray_t __aligned(CUA_ALIGN);
+
+extern cpu_uarray_t *cpu_uarray_zalloc(size_t, int); /* presumably (item count, alloc flags) -- confirm against the implementation */
+extern void cpu_uarray_free(cpu_uarray_t *);
+extern uint64_t cpu_uarray_sum(cpu_uarray_t *, size_t); /* sum of one item index across all CPUs, saturating at UINT64_MAX */
+extern uint64_t cpu_uarray_sum_all(cpu_uarray_t *); /* sum of all items across all CPUs, saturating at UINT64_MAX */
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CPU_UARRAY_H */
diff --git a/usr/src/uts/common/sys/sysmacros.h b/usr/src/uts/common/sys/sysmacros.h
index 6f5882b54b..5dc6eee0ec 100644
--- a/usr/src/uts/common/sys/sysmacros.h
+++ b/usr/src/uts/common/sys/sysmacros.h
@@ -373,6 +373,19 @@ extern unsigned char bcd_to_byte[256];
#define ARRAY_SIZE(x) (sizeof (x) / sizeof (x[0]))
#endif
+/*
+ * Add a value to a uint64_t, saturating at UINT64_MAX instead of wrapping
+ * around. Overflow is detected by checking whether the sum came out smaller
+ * than val, which is only meaningful when the addition is carried out in
+ * unsigned 64-bit arithmetic. This is a macro: val is evaluated up to three
+ * times and add up to twice, so do not pass expressions with side effects.
+ */
+#define UINT64_OVERFLOW_ADD(val, add) \
+ ((val) > ((val) + (add)) ? (UINT64_MAX) : ((val) + (add)))
+
+/*
+ * Convert to an int64, saturating at INT64_MAX: any value greater than
+ * INT64_MAX yields INT64_MAX. uval is evaluated twice.
+ */
+#define UINT64_OVERFLOW_TO_INT64(uval) \
+ (((uval) > INT64_MAX) ? INT64_MAX : (int64_t)(uval))
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 27f52c57e2..56fa4b8d87 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015 Joyent, Inc. All rights reserved.
+ * Copyright 2018 Joyent, Inc.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>.
*/
@@ -42,6 +42,7 @@
#include <sys/socket_impl.h>
#include <sys/secflags.h>
#include <netinet/in.h>
+#include <sys/cpu_uarray.h>
#ifdef __cplusplus
extern "C" {
@@ -330,6 +331,15 @@ typedef struct zone_net_data {
#define GLOBAL_ZONEUNIQID 0 /* uniqid of the global zone */
+/*
+ * Indexes into the ->zone_ustate array of a zone_t, which accumulates the
+ * micro state of all threads in a particular zone.
+ */
+#define ZONE_USTATE_STIME (0) /* total system time */
+#define ZONE_USTATE_UTIME (1) /* total user time */
+#define ZONE_USTATE_WTIME (2) /* total time waiting in runq */
+#define ZONE_USTATE_MAX (3) /* one past the last index above */
+
struct pool;
struct brand;
@@ -433,13 +443,13 @@ typedef struct zone {
/* if not emulated */
/*
* zone_lock protects the following fields of a zone_t:
- * zone_ref
- * zone_cred_ref
- * zone_subsys_ref
- * zone_ref_list
- * zone_ntasks
- * zone_flags
- * zone_zsd
+ * zone_ref
+ * zone_cred_ref
+ * zone_subsys_ref
+ * zone_ref_list
+ * zone_ntasks
+ * zone_flags
+ * zone_zsd
* zone_pfexecd
*/
kmutex_t zone_lock;
@@ -543,7 +553,7 @@ typedef struct zone {
boolean_t zone_restart_init; /* Restart init if it dies? */
struct brand *zone_brand; /* zone's brand */
- void *zone_brand_data; /* store brand specific data */
+ void *zone_brand_data; /* store brand specific data */
id_t zone_defaultcid; /* dflt scheduling class id */
kstat_t *zone_swapresv_kstat;
kstat_t *zone_lockedmem_kstat;
@@ -584,22 +594,12 @@ typedef struct zone {
/*
* Misc. kstats and counters for zone cpu-usage aggregation.
- * The zone_Xtime values are the sum of the micro-state accounting
- * values for all threads that are running or have run in the zone.
- * This is tracked in msacct.c as threads change state.
- * The zone_stime is the sum of the LMS_SYSTEM times.
- * The zone_utime is the sum of the LMS_USER times.
- * The zone_wtime is the sum of the LMS_WAIT_CPU times.
- * As with per-thread micro-state accounting values, these values are
- * not scaled to nanosecs. The scaling is done by the
- * zone_misc_kstat_update function when kstats are requested.
*/
kmutex_t zone_misc_lock; /* protects misc statistics */
kstat_t *zone_misc_ksp;
zone_misc_kstat_t *zone_misc_stats;
- uint64_t zone_stime; /* total system time */
- uint64_t zone_utime; /* total user time */
- uint64_t zone_wtime; /* total time waiting in runq */
+ /* Accumulated microstate for all threads in this zone. */
+ cpu_uarray_t *zone_ustate;
/* fork-fail kstat tracking */
uint32_t zone_ffcap; /* hit an rctl cap */
uint32_t zone_ffnoproc; /* get proc/lwp error */
@@ -681,7 +681,7 @@ typedef uint_t zone_key_t;
extern void zone_key_create(zone_key_t *, void *(*)(zoneid_t),
void (*)(zoneid_t, void *), void (*)(zoneid_t, void *));
-extern int zone_key_delete(zone_key_t);
+extern int zone_key_delete(zone_key_t);
extern void *zone_getspecific(zone_key_t, zone_t *);
extern int zone_setspecific(zone_key_t, zone_t *, const void *);
@@ -707,7 +707,7 @@ struct zsd_entry {
void (*zsd_shutdown)(zoneid_t, void *);
void (*zsd_destroy)(zoneid_t, void *);
list_node_t zsd_linkage;
- uint16_t zsd_flags; /* See below */
+ uint16_t zsd_flags; /* See below */
kcondvar_t zsd_cv;
};