summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/sys/cpu_uarray.h
blob: 9cad7725976f55a62d0d1a7aca64d766a79fbc51 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2018, Joyent, Inc.
 */

/*
 * Use a cpu_uarray_t for an array of uint64_t values that are written on a
 * per-CPU basis.  We align each CPU on a 128-byte boundary (so two cachelines).
 * It's not clear why, but this can have a significant effect in multi-socket
 * systems running certain benchmarks on a relatively current Intel system.
 *
 * So the layout is like this, for example:
 *
 * 0:	STAT1 for CPU 0
 * 8:	STAT2 for CPU 0
 * 16:	STAT3 for CPU 0
 * 24:	padding
 * 128: STAT1 for CPU 1
 * 136: STAT2 for CPU 1
 * ...
 *
 * At collection time, cpu_uarray_sum() can be used to sum the given value index
 * across all CPUs, or cpu_uarray_sum_all() sums all stats across all CPUs.
 * The summation is done such that it saturates at UINT64_MAX.
 */

#ifndef	_SYS_CPU_UARRAY_H
#define	_SYS_CPU_UARRAY_H

#include <sys/types.h>

#ifdef	__cplusplus
extern "C" {
#endif

#ifdef _KERNEL

/*
 * Trying to include sysmacros.h for P2ROUNDUP() here is just too painful.
 *
 * CUA_ROUNDUP(x, align) rounds x up to the next multiple of align, which must
 * be a power of two.  Both operands are widened to uint64_t before they are
 * negated: if align had an unsigned type narrower than x's (for instance a
 * 32-bit size_t against the 64-bit cu_nr_items), -(align) would be
 * zero-extended and would mask off the upper bits of -(x), producing a
 * bogus, enormous result.
 */
#define	CUA_ROUNDUP(x, align) (-(-(uint64_t)(x) & -(uint64_t)(align)))

/* Alignment, in bytes, of each CPU's group of values. */
#define	CUA_ALIGN (128)

/* Number of uint64_t slots set aside per CPU to hold nr_items values. */
#define	CUA_CPU_STRIDE(nr_items) \
	CUA_ROUNDUP((nr_items), CUA_ALIGN / sizeof (uint64_t))

/* Index within the flat value array of value i belonging to CPU c. */
#define	CUA_INDEX(nr_items, c, i) (((c) * CUA_CPU_STRIDE(nr_items)) + (i))

/* The lvalue for value stat_index of CPU cpu_index in the cpu_uarray_t *cua. */
#define	CPU_UARRAY_VAL(cua, cpu_index, stat_index) \
	((cua)->cu_vals[CUA_INDEX((cua)->cu_nr_items, (cpu_index), (stat_index))])

/*
 * A cpu_uarray_t is a small header followed by the flat array of values that
 * CPU_UARRAY_VAL() indexes.  The header is padded to CUA_ALIGN bytes so that
 * cu_vals begins CUA_ALIGN bytes into the structure.
 * NOTE(review): __aligned() is defined outside this header; presumably it
 * requests CUA_ALIGN-byte alignment for the type -- confirm.
 */
typedef struct {
	/* Values per CPU; CPU_UARRAY_VAL() passes this to CUA_INDEX(). */
	uint64_t cu_nr_items;
	/* Pads cu_nr_items out to a total of CUA_ALIGN bytes. */
	char cu_pad[CUA_ALIGN - sizeof (uint64_t)];
#ifdef	__lint
	/*
	 * One-element stand-in used only under lint; presumably lint cannot
	 * parse a flexible array member -- TODO confirm.
	 */
	volatile uint64_t cu_vals[1];
#else
	/* The per-CPU values, laid out as CUA_INDEX() describes. */
	volatile uint64_t cu_vals[];
#endif
} cpu_uarray_t __aligned(CUA_ALIGN);

/*
 * Allocate and release a cpu_uarray_t.
 * NOTE(review): the definitions live outside this header.  The size_t is
 * presumably the number of values per CPU and the int an allocation flag;
 * the "z" suggests the values start out zeroed -- confirm against the
 * implementation.
 */
extern cpu_uarray_t *cpu_uarray_zalloc(size_t, int);
extern void cpu_uarray_free(cpu_uarray_t *);
/*
 * Collection: cpu_uarray_sum() sums the value at the given index across all
 * CPUs, and cpu_uarray_sum_all() sums every value across all CPUs.  Per the
 * comment at the top of this file, the sums saturate at UINT64_MAX.
 */
extern uint64_t cpu_uarray_sum(cpu_uarray_t *, size_t);
extern uint64_t cpu_uarray_sum_all(cpu_uarray_t *);

#endif /* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_CPU_UARRAY_H */