xref: /illumos-gate/usr/src/uts/common/sys/cpu_uarray.h (revision 2918c4a3)
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
11*2918c4a3SJohn Levon 
/*
 * Copyright (c) 2018, Joyent, Inc.
 */
15*2918c4a3SJohn Levon 
/*
 * Use a cpu_uarray_t for an array of uint64_t values that are written on a
 * per-CPU basis.  We align each CPU's values on a 128-byte boundary (so two
 * cachelines).  It's not clear why, but this can have a significant effect in
 * multi-socket systems running certain benchmarks on a relatively current
 * Intel system.
 *
 * So the layout is like this, for example:
 *
 * 0:	STAT1 for CPU 0
 * 8:	STAT2 for CPU 0
 * 16:	STAT3 for CPU 0
 * 24:	padding
 * 128:	STAT1 for CPU 1
 * 136:	STAT2 for CPU 1
 * ...
 *
 * At collection time, cpu_uarray_sum() can be used to sum the given value index
 * across all CPUs, or cpu_uarray_sum_all() sums all stats across all CPUs.
 * The summation is done such that it saturates at UINT64_MAX.
 */
36*2918c4a3SJohn Levon 
37*2918c4a3SJohn Levon #ifndef	_SYS_CPU_UARRAY_H
38*2918c4a3SJohn Levon #define	_SYS_CPU_UARRAY_H
39*2918c4a3SJohn Levon 
40*2918c4a3SJohn Levon #include <sys/types.h>
41*2918c4a3SJohn Levon 
42*2918c4a3SJohn Levon #ifdef	__cplusplus
43*2918c4a3SJohn Levon extern "C" {
44*2918c4a3SJohn Levon #endif
45*2918c4a3SJohn Levon 
46*2918c4a3SJohn Levon #ifdef _KERNEL
47*2918c4a3SJohn Levon 
/*
 * Trying to include sysmacros.h for P2ROUNDUP() here is just too painful.
 *
 * CUA_ROUNDUP(x, align) rounds x up to the next multiple of align, which must
 * be a power of two.  Both operands are widened to uint64_t before the mask is
 * built: if align had a narrower unsigned type than x, its negation would be
 * zero-extended and the upper bits of the result would be garbage.
 */
#define	CUA_ROUNDUP(x, align) \
	(-(-(uint64_t)(x) & -(uint64_t)(align)))

/* Alignment, in bytes, of each CPU's group of values. */
#define	CUA_ALIGN (128)

/*
 * Number of uint64_t slots occupied by one CPU: nr_items rounded up to a
 * whole number of CUA_ALIGN-byte units.
 */
#define	CUA_CPU_STRIDE(nr_items) \
	CUA_ROUNDUP((nr_items), CUA_ALIGN / sizeof (uint64_t))

/* Index into the value array of item i belonging to CPU c. */
#define	CUA_INDEX(nr_items, c, i) (((c) * CUA_CPU_STRIDE(nr_items)) + (i))

/*
 * Lvalue naming stat stat_index of CPU cpu_index within the array pointed to
 * by cua; the per-CPU stride is derived from cua->cu_nr_items.
 */
#define	CPU_UARRAY_VAL(cua, cpu_index, stat_index) \
	((cua)->cu_vals[CUA_INDEX((cua)->cu_nr_items, cpu_index, stat_index)])
59*2918c4a3SJohn Levon 
60*2918c4a3SJohn Levon typedef struct {
61*2918c4a3SJohn Levon 	uint64_t cu_nr_items;
62*2918c4a3SJohn Levon 	char cu_pad[CUA_ALIGN - sizeof (uint64_t)];
63*2918c4a3SJohn Levon #ifdef	__lint
64*2918c4a3SJohn Levon 	volatile uint64_t cu_vals[1];
65*2918c4a3SJohn Levon #else
66*2918c4a3SJohn Levon 	volatile uint64_t cu_vals[];
67*2918c4a3SJohn Levon #endif
68*2918c4a3SJohn Levon } cpu_uarray_t __aligned(CUA_ALIGN);
69*2918c4a3SJohn Levon 
70*2918c4a3SJohn Levon extern cpu_uarray_t *cpu_uarray_zalloc(size_t, int);
71*2918c4a3SJohn Levon extern void cpu_uarray_free(cpu_uarray_t *);
72*2918c4a3SJohn Levon extern uint64_t cpu_uarray_sum(cpu_uarray_t *, size_t);
73*2918c4a3SJohn Levon extern uint64_t cpu_uarray_sum_all(cpu_uarray_t *);
74*2918c4a3SJohn Levon 
75*2918c4a3SJohn Levon #endif /* _KERNEL */
76*2918c4a3SJohn Levon 
77*2918c4a3SJohn Levon #ifdef	__cplusplus
78*2918c4a3SJohn Levon }
79*2918c4a3SJohn Levon #endif
80*2918c4a3SJohn Levon 
81*2918c4a3SJohn Levon #endif	/* _SYS_CPU_UARRAY_H */
82