/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2018, Joyent, Inc.
 */

/*
 * Use a cpu_uarray_t for an array of uint64_t values that are written on a
 * per-CPU basis.  We align each CPU on a 128-byte boundary (so two cachelines).
 * It's not clear why, but this can have a significant effect in multi-socket
 * systems running certain benchmarks on a relatively current Intel system.
 *
 * So the layout is like this, for example:
 *
 *   0:   STAT1 for CPU 0
 *   8:   STAT2 for CPU 0
 *   16:  STAT3 for CPU 0
 *   24:  padding
 *   128: STAT1 for CPU 1
 *   136: STAT2 for CPU 1
 *   ...
 *
 * At collection time, cpu_uarray_sum() can be used to sum the given value index
 * across all CPUs, or cpu_uarray_sum_all() sums all stats across all CPUs.
 * The summation is done such that it saturates at UINT64_MAX.
 */

#ifndef _SYS_CPU_UARRAY_H
#define	_SYS_CPU_UARRAY_H

#include <sys/types.h>

#ifdef __cplusplus
extern "C" {
#endif

#ifdef _KERNEL

/*
 * Trying to include sysmacros.h for P2ROUNDUP() here is just too painful.
 *
 * CUA_ROUNDUP(x, align) rounds x up to the next multiple of align using the
 * two's-complement negate-and-mask trick, so align must be a power of two.
 */
#define	CUA_ROUNDUP(x, align)	(-(-(x) & -(align)))

/* Per-CPU alignment, in bytes (two cachelines; see the comment above). */
#define	CUA_ALIGN	(128)

/*
 * Number of uint64_t slots reserved for each CPU: nr_items rounded up to a
 * whole multiple of (CUA_ALIGN / sizeof (uint64_t)) slots, so that each CPU's
 * run of values occupies a multiple of CUA_ALIGN bytes.
 */
#define	CUA_CPU_STRIDE(nr_items) \
	CUA_ROUNDUP((nr_items), CUA_ALIGN / sizeof (uint64_t))

/* Index into cu_vals[] of item i belonging to CPU number c. */
#define	CUA_INDEX(nr_items, c, i)	(((c) * CUA_CPU_STRIDE(nr_items)) + (i))

/*
 * Lvalue naming the value of stat_index for cpu_index in the given
 * cpu_uarray_t.  Note that the cua argument is expanded twice, so it must not
 * have side effects.
 */
#define	CPU_UARRAY_VAL(cua, cpu_index, stat_index) \
	((cua)->cu_vals[CUA_INDEX((cua)->cu_nr_items, cpu_index, stat_index)])

typedef struct {
	/* Number of values tracked per CPU; feeds CUA_INDEX(). */
	uint64_t cu_nr_items;
	/* Pads the header out to CUA_ALIGN bytes so cu_vals starts aligned. */
	char cu_pad[CUA_ALIGN - sizeof (uint64_t)];
	/*
	 * The per-CPU values, laid out as described at the top of this file.
	 * The one-element form exists only for lint (presumably because lint
	 * does not accept a flexible array member).
	 */
#ifdef __lint
	volatile uint64_t cu_vals[1];
#else
	volatile uint64_t cu_vals[];
#endif
} cpu_uarray_t __aligned(CUA_ALIGN);

/*
 * Allocation and release of a cpu_uarray_t.  The meaning of the arguments is
 * not visible from this header: presumably the number of items per CPU and
 * the allocation flags -- confirm against the implementation.
 */
extern cpu_uarray_t *cpu_uarray_zalloc(size_t, int);
extern void cpu_uarray_free(cpu_uarray_t *);

/*
 * cpu_uarray_sum() sums the value at the given index across all CPUs;
 * cpu_uarray_sum_all() sums every value across all CPUs.  Both saturate at
 * UINT64_MAX.
 */
extern uint64_t cpu_uarray_sum(cpu_uarray_t *, size_t);
extern uint64_t cpu_uarray_sum_all(cpu_uarray_t *);

#endif /* _KERNEL */

#ifdef __cplusplus
}
#endif

#endif /* _SYS_CPU_UARRAY_H */