xref: /illumos-gate/usr/src/uts/intel/pcbe/core_pcbe.c (revision a1e3874e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2019 Joyent, Inc.
24  */
25 
26 /*
27  * This file contains preset event names from the Performance Application
28  * Programming Interface v3.5 which included the following notice:
29  *
30  *                             Copyright (c) 2005,6
31  *                           Innovative Computing Labs
32  *                         Computer Science Department,
33  *                            University of Tennessee,
34  *                                 Knoxville, TN.
35  *                              All Rights Reserved.
36  *
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions are met:
40  *
41  *    * Redistributions of source code must retain the above copyright notice,
42  *      this list of conditions and the following disclaimer.
43  *    * Redistributions in binary form must reproduce the above copyright
44  *      notice, this list of conditions and the following disclaimer in the
45  *      documentation and/or other materials provided with the distribution.
46  *    * Neither the name of the University of Tennessee nor the names of its
47  *      contributors may be used to endorse or promote products derived from
48  *      this software without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
51  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
54  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
55  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
56  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
57  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
58  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
59  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60  * POSSIBILITY OF SUCH DAMAGE.
61  *
62  *
63  * This open source software license conforms to the BSD License template.
64  */
65 
66 
67 /*
68  * Performance Counter Back-End for Intel processors supporting Architectural
69  * Performance Monitoring.
70  */
71 
72 #include <sys/cpuvar.h>
73 #include <sys/param.h>
74 #include <sys/cpc_impl.h>
75 #include <sys/cpc_pcbe.h>
76 #include <sys/modctl.h>
77 #include <sys/inttypes.h>
78 #include <sys/systm.h>
79 #include <sys/cmn_err.h>
80 #include <sys/x86_archext.h>
81 #include <sys/sdt.h>
82 #include <sys/archsystm.h>
83 #include <sys/privregs.h>
84 #include <sys/ddi.h>
85 #include <sys/sunddi.h>
86 #include <sys/cred.h>
87 #include <sys/policy.h>
88 
89 #include "core_pcbe_table.h"
90 #include <core_pcbe_cpcgen.h>
91 
/*
 * Forward declarations.  Every routine except core_pcbe_init() is installed
 * in the core_pcbe_ops vector below.
 */
static int core_pcbe_init(void);
static uint_t core_pcbe_ncounters(void);
static const char *core_pcbe_impl_name(void);
static const char *core_pcbe_cpuref(void);
static char *core_pcbe_list_events(uint_t picnum);
static char *core_pcbe_list_attrs(void);
static uint64_t core_pcbe_event_coverage(char *event);
static uint64_t core_pcbe_overflow_bitmap(void);
static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void core_pcbe_program(void *token);
static void core_pcbe_allstop(void);
static void core_pcbe_sample(void *token);
static void core_pcbe_free(void *config);
107 
#define	FALSE	0
#define	TRUE	1

/* Counter Type */
#define	CORE_GPC	0	/* General-Purpose Counter (GPC) */
#define	CORE_FFC	1	/* Fixed-Function Counter (FFC) */

/* MSR Addresses */
#define	GPC_BASE_PMC		0x00c1	/* First GPC */
#define	GPC_BASE_PES		0x0186	/* First GPC Event Select register */
#define	FFC_BASE_PMC		0x0309	/* First FFC */
#define	PERF_FIXED_CTR_CTRL	0x038d	/* Used to enable/disable FFCs */
#define	PERF_GLOBAL_STATUS	0x038e	/* Overflow status register */
#define	PERF_GLOBAL_CTRL	0x038f	/* Used to enable/disable counting */
#define	PERF_GLOBAL_OVF_CTRL	0x0390	/* Used to clear overflow status */

/*
 * Processor Event Select register fields
 */
#define	CORE_USR	(1ULL << 16)	/* Count while not in ring 0 */
#define	CORE_OS		(1ULL << 17)	/* Count while in ring 0 */
#define	CORE_EDGE	(1ULL << 18)	/* Enable edge detection */
#define	CORE_PC		(1ULL << 19)	/* Enable pin control */
#define	CORE_INT	(1ULL << 20)	/* Enable interrupt on overflow */
#define	CORE_EN		(1ULL << 22)	/* Enable counting */
#define	CORE_INV	(1ULL << 23)	/* Invert the CMASK */
#define	CORE_ANYTHR	(1ULL << 21)	/* Count event for any thread on core */

#define	CORE_UMASK_SHIFT	8
#define	CORE_UMASK_MASK		0xffu
#define	CORE_CMASK_SHIFT	24
#define	CORE_CMASK_MASK		0xffu

/*
 * Fixed-function counter attributes
 */
#define	CORE_FFC_OS_EN	(1ULL << 0)	/* Count while in ring 0 */
#define	CORE_FFC_USR_EN	(1ULL << 1)	/* Count while not in ring 0 */
#define	CORE_FFC_ANYTHR	(1ULL << 2)	/* Count event for any thread on core */
#define	CORE_FFC_PMI	(1ULL << 3)	/* Enable interrupt on overflow */

/*
 * Number of bits for specifying each FFC's attributes in the control register
 */
#define	CORE_FFC_ATTR_SIZE	4

/*
 * CondChgd and OvfBuffer fields of global status and overflow control registers
 */
#define	CONDCHGD	(1ULL << 63)
#define	OVFBUFFER	(1ULL << 62)
#define	MASK_CONDCHGD_OVFBUFFER	(CONDCHGD | OVFBUFFER)

/* Value written to PERF_GLOBAL_CTRL when no counter is enabled */
#define	ALL_STOPPED	0ULL

/* Mask with the low x bits set; x must be smaller than 64 */
#define	BITMASK_XBITS(x)	((1ull << (x)) - 1ull)

/*
 * Only the lower 32-bits can be written to in the general-purpose
 * counters.  The higher bits are extended from bit 31; all ones if
 * bit 31 is one and all zeros otherwise.
 *
 * The fixed-function counters do not have this restriction.
 */
#define	BITS_EXTENDED_FROM_31	(BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
173 
/*
 * MSR accessors that also fire a DTrace SDT probe carrying the register
 * number and the value written or read.
 *
 * Both expand to a single statement (do { } while (0)) so they can be used
 * safely as the body of an unbraced if/else; the caller supplies the
 * terminating semicolon.  RDMSR stores the value read into its second
 * argument, which must therefore be an lvalue.
 */
#define	WRMSR(msr, value)						\
	do {								\
		wrmsr((msr), (value));					\
		DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value)); \
	} while (0)

#define	RDMSR(msr, value)						\
	do {								\
		(value) = rdmsr((msr));					\
		DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value)); \
	} while (0)
181 
/*
 * Configuration record for a single counter.
 * NOTE(review): the routines that fill in and consume this structure
 * (configure/program/sample) should be checked for the exact meaning of
 * core_rawpic; it is presumably the raw counter value -- confirm.
 */
typedef struct core_pcbe_config {
	uint64_t	core_rawpic;
	uint64_t	core_ctl;	/* Event Select bits */
	uint64_t	core_pmc;	/* Counter register address */
	uint64_t	core_pes;	/* Event Select register address */
	uint_t		core_picno;	/* Counter (pic) number */
	uint8_t		core_pictype;	/* CORE_GPC or CORE_FFC */
} core_pcbe_config_t;
190 
/*
 * Operations vector for this PCBE.  The initializer is positional, so the
 * entries must stay in the member order named in the trailing comments.
 */
pcbe_ops_t core_pcbe_ops = {
	PCBE_VER_1,			/* pcbe_ver */
	CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,	/* pcbe_caps */
	core_pcbe_ncounters,		/* pcbe_ncounters */
	core_pcbe_impl_name,		/* pcbe_impl_name */
	core_pcbe_cpuref,		/* pcbe_cpuref */
	core_pcbe_list_events,		/* pcbe_list_events */
	core_pcbe_list_attrs,		/* pcbe_list_attrs */
	core_pcbe_event_coverage,	/* pcbe_event_coverage */
	core_pcbe_overflow_bitmap,	/* pcbe_overflow_bitmap */
	core_pcbe_configure,		/* pcbe_configure */
	core_pcbe_program,		/* pcbe_program */
	core_pcbe_allstop,		/* pcbe_allstop */
	core_pcbe_sample,		/* pcbe_sample */
	core_pcbe_free			/* pcbe_free */
};
207 
/*
 * One entry of an event-name table for Core microarchitecture processors.
 * Tables of these entries are terminated by an entry whose event_num is
 * NT_END.
 */
struct nametable_core_uarch {
	const char	*name;		/* Event name */
	uint64_t	restricted_bits; /* ALL_CORES and/or ALL_AGENTS, or 0 */
	uint8_t		event_num;	/* Event number, or NT_END */
};

/*
 * Counting an event for all cores or all bus agents requires cpc_cpu privileges
 */
#define	ALL_CORES	(1ULL << 15)
#define	ALL_AGENTS	(1ULL << 13)
219 
/*
 * Maps a generic (PAPI_*) event name to an event number and unit mask.
 * Tables of these entries are terminated by an entry whose event_num is
 * NT_END.
 */
struct generic_events {
	const char	*name;		/* Generic event name */
	uint8_t		event_num;	/* Event number, or NT_END */
	uint8_t		umask;		/* Unit mask for the event */
};
225 
/*
 * Generic events that are listed for every general-purpose counter on
 * processors handled by pcbe_init_core_uarch().  The trailing comment on
 * each entry names the platform event(s) the generic name stands for.
 */
static const struct generic_events cmn_generic_events[] = {
	{ "PAPI_tot_cyc", 0x3c, 0x00 }, /* cpu_clk_unhalted.thread_p/core */
	{ "PAPI_tot_ins", 0xc0, 0x00 }, /* inst_retired.any_p		  */
	{ "PAPI_br_ins",  0xc4, 0x0c }, /* br_inst_retired.taken	  */
	{ "PAPI_br_msp",  0xc5, 0x00 }, /* br_inst_retired.mispred	  */
	{ "PAPI_br_ntk",  0xc4, 0x03 },
				/*
				 * NOTE(review): this used to carry the same
				 * comment as PAPI_br_prc below; umask 0x03 is
				 * presumably
				 * br_inst_retired.pred_not_taken|
				 * mispred_not_taken -- confirm against the
				 * SDM event list.
				 */
	{ "PAPI_br_prc",  0xc4, 0x05 },
				/* br_inst_retired.pred_not_taken|pred_taken */
	{ "PAPI_hw_int",  0xc8, 0x00 }, /* hw_int_rcv			  */
	{ "PAPI_tot_iis", 0xaa, 0x01 }, /* macro_insts.decoded		  */
	{ "PAPI_l1_dca",  0x43, 0x01 }, /* l1d_all_ref			  */
	{ "PAPI_l1_icm",  0x81, 0x00 }, /* l1i_misses			  */
	{ "PAPI_l1_icr",  0x80, 0x00 }, /* l1i_reads			  */
	{ "PAPI_l1_tcw",  0x41, 0x0f }, /* l1d_cache_st.mesi		  */
	{ "PAPI_l2_stm",  0x2a, 0x41 }, /* l2_st.self.i_state		  */
	{ "PAPI_l2_tca",  0x2e, 0x4f }, /* l2_rqsts.self.demand.mesi	  */
	{ "PAPI_l2_tch",  0x2e, 0x4e }, /* l2_rqsts.mes			  */
	{ "PAPI_l2_tcm",  0x2e, 0x41 }, /* l2_rqsts.self.demand.i_state   */
	{ "PAPI_l2_tcw",  0x2a, 0x4f }, /* l2_st.self.mesi		  */
	{ "PAPI_ld_ins",  0xc0, 0x01 }, /* inst_retired.loads		  */
	{ "PAPI_lst_ins", 0xc0, 0x03 }, /* inst_retired.loads|stores	  */
	{ "PAPI_sr_ins",  0xc0, 0x02 }, /* inst_retired.stores		  */
	{ "PAPI_tlb_dm",  0x08, 0x01 }, /* dtlb_misses.any		  */
	{ "PAPI_tlb_im",  0x82, 0x12 }, /* itlb.small_miss|large_miss	  */
	{ "PAPI_tlb_tl",  0x0c, 0x03 }, /* page_walks			  */
	{ "",		  NT_END, 0  }
};
254 
/*
 * Generic events that are only listed for (and reported as countable on)
 * general-purpose counter 0.
 */
static const struct generic_events generic_events_pic0[] = {
	{ "PAPI_l1_dcm",  0xcb, 0x01 }, /* mem_load_retired.l1d_miss */
	{ "",		  NT_END, 0  }
};
259 
/*
 * The events listed in the following table can be counted on all
 * general-purpose counters on processors that are of Penryn and Merom Family
 *
 * The entries MUST stay sorted by name in strcmp() order:
 * find_gpcevent_core_uarch() stops searching at the first entry that does
 * not sort before the name being looked up.
 */
static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = {
	/* Alphabetical order of event name */

	{ "baclears",			0x0,	0xe6 },
	{ "bogus_br",			0x0,	0xe4 },
	{ "br_bac_missp_exec",		0x0,	0x8a },

	{ "br_call_exec",		0x0,	0x92 },
	{ "br_call_missp_exec",		0x0,	0x93 },
	{ "br_cnd_exec",		0x0,	0x8b },

	{ "br_cnd_missp_exec",		0x0,	0x8c },
	{ "br_ind_call_exec",		0x0,	0x94 },
	{ "br_ind_exec",		0x0,	0x8d },

	{ "br_ind_missp_exec",		0x0,	0x8e },
	{ "br_inst_decoded",		0x0,	0xe0 },
	{ "br_inst_exec",		0x0,	0x88 },

	{ "br_inst_retired",		0x0,	0xc4 },
	{ "br_inst_retired_mispred",	0x0,	0xc5 },
	{ "br_missp_exec",		0x0,	0x89 },

	{ "br_ret_bac_missp_exec",	0x0,	0x91 },
	{ "br_ret_exec",		0x0,	0x8f },
	{ "br_ret_missp_exec",		0x0,	0x90 },

	{ "br_tkn_bubble_1",		0x0,	0x97 },
	{ "br_tkn_bubble_2",		0x0,	0x98 },
	{ "bus_bnr_drv",		ALL_AGENTS,	0x61 },

	{ "bus_data_rcv",		ALL_CORES,	0x64 },
	{ "bus_drdy_clocks",		ALL_AGENTS,	0x62 },
	{ "bus_hit_drv",		ALL_AGENTS,	0x7a },

	{ "bus_hitm_drv",		ALL_AGENTS,	0x7b },
	{ "bus_io_wait",		ALL_CORES,	0x7f },
	{ "bus_lock_clocks",		ALL_CORES | ALL_AGENTS,	0x63 },

	{ "bus_request_outstanding",	ALL_CORES | ALL_AGENTS,	0x60 },
	{ "bus_trans_any",		ALL_CORES | ALL_AGENTS,	0x70 },
	{ "bus_trans_brd",		ALL_CORES | ALL_AGENTS,	0x65 },

	{ "bus_trans_burst",		ALL_CORES | ALL_AGENTS,	0x6e },
	{ "bus_trans_def",		ALL_CORES | ALL_AGENTS,	0x6d },
	{ "bus_trans_ifetch",		ALL_CORES | ALL_AGENTS,	0x68 },

	{ "bus_trans_inval",		ALL_CORES | ALL_AGENTS,	0x69 },
	{ "bus_trans_io",		ALL_CORES | ALL_AGENTS,	0x6c },
	{ "bus_trans_mem",		ALL_CORES | ALL_AGENTS,	0x6f },

	{ "bus_trans_p",		ALL_CORES | ALL_AGENTS,	0x6b },
	{ "bus_trans_pwr",		ALL_CORES | ALL_AGENTS,	0x6a },
	{ "bus_trans_rfo",		ALL_CORES | ALL_AGENTS,	0x66 },

	{ "bus_trans_wb",		ALL_CORES | ALL_AGENTS,	0x67 },
	{ "busq_empty",			ALL_CORES,	0x7d },
	{ "cmp_snoop",			ALL_CORES,	0x78 },

	{ "cpu_clk_unhalted",		0x0,	0x3c },
	{ "cycles_int",			0x0,	0xc6 },
	{ "cycles_l1i_mem_stalled",	0x0,	0x86 },

	{ "dtlb_misses",		0x0,	0x08 },
	{ "eist_trans",			0x0,	0x3a },
	{ "esp",			0x0,	0xab },

	{ "ext_snoop",			ALL_AGENTS,	0x77 },
	{ "fp_mmx_trans",		0x0,	0xcc },
	{ "hw_int_rcv",			0x0,	0xc8 },

	{ "ild_stall",			0x0,	0x87 },
	{ "inst_queue",			0x0,	0x83 },
	{ "inst_retired",		0x0,	0xc0 },

	{ "itlb",			0x0,	0x82 },
	{ "itlb_miss_retired",		0x0,	0xc9 },
	{ "l1d_all_ref",		0x0,	0x43 },

	{ "l1d_cache_ld",		0x0,	0x40 },
	{ "l1d_cache_lock",		0x0,	0x42 },
	{ "l1d_cache_st",		0x0,	0x41 },

	{ "l1d_m_evict",		0x0,	0x47 },
	{ "l1d_m_repl",			0x0,	0x46 },
	{ "l1d_pend_miss",		0x0,	0x48 },

	{ "l1d_prefetch",		0x0,	0x4e },
	{ "l1d_repl",			0x0,	0x45 },
	{ "l1d_split",			0x0,	0x49 },

	{ "l1i_misses",			0x0,	0x81 },
	{ "l1i_reads",			0x0,	0x80 },
	{ "l2_ads",			ALL_CORES,	0x21 },

	{ "l2_dbus_busy_rd",		ALL_CORES,	0x23 },
	{ "l2_ifetch",			ALL_CORES,	0x28 },
	{ "l2_ld",			ALL_CORES,	0x29 },

	{ "l2_lines_in",		ALL_CORES,	0x24 },
	{ "l2_lines_out",		ALL_CORES,	0x26 },
	{ "l2_lock",			ALL_CORES,	0x2b },

	{ "l2_m_lines_in",		ALL_CORES,	0x25 },
	{ "l2_m_lines_out",		ALL_CORES,	0x27 },
	{ "l2_no_req",			ALL_CORES,	0x32 },

	{ "l2_reject_busq",		ALL_CORES,	0x30 },
	{ "l2_rqsts",			ALL_CORES,	0x2e },
	{ "l2_st",			ALL_CORES,	0x2a },

	{ "load_block",			0x0,	0x03 },
	{ "load_hit_pre",		0x0,	0x4c },
	{ "machine_nukes",		0x0,	0xc3 },

	{ "macro_insts",		0x0,	0xaa },
	{ "memory_disambiguation",	0x0,	0x09 },
	{ "misalign_mem_ref",		0x0,	0x05 },
	{ "page_walks",			0x0,	0x0c },

	{ "pref_rqsts_dn",		0x0,	0xf8 },
	{ "pref_rqsts_up",		0x0,	0xf0 },
	{ "rat_stalls",			0x0,	0xd2 },

	{ "resource_stalls",		0x0,	0xdc },
	{ "rs_uops_dispatched",		0x0,	0xa0 },
	{ "seg_reg_renames",		0x0,	0xd5 },

	{ "seg_rename_stalls",		0x0,	0xd4 },
	{ "segment_reg_loads",		0x0,	0x06 },
	{ "simd_assist",		0x0,	0xcd },

	{ "simd_comp_inst_retired",	0x0,	0xca },
	{ "simd_inst_retired",		0x0,	0xc7 },
	{ "simd_instr_retired",		0x0,	0xce },

	{ "simd_sat_instr_retired",	0x0,	0xcf },
	{ "simd_sat_uop_exec",		0x0,	0xb1 },
	{ "simd_uop_type_exec",		0x0,	0xb3 },

	{ "simd_uops_exec",		0x0,	0xb0 },
	{ "snoop_stall_drv",		ALL_CORES | ALL_AGENTS,	0x7e },
	{ "sse_pre_exec",		0x0,	0x07 },

	{ "sse_pre_miss",		0x0,	0x4b },
	{ "store_block",		0x0,	0x04 },
	{ "thermal_trip",		0x0,	0x3b },

	{ "uops_retired",		0x0,	0xc2 },
	{ "x87_ops_retired",		0x0,	0xc1 },
	{ "",				0x0,	NT_END }
};
416 
/*
 * Events that can only be counted on general-purpose counter 0.
 *
 * If any of the pic specific events require privileges, make sure to add a
 * check in configure_gpc() to find whether an event hard-coded as a number by
 * the user has any privilege requirements
 *
 * Like the common table, this one must stay sorted by name for
 * find_gpcevent_core_uarch().
 */
static const struct nametable_core_uarch pic0_events[] = {
	/* Alphabetical order of event name */

	{ "cycles_div_busy",		0x0,	0x14 },
	{ "fp_comp_ops_exe",		0x0,	0x10 },
	{ "idle_during_div",		0x0,	0x18 },

	{ "mem_load_retired",		0x0,	0xcb },
	{ "rs_uops_dispatched_port",	0x0,	0xa1 },
	{ "",				0x0,	NT_END }
};
433 
/*
 * Events that can only be counted on general-purpose counter 1.  Must stay
 * sorted by name for find_gpcevent_core_uarch().
 */
static const struct nametable_core_uarch pic1_events[] = {
	/* Alphabetical order of event name */

	{ "delayed_bypass",	0x0,	0x19 },
	{ "div",		0x0,	0x13 },
	{ "fp_assist",		0x0,	0x11 },

	{ "mul",		0x0,	0x12 },
	{ "",			0x0,	NT_END }
};
444 
/*
 * FFC entries must be in order: index i names fixed-function counter i.
 * This table is selected when the X86FSET_HTT feature is absent.  The NULL
 * terminator is included in the element count that core_pcbe_init() derives
 * with sizeof.
 */
static char *ffc_names_non_htt[] = {
	"instr_retired.any",
	"cpu_clk_unhalted.core",
	"cpu_clk_unhalted.ref",
	NULL
};
452 
/*
 * Fixed-function counter names used when the X86FSET_HTT feature is
 * present; same ordering rule as ffc_names_non_htt.
 */
static char *ffc_names_htt[] = {
	"instr_retired.any",
	"cpu_clk_unhalted.thread",
	"cpu_clk_unhalted.ref",
	NULL
};
459 
/*
 * Generic (PAPI) alias for each fixed-function counter, indexed like the
 * ffc_names tables.  An empty string means the counter has no generic alias.
 */
static char *ffc_genericnames[] = {
	"PAPI_tot_ins",
	"PAPI_tot_cyc",
	"",
	NULL
};
466 
/*
 * Global state, filled in by core_pcbe_init() (and pcbe_init_core_uarch())
 * from CPUID leaf 0xa.
 */
static char	**ffc_names = NULL;	/* ffc_names_htt or ffc_names_non_htt */
static char	**ffc_allnames = NULL;	/* Per FFC: "name[,generic name]" */
static char	**gpc_names = NULL;	/* Per GPC: comma-separated events */
static uint32_t	versionid;		/* PerfMon version (low byte of eax) */
static uint64_t	num_gpc;		/* Number of GPCs */
static uint64_t	width_gpc;		/* Bit width of each GPC */
static uint64_t	mask_gpc;		/* BITMASK_XBITS(width_gpc) */
static uint64_t	num_ffc;		/* Number of FFCs in use */
static uint64_t	width_ffc;		/* Bit width of each FFC */
static uint64_t	mask_ffc;		/* BITMASK_XBITS(width_ffc) */
static uint_t	total_pmc;		/* num_gpc + num_ffc */
static uint64_t	control_ffc;		/* One bit per FFC */
static uint64_t	control_gpc;		/* One bit per GPC */
static uint64_t	control_mask;		/* (control_ffc << 32) | control_gpc */
static uint32_t	arch_events_vector;	/* Masked ebx; clear bit = available */
482 
/* Size of the buffer holding the implementation name string */
#define	IMPL_NAME_LEN 100
/* Filled in by core_pcbe_init(); returned by core_pcbe_impl_name() */
static char core_impl_name[IMPL_NAME_LEN];

/* Reference documentation string returned by core_pcbe_cpuref() */
static const char *core_cpuref =
	"See https://download.01.org/perfmon/index/ or Chapters 18 and 19 "
	"of the \"Intel 64 and IA-32 Architectures Software Developer's "
	"Manual Volume 3: System Programming Guide\" Order Number: "
	"325384-062US, March 2017.";
491 
492 
/*
 * Architectural events
 *
 * The position of an entry in arch_events_table_htt/_non_htt is significant:
 * entry i is governed by bit i of arch_events_vector, and is only offered
 * when that bit is clear.  arch_genevents_table is indexed the same way.
 */
#define	ARCH_EVENTS_COMMON					\
	{ 0xc0, 0x00, C_ALL, "inst_retired.any_p" },		\
	{ 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" },	\
	{ 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" },	\
	{ 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" },	\
	{ 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" },	\
	{ 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }

static const struct events_table_t arch_events_table_non_htt[] = {
	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
	ARCH_EVENTS_COMMON
};

static const struct events_table_t arch_events_table_htt[] = {
	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
	ARCH_EVENTS_COMMON
};

/*
 * Generic (PAPI) alias of each architectural event; "" means the event has
 * no generic alias.
 */
static char *arch_genevents_table[] = {
	"PAPI_tot_cyc", /* cpu_clk_unhalted.thread_p/core */
	"PAPI_tot_ins", /* inst_retired.any_p		  */
	"",		/* cpu_clk_unhalted.ref_p	  */
	"",		/* longest_lat_cache.reference	  */
	"",		/* longest_lat_cache.miss	  */
	"",		/* br_inst_retired.all_branches	  */
	"",		/* br_misp_retired.all_branches	  */
};
521 
/* Architectural event table in use (HTT or non-HTT variant) */
static const struct events_table_t *arch_events_table = NULL;
/* Number of architectural events this driver will consider */
static uint64_t known_arch_events;
/* Element count of the selected ffc_names table, NULL terminator included */
static uint64_t known_ffc_num;
/* Non-architectural events from core_cpcgen_table(); may be NULL */
static const struct events_table_t *events_table = NULL;
526 
527 /*
528  * Initialize string containing list of supported general-purpose counter
529  * events for processors of Penryn and Merom Family
530  */
531 static void
pcbe_init_core_uarch()532 pcbe_init_core_uarch()
533 {
534 	const struct nametable_core_uarch	*n;
535 	const struct generic_events		*k;
536 	const struct nametable_core_uarch	*picspecific_events;
537 	const struct generic_events		*picspecific_genericevents;
538 	size_t			common_size;
539 	size_t			size;
540 	uint64_t		i;
541 
542 	gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
543 
544 	/* Calculate space needed to save all the common event names */
545 	common_size = 0;
546 	for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; n++) {
547 		common_size += strlen(n->name) + 1;
548 	}
549 
550 	for (k = cmn_generic_events; k->event_num != NT_END; k++) {
551 		common_size += strlen(k->name) + 1;
552 	}
553 
554 	for (i = 0; i < num_gpc; i++) {
555 		size = 0;
556 		picspecific_genericevents = NULL;
557 
558 		switch (i) {
559 			case 0:
560 				picspecific_events = pic0_events;
561 				picspecific_genericevents = generic_events_pic0;
562 				break;
563 			case 1:
564 				picspecific_events = pic1_events;
565 				break;
566 			default:
567 				picspecific_events = NULL;
568 				break;
569 		}
570 		if (picspecific_events != NULL) {
571 			for (n = picspecific_events;
572 			    n->event_num != NT_END;
573 			    n++) {
574 				size += strlen(n->name) + 1;
575 			}
576 		}
577 		if (picspecific_genericevents != NULL) {
578 			for (k = picspecific_genericevents;
579 			    k->event_num != NT_END; k++) {
580 				size += strlen(k->name) + 1;
581 			}
582 		}
583 
584 		gpc_names[i] =
585 		    kmem_alloc(size + common_size + 1, KM_SLEEP);
586 
587 		gpc_names[i][0] = '\0';
588 		if (picspecific_events != NULL) {
589 			for (n = picspecific_events;
590 			    n->event_num != NT_END; n++) {
591 				(void) strcat(gpc_names[i], n->name);
592 				(void) strcat(gpc_names[i], ",");
593 			}
594 		}
595 		if (picspecific_genericevents != NULL) {
596 			for (k = picspecific_genericevents;
597 			    k->event_num != NT_END; k++) {
598 				(void) strcat(gpc_names[i], k->name);
599 				(void) strcat(gpc_names[i], ",");
600 			}
601 		}
602 		for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END;
603 		    n++) {
604 			(void) strcat(gpc_names[i], n->name);
605 			(void) strcat(gpc_names[i], ",");
606 		}
607 		for (k = cmn_generic_events; k->event_num != NT_END; k++) {
608 			(void) strcat(gpc_names[i], k->name);
609 			(void) strcat(gpc_names[i], ",");
610 		}
611 
612 		/*
613 		 * Remove trailing comma.
614 		 */
615 		gpc_names[i][common_size + size - 1] = '\0';
616 	}
617 }
618 
/*
 * Probe CPUID leaf 0xa and set up this PCBE's global state: counter counts,
 * widths and masks, the implementation name, and the per-counter event-name
 * lists handed out by core_pcbe_list_events().
 *
 * Returns 0 on success.  Returns -1 when the CPU is not an Intel part, does
 * not report the Architectural Performance Monitoring leaf, reports version
 * 0, or describes more counters than fit in the 64-bit bitmaps used here.
 */
static int
core_pcbe_init(void)
{
	struct cpuid_regs	cp;
	size_t			size;
	uint64_t		i;
	uint64_t		j;
	uint64_t		arch_events_vector_length;
	size_t			arch_events_string_length;
	uint_t			model, stepping;

	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
		return (-1);

	/* Obtain Basic CPUID information */
	cp.cp_eax = 0x0;
	(void) __cpuid_insn(&cp);

	/* No Architectural Performance Monitoring Leaf returned by CPUID */
	if (cp.cp_eax < 0xa) {
		return (-1);
	}

	/* Obtain the Architectural Performance Monitoring Leaf */
	cp.cp_eax = 0xa;
	(void) __cpuid_insn(&cp);

	versionid = cp.cp_eax & 0xFF;

	/*
	 * Fixed-Function Counters (FFC)
	 *
	 * All Family 6 Model 15 and Model 23 processors have fixed-function
	 * counters.  These counters were made Architectural with
	 * Family 6 Model 15 Stepping 9.
	 */
	switch (versionid) {

		case 0:
			return (-1);

		case 2:
			num_ffc = cp.cp_edx & 0x1F;
			width_ffc = (cp.cp_edx >> 5) & 0xFF;

			/*
			 * Some processors have an errata (AW34) where
			 * versionid is reported as 2 when actually 1.
			 * In this case, fixed-function counters are
			 * model-specific as in Version 1.
			 */
			if (num_ffc != 0) {
				break;
			}
			/* FALLTHROUGH */
		case 1:
			num_ffc = 3;
			width_ffc = 40;
			versionid = 1;
			break;

		default:
			num_ffc = cp.cp_edx & 0x1F;
			width_ffc = (cp.cp_edx >> 5) & 0xFF;
			break;
	}


	if (num_ffc >= 64)
		return (-1);

	/* Set HTT-specific names of architectural & FFC events */
	if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
		ffc_names = ffc_names_htt;
		arch_events_table = arch_events_table_htt;
		known_arch_events =
		    sizeof (arch_events_table_htt) /
		    sizeof (struct events_table_t);
		/* This count includes the table's NULL terminator */
		known_ffc_num =
		    sizeof (ffc_names_htt) / sizeof (char *);
	} else {
		ffc_names = ffc_names_non_htt;
		arch_events_table = arch_events_table_non_htt;
		known_arch_events =
		    sizeof (arch_events_table_non_htt) /
		    sizeof (struct events_table_t);
		/* This count includes the table's NULL terminator */
		known_ffc_num =
		    sizeof (ffc_names_non_htt) / sizeof (char *);
	}

	if (num_ffc >= known_ffc_num) {
		/*
		 * The system seems to have more fixed-function counters than
		 * what this PCBE is able to handle correctly.  Default to the
		 * maximum number of fixed-function counters that this driver
		 * is aware of.
		 */
		num_ffc = known_ffc_num - 1;
	}

	mask_ffc = BITMASK_XBITS(width_ffc);
	control_ffc = BITMASK_XBITS(num_ffc);

	/*
	 * General Purpose Counters (GPC)
	 */
	num_gpc = (cp.cp_eax >> 8) & 0xFF;
	width_gpc = (cp.cp_eax >> 16) & 0xFF;

	if (num_gpc >= 64)
		return (-1);

	mask_gpc = BITMASK_XBITS(width_gpc);

	control_gpc = BITMASK_XBITS(num_gpc);

	/* FFC bits are placed in the upper half, GPC bits in the lower half */
	control_mask = (control_ffc << 32) | control_gpc;

	total_pmc = num_gpc + num_ffc;
	if (total_pmc > 64) {
		/* Too wide for the overflow bitmap */
		return (-1);
	}

	/* FFC names */
	ffc_allnames = kmem_alloc(num_ffc * sizeof (char *), KM_SLEEP);
	for (i = 0; i < num_ffc; i++) {
		/* Room for "name", an optional ",generic" and the NUL */
		ffc_allnames[i] = kmem_alloc(
		    strlen(ffc_names[i]) + strlen(ffc_genericnames[i]) + 2,
		    KM_SLEEP);

		ffc_allnames[i][0] = '\0';
		(void) strcat(ffc_allnames[i], ffc_names[i]);

		/* Check if this ffc has a generic name */
		if (strcmp(ffc_genericnames[i], "") != 0) {
			(void) strcat(ffc_allnames[i], ",");
			(void) strcat(ffc_allnames[i], ffc_genericnames[i]);
		}
	}

	/* GPC events for Family 6 Models 15, 23 and 29 only */
	if ((cpuid_getfamily(CPU) == 6) &&
	    ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23) ||
	    (cpuid_getmodel(CPU) == 29))) {
		(void) snprintf(core_impl_name, IMPL_NAME_LEN,
		    "Core Microarchitecture");
		pcbe_init_core_uarch();
		return (0);
	}

	(void) snprintf(core_impl_name, IMPL_NAME_LEN,
	    "Intel Arch PerfMon v%d on Family %d Model %d",
	    versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU));

	/*
	 * Architectural events
	 */
	arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF;

	ASSERT(known_arch_events == arch_events_vector_length);

	/*
	 * To handle the case where a new performance monitoring setup is run
	 * on a non-debug kernel
	 */
	if (known_arch_events > arch_events_vector_length) {
		known_arch_events = arch_events_vector_length;
	} else {
		arch_events_vector_length = known_arch_events;
	}

	arch_events_vector = cp.cp_ebx &
	    BITMASK_XBITS(arch_events_vector_length);

	/*
	 * Process architectural and non-architectural events using GPC
	 */
	if (num_gpc > 0) {

		gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);

		/*
		 * Calculate space required for the architectural gpc events.
		 * Only events whose bit is clear in arch_events_vector are
		 * listed.
		 */
		arch_events_string_length = 0;
		for (i = 0; i < known_arch_events; i++) {
			if (((1U << i) & arch_events_vector) == 0) {
				arch_events_string_length +=
				    strlen(arch_events_table[i].name) + 1;
				if (strcmp(arch_genevents_table[i], "") != 0) {
					arch_events_string_length +=
					    strlen(arch_genevents_table[i]) + 1;
				}
			}
		}

		/* Non-architectural events list */
		model = cpuid_getmodel(CPU);
		stepping = cpuid_getstep(CPU);
		events_table = core_cpcgen_table(model, stepping);

		for (i = 0; i < num_gpc; i++) {

			/*
			 * Determine length of all supported event names
			 * (architectural + non-architectural)
			 */
			size = arch_events_string_length;
			for (j = 0; events_table != NULL &&
			    events_table[j].eventselect != NT_END;
			    j++) {
				if (C(i) & events_table[j].supported_counters) {
					size += strlen(events_table[j].name) +
					    1;
				}
			}

			/* Allocate memory for this pics list */
			gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP);
			gpc_names[i][0] = '\0';
			/*
			 * No events for this pic: keep the empty string and
			 * skip the trailing-comma fixup, which would index
			 * size - 1.
			 */
			if (size == 0) {
				continue;
			}

			/*
			 * Create the list of all supported events
			 * (architectural + non-architectural)
			 */
			for (j = 0; j < known_arch_events; j++) {
				if (((1U << j) & arch_events_vector) == 0) {
					(void) strcat(gpc_names[i],
					    arch_events_table[j].name);
					(void) strcat(gpc_names[i], ",");
					if (strcmp(
					    arch_genevents_table[j], "")
					    != 0) {
						(void) strcat(gpc_names[i],
						    arch_genevents_table[j]);
						(void) strcat(gpc_names[i],
						    ",");
					}
				}
			}

			for (j = 0; events_table != NULL &&
			    events_table[j].eventselect != NT_END;
			    j++) {
				if (C(i) & events_table[j].supported_counters) {
					(void) strcat(gpc_names[i],
					    events_table[j].name);
					(void) strcat(gpc_names[i], ",");
				}
			}

			/* Remove trailing comma */
			gpc_names[i][size - 1] = '\0';
		}
	}

	return (0);
}
879 
core_pcbe_ncounters()880 static uint_t core_pcbe_ncounters()
881 {
882 	return (total_pmc);
883 }
884 
core_pcbe_impl_name(void)885 static const char *core_pcbe_impl_name(void)
886 {
887 	return (core_impl_name);
888 }
889 
core_pcbe_cpuref(void)890 static const char *core_pcbe_cpuref(void)
891 {
892 	return (core_cpuref);
893 }
894 
/*
 * Return the comma-separated list of event names supported by counter
 * picnum.  General-purpose counters occupy pic numbers 0 .. num_gpc - 1;
 * the fixed-function counters follow them.
 */
static char *
core_pcbe_list_events(uint_t picnum)
{
	ASSERT(picnum < cpc_ncounters);

	if (picnum >= num_gpc) {
		/* Fixed-function counter: rebase onto the FFC name array */
		return (ffc_allnames[picnum - num_gpc]);
	}

	return (gpc_names[picnum]);
}
905 
core_pcbe_list_attrs(void)906 static char *core_pcbe_list_attrs(void)
907 {
908 	if (versionid >= 3) {
909 		return ("edge,inv,umask,cmask,anythr");
910 	} else {
911 		return ("edge,pc,inv,umask,cmask");
912 	}
913 }
914 
915 static const struct nametable_core_uarch *
find_gpcevent_core_uarch(char * name,const struct nametable_core_uarch * nametable)916 find_gpcevent_core_uarch(char *name,
917     const struct nametable_core_uarch *nametable)
918 {
919 	const struct nametable_core_uarch *n;
920 	int compare_result = -1;
921 
922 	for (n = nametable; n->event_num != NT_END; n++) {
923 		compare_result = strcmp(name, n->name);
924 		if (compare_result <= 0) {
925 			break;
926 		}
927 	}
928 
929 	if (compare_result == 0) {
930 		return (n);
931 	}
932 
933 	return (NULL);
934 }
935 
936 static const struct generic_events *
find_generic_events(char * name,const struct generic_events * table)937 find_generic_events(char *name, const struct generic_events *table)
938 {
939 	const struct generic_events *n;
940 
941 	for (n = table; n->event_num != NT_END; n++) {
942 		if (strcmp(name, n->name) == 0) {
943 			return (n);
944 		};
945 	}
946 
947 	return (NULL);
948 }
949 
950 static const struct events_table_t *
find_gpcevent(char * name)951 find_gpcevent(char *name)
952 {
953 	int i;
954 
955 	/* Search architectural events */
956 	for (i = 0; i < known_arch_events; i++) {
957 		if (strcmp(name, arch_events_table[i].name) == 0 ||
958 		    strcmp(name, arch_genevents_table[i]) == 0) {
959 			if (((1U << i) & arch_events_vector) == 0) {
960 				return (&arch_events_table[i]);
961 			}
962 		}
963 	}
964 
965 	/* Search non-architectural events */
966 	if (events_table != NULL) {
967 		for (i = 0; events_table[i].eventselect != NT_END; i++) {
968 			if (strcmp(name, events_table[i].name) == 0) {
969 				return (&events_table[i]);
970 			}
971 		}
972 	}
973 
974 	return (NULL);
975 }
976 
977 static uint64_t
core_pcbe_event_coverage(char * event)978 core_pcbe_event_coverage(char *event)
979 {
980 	uint64_t bitmap;
981 	uint64_t bitmask;
982 	const struct events_table_t *n;
983 	int i;
984 
985 	bitmap = 0;
986 
987 	/* Is it an event that a GPC can track? */
988 	if (versionid >= 3) {
989 		n = find_gpcevent(event);
990 		if (n != NULL) {
991 			bitmap |= (n->supported_counters &
992 			    BITMASK_XBITS(num_gpc));
993 		}
994 	} else {
995 		if (find_generic_events(event, cmn_generic_events) != NULL) {
996 			bitmap |= BITMASK_XBITS(num_gpc);
997 		} else if (find_generic_events(event,
998 		    generic_events_pic0) != NULL) {
999 			bitmap |= 1ULL;
1000 		} else if (find_gpcevent_core_uarch(event,
1001 		    cmn_gpc_events_core_uarch) != NULL) {
1002 			bitmap |= BITMASK_XBITS(num_gpc);
1003 		} else if (find_gpcevent_core_uarch(event, pic0_events) !=
1004 		    NULL) {
1005 			bitmap |= 1ULL;
1006 		} else if (find_gpcevent_core_uarch(event, pic1_events) !=
1007 		    NULL) {
1008 			bitmap |= 1ULL << 1;
1009 		}
1010 	}
1011 
1012 	/* Check if the event can be counted in the fixed-function counters */
1013 	if (num_ffc > 0) {
1014 		bitmask = 1ULL << num_gpc;
1015 		for (i = 0; i < num_ffc; i++) {
1016 			if (strcmp(event, ffc_names[i]) == 0) {
1017 				bitmap |= bitmask;
1018 			} else if (strcmp(event, ffc_genericnames[i]) == 0) {
1019 				bitmap |= bitmask;
1020 			}
1021 			bitmask = bitmask << 1;
1022 		}
1023 	}
1024 
1025 	return (bitmap);
1026 }
1027 
1028 static uint64_t
core_pcbe_overflow_bitmap(void)1029 core_pcbe_overflow_bitmap(void)
1030 {
1031 	uint64_t interrupt_status;
1032 	uint64_t intrbits_ffc;
1033 	uint64_t intrbits_gpc;
1034 	extern int kcpc_hw_overflow_intr_installed;
1035 	uint64_t overflow_bitmap;
1036 
1037 	RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
1038 	WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);
1039 
1040 	interrupt_status = interrupt_status & control_mask;
1041 	intrbits_ffc = (interrupt_status >> 32) & control_ffc;
1042 	intrbits_gpc = interrupt_status & control_gpc;
1043 	overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;
1044 
1045 	ASSERT(kcpc_hw_overflow_intr_installed);
1046 	(*kcpc_hw_enable_cpc_intr)();
1047 
1048 	return (overflow_bitmap);
1049 }
1050 
1051 static int
check_cpc_securitypolicy(core_pcbe_config_t * conf,const struct nametable_core_uarch * n)1052 check_cpc_securitypolicy(core_pcbe_config_t *conf,
1053     const struct nametable_core_uarch *n)
1054 {
1055 	if (conf->core_ctl & n->restricted_bits) {
1056 		if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1057 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
1058 		}
1059 	}
1060 	return (0);
1061 }
1062 
1063 static int
configure_gpc(uint_t picnum,char * event,uint64_t preset,uint32_t flags,uint_t nattrs,kcpc_attr_t * attrs,void ** data)1064 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1065     uint_t nattrs, kcpc_attr_t *attrs, void **data)
1066 {
1067 	core_pcbe_config_t	conf;
1068 	const struct nametable_core_uarch	*n;
1069 	const struct generic_events *k = NULL;
1070 	const struct nametable_core_uarch	*m;
1071 	const struct nametable_core_uarch	*picspecific_events;
1072 	struct nametable_core_uarch	nt_raw = { "", 0x0, 0x0 };
1073 	uint_t			i;
1074 	long			event_num;
1075 	const struct events_table_t *eventcode;
1076 
1077 	if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
1078 	    ((preset & BITS_EXTENDED_FROM_31) !=
1079 	    BITS_EXTENDED_FROM_31)) {
1080 
1081 		/*
1082 		 * Bits beyond bit-31 in the general-purpose counters can only
1083 		 * be written to by extension of bit 31.  We cannot preset
1084 		 * these bits to any value other than all 1s or all 0s.
1085 		 */
1086 		return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1087 	}
1088 
1089 	if (versionid >= 3) {
1090 		eventcode = find_gpcevent(event);
1091 		if (eventcode != NULL) {
1092 			if ((C(picnum) & eventcode->supported_counters) == 0) {
1093 				return (CPC_PIC_NOT_CAPABLE);
1094 			}
1095 			if (nattrs > 0 &&
1096 			    (strncmp("PAPI_", event, 5) == 0)) {
1097 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1098 			}
1099 			conf.core_ctl = eventcode->eventselect;
1100 			conf.core_ctl |= eventcode->unitmask <<
1101 			    CORE_UMASK_SHIFT;
1102 		} else {
1103 			/* Event specified as raw event code */
1104 			if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
1105 				return (CPC_INVALID_EVENT);
1106 			}
1107 			conf.core_ctl = event_num & 0xFF;
1108 		}
1109 	} else {
1110 		if ((k = find_generic_events(event, cmn_generic_events)) !=
1111 		    NULL ||
1112 		    (picnum == 0 &&
1113 		    (k = find_generic_events(event, generic_events_pic0)) !=
1114 		    NULL)) {
1115 			if (nattrs > 0) {
1116 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1117 			}
1118 			conf.core_ctl = k->event_num;
1119 			conf.core_ctl |= k->umask << CORE_UMASK_SHIFT;
1120 		} else {
1121 			/* Not a generic event */
1122 
1123 			n = find_gpcevent_core_uarch(event,
1124 			    cmn_gpc_events_core_uarch);
1125 			if (n == NULL) {
1126 				switch (picnum) {
1127 					case 0:
1128 						picspecific_events =
1129 						    pic0_events;
1130 						break;
1131 					case 1:
1132 						picspecific_events =
1133 						    pic1_events;
1134 						break;
1135 					default:
1136 						picspecific_events = NULL;
1137 						break;
1138 				}
1139 				if (picspecific_events != NULL) {
1140 					n = find_gpcevent_core_uarch(event,
1141 					    picspecific_events);
1142 				}
1143 			}
1144 			if (n == NULL) {
1145 
1146 				/*
1147 				 * Check if this is a case where the event was
1148 				 * specified directly by its event number
1149 				 * instead of its name string.
1150 				 */
1151 				if (ddi_strtol(event, NULL, 0, &event_num) !=
1152 				    0) {
1153 					return (CPC_INVALID_EVENT);
1154 				}
1155 
1156 				event_num = event_num & 0xFF;
1157 
1158 				/*
1159 				 * Search the event table to find out if the
1160 				 * event specified has an privilege
1161 				 * requirements.  Currently none of the
1162 				 * pic-specific counters have any privilege
1163 				 * requirements.  Hence only the table
1164 				 * cmn_gpc_events_core_uarch is searched.
1165 				 */
1166 				for (m = cmn_gpc_events_core_uarch;
1167 				    m->event_num != NT_END;
1168 				    m++) {
1169 					if (event_num == m->event_num) {
1170 						break;
1171 					}
1172 				}
1173 				if (m->event_num == NT_END) {
1174 					nt_raw.event_num = (uint8_t)event_num;
1175 					n = &nt_raw;
1176 				} else {
1177 					n = m;
1178 				}
1179 			}
1180 			conf.core_ctl = n->event_num; /* Event Select */
1181 		}
1182 	}
1183 
1184 
1185 	conf.core_picno = picnum;
1186 	conf.core_pictype = CORE_GPC;
1187 	conf.core_rawpic = preset & mask_gpc;
1188 
1189 	conf.core_pes = GPC_BASE_PES + picnum;
1190 	conf.core_pmc = GPC_BASE_PMC + picnum;
1191 
1192 	for (i = 0; i < nattrs; i++) {
1193 		if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
1194 			if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
1195 			    CORE_UMASK_MASK) {
1196 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1197 			}
1198 			/* Clear out the default umask */
1199 			conf.core_ctl &= ~ (CORE_UMASK_MASK <<
1200 			    CORE_UMASK_SHIFT);
1201 			/* Use the user provided umask */
1202 			conf.core_ctl |= attrs[i].ka_val <<
1203 			    CORE_UMASK_SHIFT;
1204 		} else  if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
1205 			if (attrs[i].ka_val != 0)
1206 				conf.core_ctl |= CORE_EDGE;
1207 		} else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
1208 			if (attrs[i].ka_val != 0)
1209 				conf.core_ctl |= CORE_INV;
1210 		} else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
1211 			if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
1212 			    CORE_CMASK_MASK) {
1213 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1214 			}
1215 			conf.core_ctl |= attrs[i].ka_val <<
1216 			    CORE_CMASK_SHIFT;
1217 		} else if (strncmp(attrs[i].ka_name, "anythr", 7) ==
1218 		    0) {
1219 			if (versionid < 3)
1220 				return (CPC_INVALID_ATTRIBUTE);
1221 			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1222 				return (CPC_ATTR_REQUIRES_PRIVILEGE);
1223 			}
1224 			if (attrs[i].ka_val != 0)
1225 				conf.core_ctl |= CORE_ANYTHR;
1226 		} else {
1227 			return (CPC_INVALID_ATTRIBUTE);
1228 		}
1229 	}
1230 
1231 	if (flags & CPC_COUNT_USER)
1232 		conf.core_ctl |= CORE_USR;
1233 	if (flags & CPC_COUNT_SYSTEM)
1234 		conf.core_ctl |= CORE_OS;
1235 	if (flags & CPC_OVF_NOTIFY_EMT)
1236 		conf.core_ctl |= CORE_INT;
1237 	conf.core_ctl |= CORE_EN;
1238 
1239 	if (versionid < 3 && k == NULL) {
1240 		if (check_cpc_securitypolicy(&conf, n) != 0) {
1241 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
1242 		}
1243 	}
1244 
1245 	*data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1246 	*((core_pcbe_config_t *)*data) = conf;
1247 
1248 	return (0);
1249 }
1250 
1251 static int
configure_ffc(uint_t picnum,char * event,uint64_t preset,uint32_t flags,uint_t nattrs,kcpc_attr_t * attrs,void ** data)1252 configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1253     uint_t nattrs, kcpc_attr_t *attrs, void **data)
1254 {
1255 	core_pcbe_config_t	*conf;
1256 	uint_t			i;
1257 
1258 	if (picnum - num_gpc >= num_ffc) {
1259 		return (CPC_INVALID_PICNUM);
1260 	}
1261 
1262 	if ((strcmp(ffc_names[picnum-num_gpc], event) != 0) &&
1263 	    (strcmp(ffc_genericnames[picnum-num_gpc], event) != 0)) {
1264 		return (CPC_INVALID_EVENT);
1265 	}
1266 
1267 	if ((versionid < 3) && (nattrs != 0)) {
1268 		return (CPC_INVALID_ATTRIBUTE);
1269 	}
1270 
1271 	conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1272 	conf->core_ctl = 0;
1273 
1274 	for (i = 0; i < nattrs; i++) {
1275 		if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) {
1276 			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1277 				kmem_free(conf, sizeof (core_pcbe_config_t));
1278 				return (CPC_ATTR_REQUIRES_PRIVILEGE);
1279 			}
1280 			if (attrs[i].ka_val != 0) {
1281 				conf->core_ctl |= CORE_FFC_ANYTHR;
1282 			}
1283 		} else {
1284 			kmem_free(conf, sizeof (core_pcbe_config_t));
1285 			return (CPC_INVALID_ATTRIBUTE);
1286 		}
1287 	}
1288 
1289 	conf->core_picno = picnum;
1290 	conf->core_pictype = CORE_FFC;
1291 	conf->core_rawpic = preset & mask_ffc;
1292 	conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);
1293 
1294 	/* All fixed-function counters have the same control register */
1295 	conf->core_pes = PERF_FIXED_CTR_CTRL;
1296 
1297 	if (flags & CPC_COUNT_USER)
1298 		conf->core_ctl |= CORE_FFC_USR_EN;
1299 	if (flags & CPC_COUNT_SYSTEM)
1300 		conf->core_ctl |= CORE_FFC_OS_EN;
1301 	if (flags & CPC_OVF_NOTIFY_EMT)
1302 		conf->core_ctl |= CORE_FFC_PMI;
1303 
1304 	*data = conf;
1305 	return (0);
1306 }
1307 
1308 /*ARGSUSED*/
1309 static int
core_pcbe_configure(uint_t picnum,char * event,uint64_t preset,uint32_t flags,uint_t nattrs,kcpc_attr_t * attrs,void ** data,void * token)1310 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
1311     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
1312     void *token)
1313 {
1314 	int			ret;
1315 	core_pcbe_config_t	*conf;
1316 
1317 	/*
1318 	 * If we've been handed an existing configuration, we need only preset
1319 	 * the counter value.
1320 	 */
1321 	if (*data != NULL) {
1322 		conf = *data;
1323 		ASSERT(conf->core_pictype == CORE_GPC ||
1324 		    conf->core_pictype == CORE_FFC);
1325 		if (conf->core_pictype == CORE_GPC)
1326 			conf->core_rawpic = preset & mask_gpc;
1327 		else /* CORE_FFC */
1328 			conf->core_rawpic = preset & mask_ffc;
1329 		return (0);
1330 	}
1331 
1332 	if (picnum >= total_pmc) {
1333 		return (CPC_INVALID_PICNUM);
1334 	}
1335 
1336 	if (picnum < num_gpc) {
1337 		ret = configure_gpc(picnum, event, preset, flags,
1338 		    nattrs, attrs, data);
1339 	} else {
1340 		ret = configure_ffc(picnum, event, preset, flags,
1341 		    nattrs, attrs, data);
1342 	}
1343 	return (ret);
1344 }
1345 
1346 static void
core_pcbe_program(void * token)1347 core_pcbe_program(void *token)
1348 {
1349 	core_pcbe_config_t	*cfg;
1350 	uint64_t		perf_global_ctrl;
1351 	uint64_t		perf_fixed_ctr_ctrl;
1352 	uint64_t		curcr4;
1353 
1354 	core_pcbe_allstop();
1355 
1356 	curcr4 = getcr4();
1357 	if (kcpc_allow_nonpriv(token))
1358 		/* Allow RDPMC at any ring level */
1359 		setcr4(curcr4 | CR4_PCE);
1360 	else
1361 		/* Allow RDPMC only at ring 0 */
1362 		setcr4(curcr4 & ~CR4_PCE);
1363 
1364 	/* Clear any overflow indicators before programming the counters */
1365 	WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);
1366 
1367 	cfg = NULL;
1368 	perf_global_ctrl = 0;
1369 	perf_fixed_ctr_ctrl = 0;
1370 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
1371 	while (cfg != NULL) {
1372 		ASSERT(cfg->core_pictype == CORE_GPC ||
1373 		    cfg->core_pictype == CORE_FFC);
1374 
1375 		if (cfg->core_pictype == CORE_GPC) {
1376 			/*
1377 			 * General-purpose counter registers have write
1378 			 * restrictions where only the lower 32-bits can be
1379 			 * written to.  The rest of the relevant bits are
1380 			 * written to by extension from bit 31 (all ZEROS if
1381 			 * bit-31 is ZERO and all ONE if bit-31 is ONE).  This
1382 			 * makes it possible to write to the counter register
1383 			 * only values that have all ONEs or all ZEROs in the
1384 			 * higher bits.
1385 			 */
1386 			if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
1387 			    ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
1388 			    BITS_EXTENDED_FROM_31)) {
1389 				/*
1390 				 * Straighforward case where the higher bits
1391 				 * are all ZEROs or all ONEs.
1392 				 */
1393 				WRMSR(cfg->core_pmc,
1394 				    (cfg->core_rawpic & mask_gpc));
1395 			} else {
1396 				/*
1397 				 * The high order bits are not all the same.
1398 				 * We save what is currently in the registers
1399 				 * and do not write to it.  When we want to do
1400 				 * a read from this register later (in
1401 				 * core_pcbe_sample()), we subtract the value
1402 				 * we save here to get the actual event count.
1403 				 *
1404 				 * NOTE: As a result, we will not get overflow
1405 				 * interrupts as expected.
1406 				 */
1407 				RDMSR(cfg->core_pmc, cfg->core_rawpic);
1408 				cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
1409 			}
1410 			WRMSR(cfg->core_pes, cfg->core_ctl);
1411 			perf_global_ctrl |= 1ull << cfg->core_picno;
1412 		} else {
1413 			/*
1414 			 * Unlike the general-purpose counters, all relevant
1415 			 * bits of fixed-function counters can be written to.
1416 			 */
1417 			WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);
1418 
1419 			/*
1420 			 * Collect the control bits for all the
1421 			 * fixed-function counters and write it at one shot
1422 			 * later in this function
1423 			 */
1424 			perf_fixed_ctr_ctrl |= cfg->core_ctl <<
1425 			    ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
1426 			perf_global_ctrl |=
1427 			    1ull << (cfg->core_picno - num_gpc + 32);
1428 		}
1429 
1430 		cfg = (core_pcbe_config_t *)
1431 		    kcpc_next_config(token, cfg, NULL);
1432 	}
1433 
1434 	/* Enable all the counters */
1435 	WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
1436 	WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
1437 }
1438 
1439 static void
core_pcbe_allstop(void)1440 core_pcbe_allstop(void)
1441 {
1442 	/* Disable all the counters together */
1443 	WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);
1444 
1445 	setcr4(getcr4() & ~CR4_PCE);
1446 }
1447 
1448 static void
core_pcbe_sample(void * token)1449 core_pcbe_sample(void *token)
1450 {
1451 	uint64_t		*daddr;
1452 	uint64_t		curpic;
1453 	core_pcbe_config_t	*cfg;
1454 	uint64_t			counter_mask;
1455 
1456 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
1457 	while (cfg != NULL) {
1458 		ASSERT(cfg->core_pictype == CORE_GPC ||
1459 		    cfg->core_pictype == CORE_FFC);
1460 
1461 		curpic = rdmsr(cfg->core_pmc);
1462 
1463 		DTRACE_PROBE4(core__pcbe__sample,
1464 		    uint64_t, cfg->core_pmc,
1465 		    uint64_t, curpic,
1466 		    uint64_t, cfg->core_rawpic,
1467 		    uint64_t, *daddr);
1468 
1469 		if (cfg->core_pictype == CORE_GPC) {
1470 			counter_mask = mask_gpc;
1471 		} else {
1472 			counter_mask = mask_ffc;
1473 		}
1474 		curpic = curpic & counter_mask;
1475 		if (curpic >= cfg->core_rawpic) {
1476 			*daddr += curpic - cfg->core_rawpic;
1477 		} else {
1478 			/* Counter overflowed since our last sample */
1479 			*daddr += counter_mask - (cfg->core_rawpic - curpic) +
1480 			    1;
1481 		}
1482 		cfg->core_rawpic = *daddr & counter_mask;
1483 
1484 		cfg =
1485 		    (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
1486 	}
1487 }
1488 
1489 static void
core_pcbe_free(void * config)1490 core_pcbe_free(void *config)
1491 {
1492 	kmem_free(config, sizeof (core_pcbe_config_t));
1493 }
1494 
1495 static struct modlpcbe core_modlpcbe = {
1496 	&mod_pcbeops,
1497 	"Core Performance Counters",
1498 	&core_pcbe_ops
1499 };
1500 
1501 static struct modlinkage core_modl = {
1502 	MODREV_1,
1503 	&core_modlpcbe,
1504 };
1505 
1506 int
_init(void)1507 _init(void)
1508 {
1509 	if (core_pcbe_init() != 0) {
1510 		return (ENOTSUP);
1511 	}
1512 	return (mod_install(&core_modl));
1513 }
1514 
1515 int
_fini(void)1516 _fini(void)
1517 {
1518 	return (mod_remove(&core_modl));
1519 }
1520 
1521 int
_info(struct modinfo * mi)1522 _info(struct modinfo *mi)
1523 {
1524 	return (mod_info(&core_modl, mi));
1525 }
1526