core_pcbe.c revision c18e9bc303e04175d63c5c51206b2ce6f6efe6a4
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2019 Joyent, Inc.
24 */
25
26/*
27 * This file contains preset event names from the Performance Application
28 * Programming Interface v3.5 which included the following notice:
29 *
30 *                             Copyright (c) 2005,6
31 *                           Innovative Computing Labs
32 *                         Computer Science Department,
33 *                            University of Tennessee,
34 *                                 Knoxville, TN.
35 *                              All Rights Reserved.
36 *
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions are met:
40 *
41 *    * Redistributions of source code must retain the above copyright notice,
42 *      this list of conditions and the following disclaimer.
43 *    * Redistributions in binary form must reproduce the above copyright
44 *      notice, this list of conditions and the following disclaimer in the
45 *      documentation and/or other materials provided with the distribution.
46 *    * Neither the name of the University of Tennessee nor the names of its
47 *      contributors may be used to endorse or promote products derived from
48 *      this software without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
51 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
54 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
55 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
56 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
57 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
58 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
59 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60 * POSSIBILITY OF SUCH DAMAGE.
61 *
62 *
63 * This open source software license conforms to the BSD License template.
64 */
65
66
67/*
68 * Performance Counter Back-End for Intel processors supporting Architectural
69 * Performance Monitoring.
70 */
71
72#include <sys/cpuvar.h>
73#include <sys/param.h>
74#include <sys/cpc_impl.h>
75#include <sys/cpc_pcbe.h>
76#include <sys/modctl.h>
77#include <sys/inttypes.h>
78#include <sys/systm.h>
79#include <sys/cmn_err.h>
80#include <sys/x86_archext.h>
81#include <sys/sdt.h>
82#include <sys/archsystm.h>
83#include <sys/privregs.h>
84#include <sys/ddi.h>
85#include <sys/sunddi.h>
86#include <sys/cred.h>
87#include <sys/policy.h>
88
89#include "core_pcbe_table.h"
90#include <core_pcbe_cpcgen.h>
91
92static int core_pcbe_init(void);
93static uint_t core_pcbe_ncounters(void);
94static const char *core_pcbe_impl_name(void);
95static const char *core_pcbe_cpuref(void);
96static char *core_pcbe_list_events(uint_t picnum);
97static char *core_pcbe_list_attrs(void);
98static uint64_t core_pcbe_event_coverage(char *event);
99static uint64_t core_pcbe_overflow_bitmap(void);
100static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
101    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
102    void *token);
103static void core_pcbe_program(void *token);
104static void core_pcbe_allstop(void);
105static void core_pcbe_sample(void *token);
106static void core_pcbe_free(void *config);
107
108#define	FALSE	0
109#define	TRUE	1
110
111/* Counter Type */
112#define	CORE_GPC	0	/* General-Purpose Counter (GPC) */
113#define	CORE_FFC	1	/* Fixed-Function Counter (FFC) */
114
115/* MSR Addresses */
116#define	GPC_BASE_PMC		0x00c1	/* First GPC */
117#define	GPC_BASE_PES		0x0186	/* First GPC Event Select register */
118#define	FFC_BASE_PMC		0x0309	/* First FFC */
119#define	PERF_FIXED_CTR_CTRL	0x038d	/* Used to enable/disable FFCs */
120#define	PERF_GLOBAL_STATUS	0x038e	/* Overflow status register */
121#define	PERF_GLOBAL_CTRL	0x038f	/* Used to enable/disable counting */
122#define	PERF_GLOBAL_OVF_CTRL	0x0390	/* Used to clear overflow status */
123
124/*
125 * Processor Event Select register fields
126 */
127#define	CORE_USR	(1ULL << 16)	/* Count while not in ring 0 */
128#define	CORE_OS		(1ULL << 17)	/* Count while in ring 0 */
129#define	CORE_EDGE	(1ULL << 18)	/* Enable edge detection */
130#define	CORE_PC		(1ULL << 19)	/* Enable pin control */
131#define	CORE_INT	(1ULL << 20)	/* Enable interrupt on overflow */
132#define	CORE_EN		(1ULL << 22)	/* Enable counting */
133#define	CORE_INV	(1ULL << 23)	/* Invert the CMASK */
134#define	CORE_ANYTHR	(1ULL << 21)	/* Count event for any thread on core */
135
136#define	CORE_UMASK_SHIFT	8
137#define	CORE_UMASK_MASK		0xffu
138#define	CORE_CMASK_SHIFT	24
139#define	CORE_CMASK_MASK		0xffu
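/*
 * A complete event-select value is assembled from these pieces: the event
 * code occupies bits 7:0, the unit mask (umask) bits 15:8, the flag bits
 * defined above bits 23:16, and the counter mask (cmask) bits 31:24.
 */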
140
141/*
142 * Fixed-function counter attributes
143 */
144#define	CORE_FFC_OS_EN	(1ULL << 0)	/* Count while in ring 0 */
145#define	CORE_FFC_USR_EN	(1ULL << 1)	/* Count while not in ring 0 */
146#define	CORE_FFC_ANYTHR	(1ULL << 2)	/* Count event for any thread on core */
147#define	CORE_FFC_PMI	(1ULL << 3)	/* Enable interrupt on overflow */
148
149/*
150 * Number of bits for specifying each FFC's attributes in the control register
151 */
152#define	CORE_FFC_ATTR_SIZE	4
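/*
 * FFC n's attribute bits therefore occupy bits [4n+3:4n] of
 * PERF_FIXED_CTR_CTRL; e.g. FFC 0 uses bits 3:0 and FFC 1 uses bits 7:4.
 */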
153
154/*
155 * CondChgd and OvfBuffer fields of global status and overflow control registers
156 */
157#define	CONDCHGD	(1ULL << 63)
158#define	OVFBUFFER	(1ULL << 62)
159#define	MASK_CONDCHGD_OVFBUFFER	(CONDCHGD | OVFBUFFER)
160
161#define	ALL_STOPPED	0ULL
162
163#define	BITMASK_XBITS(x)	((1ull << (x)) - 1ull)
164
165/*
166 * Only the lower 32-bits can be written to in the general-purpose
167 * counters.  The higher bits are extended from bit 31; all ones if
168 * bit 31 is one and all zeros otherwise.
169 *
170 * The fixed-function counters do not have this restriction.
171 */
172#define	BITS_EXTENDED_FROM_31	(BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
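/*
 * For example, with a 40-bit wide GPC this evaluates to 0xFF80000000,
 * i.e. bits 31 through 39, which must therefore be either all ones or
 * all zeros in any value written to the counter.
 */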
173
174#define	WRMSR(msr, value)						\
175	wrmsr((msr), (value));						\
176	DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value));
177
178#define	RDMSR(msr, value)						\
179	(value) = rdmsr((msr));						\
180	DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value));
181
182typedef struct core_pcbe_config {
183	uint64_t	core_rawpic;
184	uint64_t	core_ctl;	/* Event Select bits */
185	uint64_t	core_pmc;	/* Counter register address */
186	uint64_t	core_pes;	/* Event Select register address */
187	uint_t		core_picno;
188	uint8_t		core_pictype;	/* CORE_GPC or CORE_FFC */
189} core_pcbe_config_t;
190
191pcbe_ops_t core_pcbe_ops = {
192	PCBE_VER_1,			/* pcbe_ver */
193	CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,	/* pcbe_caps */
194	core_pcbe_ncounters,		/* pcbe_ncounters */
195	core_pcbe_impl_name,		/* pcbe_impl_name */
196	core_pcbe_cpuref,		/* pcbe_cpuref */
197	core_pcbe_list_events,		/* pcbe_list_events */
198	core_pcbe_list_attrs,		/* pcbe_list_attrs */
199	core_pcbe_event_coverage,	/* pcbe_event_coverage */
200	core_pcbe_overflow_bitmap,	/* pcbe_overflow_bitmap */
201	core_pcbe_configure,		/* pcbe_configure */
202	core_pcbe_program,		/* pcbe_program */
203	core_pcbe_allstop,		/* pcbe_allstop */
204	core_pcbe_sample,		/* pcbe_sample */
205	core_pcbe_free			/* pcbe_free */
206};
207
208struct nametable_core_uarch {
209	const char	*name;
210	uint64_t	restricted_bits;
211	uint8_t		event_num;
212};
213
214/*
215 * Counting an event for all cores or all bus agents requires the cpc_cpu privilege
216 */
217#define	ALL_CORES	(1ULL << 15)
218#define	ALL_AGENTS	(1ULL << 13)
219
220struct generic_events {
221	const char	*name;
222	uint8_t		event_num;
223	uint8_t		umask;
224};
225
226static const struct generic_events cmn_generic_events[] = {
227	{ "PAPI_tot_cyc", 0x3c, 0x00 }, /* cpu_clk_unhalted.thread_p/core */
228	{ "PAPI_tot_ins", 0xc0, 0x00 }, /* inst_retired.any_p		  */
229	{ "PAPI_br_ins",  0xc4, 0x0c }, /* br_inst_retired.taken	  */
230	{ "PAPI_br_msp",  0xc5, 0x00 }, /* br_inst_retired.mispred	  */
231	{ "PAPI_br_ntk",  0xc4, 0x03 },
232				/* br_inst_retired.pred_not_taken|mispred_not_taken */
233	{ "PAPI_br_prc",  0xc4, 0x05 },
234				/* br_inst_retired.pred_not_taken|pred_taken */
235	{ "PAPI_hw_int",  0xc8, 0x00 }, /* hw_int_rcv			  */
236	{ "PAPI_tot_iis", 0xaa, 0x01 }, /* macro_insts.decoded		  */
237	{ "PAPI_l1_dca",  0x43, 0x01 }, /* l1d_all_ref			  */
238	{ "PAPI_l1_icm",  0x81, 0x00 }, /* l1i_misses			  */
239	{ "PAPI_l1_icr",  0x80, 0x00 }, /* l1i_reads			  */
240	{ "PAPI_l1_tcw",  0x41, 0x0f }, /* l1d_cache_st.mesi		  */
241	{ "PAPI_l2_stm",  0x2a, 0x41 }, /* l2_st.self.i_state		  */
242	{ "PAPI_l2_tca",  0x2e, 0x4f }, /* l2_rqsts.self.demand.mesi	  */
243	{ "PAPI_l2_tch",  0x2e, 0x4e }, /* l2_rqsts.mes			  */
244	{ "PAPI_l2_tcm",  0x2e, 0x41 }, /* l2_rqsts.self.demand.i_state   */
245	{ "PAPI_l2_tcw",  0x2a, 0x4f }, /* l2_st.self.mesi		  */
246	{ "PAPI_ld_ins",  0xc0, 0x01 }, /* inst_retired.loads		  */
247	{ "PAPI_lst_ins", 0xc0, 0x03 }, /* inst_retired.loads|stores	  */
248	{ "PAPI_sr_ins",  0xc0, 0x02 }, /* inst_retired.stores		  */
249	{ "PAPI_tlb_dm",  0x08, 0x01 }, /* dtlb_misses.any		  */
250	{ "PAPI_tlb_im",  0x82, 0x12 }, /* itlb.small_miss|large_miss	  */
251	{ "PAPI_tlb_tl",  0x0c, 0x03 }, /* page_walks			  */
252	{ "",		  NT_END, 0  }
253};
254
255static const struct generic_events generic_events_pic0[] = {
256	{ "PAPI_l1_dcm",  0xcb, 0x01 }, /* mem_load_retired.l1d_miss */
257	{ "",		  NT_END, 0  }
258};
259
260/*
261 * The events listed in the following table can be counted on all
262 * general-purpose counters on processors that are of Penryn and Merom Family
263 */
264static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = {
265	/* Alphabetical order of event name */
266
267	{ "baclears",			0x0,	0xe6 },
268	{ "bogus_br",			0x0,	0xe4 },
269	{ "br_bac_missp_exec",		0x0,	0x8a },
270
271	{ "br_call_exec",		0x0,	0x92 },
272	{ "br_call_missp_exec",		0x0,	0x93 },
273	{ "br_cnd_exec",		0x0,	0x8b },
274
275	{ "br_cnd_missp_exec",		0x0,	0x8c },
276	{ "br_ind_call_exec",		0x0,	0x94 },
277	{ "br_ind_exec",		0x0,	0x8d },
278
279	{ "br_ind_missp_exec",		0x0,	0x8e },
280	{ "br_inst_decoded",		0x0,	0xe0 },
281	{ "br_inst_exec",		0x0,	0x88 },
282
283	{ "br_inst_retired",		0x0,	0xc4 },
284	{ "br_inst_retired_mispred",	0x0,	0xc5 },
285	{ "br_missp_exec",		0x0,	0x89 },
286
287	{ "br_ret_bac_missp_exec",	0x0,	0x91 },
288	{ "br_ret_exec",		0x0,	0x8f },
289	{ "br_ret_missp_exec",		0x0,	0x90 },
290
291	{ "br_tkn_bubble_1",		0x0,	0x97 },
292	{ "br_tkn_bubble_2",		0x0,	0x98 },
293	{ "bus_bnr_drv",		ALL_AGENTS,	0x61 },
294
295	{ "bus_data_rcv",		ALL_CORES,	0x64 },
296	{ "bus_drdy_clocks",		ALL_AGENTS,	0x62 },
297	{ "bus_hit_drv",		ALL_AGENTS,	0x7a },
298
299	{ "bus_hitm_drv",		ALL_AGENTS,	0x7b },
300	{ "bus_io_wait",		ALL_CORES,	0x7f },
301	{ "bus_lock_clocks",		ALL_CORES | ALL_AGENTS,	0x63 },
302
303	{ "bus_request_outstanding",	ALL_CORES | ALL_AGENTS,	0x60 },
304	{ "bus_trans_any",		ALL_CORES | ALL_AGENTS,	0x70 },
305	{ "bus_trans_brd",		ALL_CORES | ALL_AGENTS,	0x65 },
306
307	{ "bus_trans_burst",		ALL_CORES | ALL_AGENTS,	0x6e },
308	{ "bus_trans_def",		ALL_CORES | ALL_AGENTS,	0x6d },
309	{ "bus_trans_ifetch",		ALL_CORES | ALL_AGENTS,	0x68 },
310
311	{ "bus_trans_inval",		ALL_CORES | ALL_AGENTS,	0x69 },
312	{ "bus_trans_io",		ALL_CORES | ALL_AGENTS,	0x6c },
313	{ "bus_trans_mem",		ALL_CORES | ALL_AGENTS,	0x6f },
314
315	{ "bus_trans_p",		ALL_CORES | ALL_AGENTS,	0x6b },
316	{ "bus_trans_pwr",		ALL_CORES | ALL_AGENTS,	0x6a },
317	{ "bus_trans_rfo",		ALL_CORES | ALL_AGENTS,	0x66 },
318
319	{ "bus_trans_wb",		ALL_CORES | ALL_AGENTS,	0x67 },
320	{ "busq_empty",			ALL_CORES,	0x7d },
321	{ "cmp_snoop",			ALL_CORES,	0x78 },
322
323	{ "cpu_clk_unhalted",		0x0,	0x3c },
324	{ "cycles_int",			0x0,	0xc6 },
325	{ "cycles_l1i_mem_stalled",	0x0,	0x86 },
326
327	{ "dtlb_misses",		0x0,	0x08 },
328	{ "eist_trans",			0x0,	0x3a },
329	{ "esp",			0x0,	0xab },
330
331	{ "ext_snoop",			ALL_AGENTS,	0x77 },
332	{ "fp_mmx_trans",		0x0,	0xcc },
333	{ "hw_int_rcv",			0x0,	0xc8 },
334
335	{ "ild_stall",			0x0,	0x87 },
336	{ "inst_queue",			0x0,	0x83 },
337	{ "inst_retired",		0x0,	0xc0 },
338
339	{ "itlb",			0x0,	0x82 },
340	{ "itlb_miss_retired",		0x0,	0xc9 },
341	{ "l1d_all_ref",		0x0,	0x43 },
342
343	{ "l1d_cache_ld",		0x0,	0x40 },
344	{ "l1d_cache_lock",		0x0,	0x42 },
345	{ "l1d_cache_st",		0x0,	0x41 },
346
347	{ "l1d_m_evict",		0x0,	0x47 },
348	{ "l1d_m_repl",			0x0,	0x46 },
349	{ "l1d_pend_miss",		0x0,	0x48 },
350
351	{ "l1d_prefetch",		0x0,	0x4e },
352	{ "l1d_repl",			0x0,	0x45 },
353	{ "l1d_split",			0x0,	0x49 },
354
355	{ "l1i_misses",			0x0,	0x81 },
356	{ "l1i_reads",			0x0,	0x80 },
357	{ "l2_ads",			ALL_CORES,	0x21 },
358
359	{ "l2_dbus_busy_rd",		ALL_CORES,	0x23 },
360	{ "l2_ifetch",			ALL_CORES,	0x28 },
361	{ "l2_ld",			ALL_CORES,	0x29 },
362
363	{ "l2_lines_in",		ALL_CORES,	0x24 },
364	{ "l2_lines_out",		ALL_CORES,	0x26 },
365	{ "l2_lock",			ALL_CORES,	0x2b },
366
367	{ "l2_m_lines_in",		ALL_CORES,	0x25 },
368	{ "l2_m_lines_out",		ALL_CORES,	0x27 },
369	{ "l2_no_req",			ALL_CORES,	0x32 },
370
371	{ "l2_reject_busq",		ALL_CORES,	0x30 },
372	{ "l2_rqsts",			ALL_CORES,	0x2e },
373	{ "l2_st",			ALL_CORES,	0x2a },
374
375	{ "load_block",			0x0,	0x03 },
376	{ "load_hit_pre",		0x0,	0x4c },
377	{ "machine_nukes",		0x0,	0xc3 },
378
379	{ "macro_insts",		0x0,	0xaa },
380	{ "memory_disambiguation",	0x0,	0x09 },
381	{ "misalign_mem_ref",		0x0,	0x05 },
382	{ "page_walks",			0x0,	0x0c },
383
384	{ "pref_rqsts_dn",		0x0,	0xf8 },
385	{ "pref_rqsts_up",		0x0,	0xf0 },
386	{ "rat_stalls",			0x0,	0xd2 },
387
388	{ "resource_stalls",		0x0,	0xdc },
389	{ "rs_uops_dispatched",		0x0,	0xa0 },
390	{ "seg_reg_renames",		0x0,	0xd5 },
391
392	{ "seg_rename_stalls",		0x0,	0xd4 },
393	{ "segment_reg_loads",		0x0,	0x06 },
394	{ "simd_assist",		0x0,	0xcd },
395
396	{ "simd_comp_inst_retired",	0x0,	0xca },
397	{ "simd_inst_retired",		0x0,	0xc7 },
398	{ "simd_instr_retired",		0x0,	0xce },
399
400	{ "simd_sat_instr_retired",	0x0,	0xcf },
401	{ "simd_sat_uop_exec",		0x0,	0xb1 },
402	{ "simd_uop_type_exec",		0x0,	0xb3 },
403
404	{ "simd_uops_exec",		0x0,	0xb0 },
405	{ "snoop_stall_drv",		ALL_CORES | ALL_AGENTS,	0x7e },
406	{ "sse_pre_exec",		0x0,	0x07 },
407
408	{ "sse_pre_miss",		0x0,	0x4b },
409	{ "store_block",		0x0,	0x04 },
410	{ "thermal_trip",		0x0,	0x3b },
411
412	{ "uops_retired",		0x0,	0xc2 },
413	{ "x87_ops_retired",		0x0,	0xc1 },
414	{ "",				0x0,	NT_END }
415};
416
417/*
418 * If any of the pic-specific events require privileges, make sure to add a
419 * check in configure_gpc() to find whether an event hard-coded as a number by
420 * the user has any privilege requirements
421 */
422static const struct nametable_core_uarch pic0_events[] = {
423	/* Alphabetical order of event name */
424
425	{ "cycles_div_busy",		0x0,	0x14 },
426	{ "fp_comp_ops_exe",		0x0,	0x10 },
427	{ "idle_during_div",		0x0,	0x18 },
428
429	{ "mem_load_retired",		0x0,	0xcb },
430	{ "rs_uops_dispatched_port",	0x0,	0xa1 },
431	{ "",				0x0,	NT_END }
432};
433
434static const struct nametable_core_uarch pic1_events[] = {
435	/* Alphabetical order of event name */
436
437	{ "delayed_bypass",	0x0,	0x19 },
438	{ "div",		0x0,	0x13 },
439	{ "fp_assist",		0x0,	0x11 },
440
441	{ "mul",		0x0,	0x12 },
442	{ "",			0x0,	NT_END }
443};
444
445/* FFC entries must be in order */
446static char *ffc_names_non_htt[] = {
447	"instr_retired.any",
448	"cpu_clk_unhalted.core",
449	"cpu_clk_unhalted.ref",
450	NULL
451};
452
453static char *ffc_names_htt[] = {
454	"instr_retired.any",
455	"cpu_clk_unhalted.thread",
456	"cpu_clk_unhalted.ref",
457	NULL
458};
459
460static char *ffc_genericnames[] = {
461	"PAPI_tot_ins",
462	"PAPI_tot_cyc",
463	"",
464	NULL
465};
466
467static char	**ffc_names = NULL;
468static char	**ffc_allnames = NULL;
469static char	**gpc_names = NULL;
470static uint32_t	versionid;
471static uint64_t	num_gpc;
472static uint64_t	width_gpc;
473static uint64_t	mask_gpc;
474static uint64_t	num_ffc;
475static uint64_t	width_ffc;
476static uint64_t	mask_ffc;
477static uint_t	total_pmc;
478static uint64_t	control_ffc;
479static uint64_t	control_gpc;
480static uint64_t	control_mask;
481static uint32_t	arch_events_vector;
482
483#define	IMPL_NAME_LEN 100
484static char core_impl_name[IMPL_NAME_LEN];
485
486static const char *core_cpuref =
487	"See https://download.01.org/perfmon/index/ or Chapters 18 and 19 " \
488	"of the \"Intel 64 and IA-32 Architectures Software Developer's " \
489	"Manual Volume 3: System Programming Guide\" Order Number: " \
490	"325384-062US, March 2017.";
491
492
493/* Architectural events */
494#define	ARCH_EVENTS_COMMON					\
495	{ 0xc0, 0x00, C_ALL, "inst_retired.any_p" },		\
496	{ 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" },	\
497	{ 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" },	\
498	{ 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" },	\
499	{ 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" },	\
500	{ 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }
501
502static const struct events_table_t arch_events_table_non_htt[] = {
503	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
504	ARCH_EVENTS_COMMON
505};
506
507static const struct events_table_t arch_events_table_htt[] = {
508	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
509	ARCH_EVENTS_COMMON
510};
511
512static char *arch_genevents_table[] = {
513	"PAPI_tot_cyc", /* cpu_clk_unhalted.thread_p/core */
514	"PAPI_tot_ins", /* inst_retired.any_p		  */
515	"",		/* cpu_clk_unhalted.ref_p	  */
516	"",		/* longest_lat_cache.reference	  */
517	"",		/* longest_lat_cache.miss	  */
518	"",		/* br_inst_retired.all_branches	  */
519	"",		/* br_misp_retired.all_branches	  */
520};
521
522static const struct events_table_t *arch_events_table = NULL;
523static uint64_t known_arch_events;
524static uint64_t known_ffc_num;
525static const struct events_table_t *events_table = NULL;
526
527/*
528 * Initialize the per-counter strings listing the supported general-purpose
529 * counter events for processors of the Penryn and Merom families
530 */
531static void
532pcbe_init_core_uarch()
533{
534	const struct nametable_core_uarch	*n;
535	const struct generic_events		*k;
536	const struct nametable_core_uarch	*picspecific_events;
537	const struct generic_events		*picspecific_genericevents;
538	size_t			common_size;
539	size_t			size;
540	uint64_t		i;
541
542	gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
543
544	/* Calculate space needed to save all the common event names */
545	common_size = 0;
546	for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; n++) {
547		common_size += strlen(n->name) + 1;
548	}
549
550	for (k = cmn_generic_events; k->event_num != NT_END; k++) {
551		common_size += strlen(k->name) + 1;
552	}
553
554	for (i = 0; i < num_gpc; i++) {
555		size = 0;
556		picspecific_genericevents = NULL;
557
558		switch (i) {
559			case 0:
560				picspecific_events = pic0_events;
561				picspecific_genericevents = generic_events_pic0;
562				break;
563			case 1:
564				picspecific_events = pic1_events;
565				break;
566			default:
567				picspecific_events = NULL;
568				break;
569		}
570		if (picspecific_events != NULL) {
571			for (n = picspecific_events;
572			    n->event_num != NT_END;
573			    n++) {
574				size += strlen(n->name) + 1;
575			}
576		}
577		if (picspecific_genericevents != NULL) {
578			for (k = picspecific_genericevents;
579			    k->event_num != NT_END; k++) {
580				size += strlen(k->name) + 1;
581			}
582		}
583
584		gpc_names[i] =
585		    kmem_alloc(size + common_size + 1, KM_SLEEP);
586
587		gpc_names[i][0] = '\0';
588		if (picspecific_events != NULL) {
589			for (n = picspecific_events;
590			    n->event_num != NT_END; n++) {
591				(void) strcat(gpc_names[i], n->name);
592				(void) strcat(gpc_names[i], ",");
593			}
594		}
595		if (picspecific_genericevents != NULL) {
596			for (k = picspecific_genericevents;
597			    k->event_num != NT_END; k++) {
598				(void) strcat(gpc_names[i], k->name);
599				(void) strcat(gpc_names[i], ",");
600			}
601		}
602		for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END;
603		    n++) {
604			(void) strcat(gpc_names[i], n->name);
605			(void) strcat(gpc_names[i], ",");
606		}
607		for (k = cmn_generic_events; k->event_num != NT_END; k++) {
608			(void) strcat(gpc_names[i], k->name);
609			(void) strcat(gpc_names[i], ",");
610		}
611
612		/*
613		 * Remove trailing comma.
614		 */
615		gpc_names[i][common_size + size - 1] = '\0';
616	}
617}
618
619static int
620core_pcbe_init(void)
621{
622	struct cpuid_regs	cp;
623	size_t			size;
624	uint64_t		i;
625	uint64_t		j;
626	uint64_t		arch_events_vector_length;
627	size_t			arch_events_string_length;
628	uint_t			model, stepping;
629
630	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
631		return (-1);
632
633	/* Obtain Basic CPUID information */
634	cp.cp_eax = 0x0;
635	(void) __cpuid_insn(&cp);
636
637	/* No Architectural Performance Monitoring Leaf returned by CPUID */
638	if (cp.cp_eax < 0xa) {
639		return (-1);
640	}
641
642	/* Obtain the Architectural Performance Monitoring Leaf */
643	cp.cp_eax = 0xa;
644	(void) __cpuid_insn(&cp);
645
646	versionid = cp.cp_eax & 0xFF;
647
648	/*
649	 * Fixed-Function Counters (FFC)
650	 *
651	 * All Family 6 Model 15 and Model 23 processors have fixed-function
652	 * counters.  These counters were made Architectural with
653	 * Family 6 Model 15 Stepping 9.
654	 */
655	switch (versionid) {
656
657		case 0:
658			return (-1);
659
660		case 2:
661			num_ffc = cp.cp_edx & 0x1F;
662			width_ffc = (cp.cp_edx >> 5) & 0xFF;
663
664			/*
665			 * Some processors have an erratum (AW34) where
666			 * versionid is reported as 2 when it is actually 1.
667			 * In this case, fixed-function counters are
668			 * model-specific as in Version 1.
669			 */
670			if (num_ffc != 0) {
671				break;
672			}
673			/* FALLTHROUGH */
674		case 1:
675			num_ffc = 3;
676			width_ffc = 40;
677			versionid = 1;
678			break;
679
680		default:
681			num_ffc = cp.cp_edx & 0x1F;
682			width_ffc = (cp.cp_edx >> 5) & 0xFF;
683			break;
684	}
685
686
687	if (num_ffc >= 64)
688		return (-1);
689
690	/* Set HTT-specific names of architectural & FFC events */
691	if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
692		ffc_names = ffc_names_htt;
693		arch_events_table = arch_events_table_htt;
694		known_arch_events =
695		    sizeof (arch_events_table_htt) /
696		    sizeof (struct events_table_t);
697		known_ffc_num =
698		    sizeof (ffc_names_htt) / sizeof (char *);
699	} else {
700		ffc_names = ffc_names_non_htt;
701		arch_events_table = arch_events_table_non_htt;
702		known_arch_events =
703		    sizeof (arch_events_table_non_htt) /
704		    sizeof (struct events_table_t);
705		known_ffc_num =
706		    sizeof (ffc_names_non_htt) / sizeof (char *);
707	}
708
709	if (num_ffc >= known_ffc_num) {
710		/*
711		 * The system seems to have more fixed-function counters than
712		 * what this PCBE is able to handle correctly.  Default to the
713		 * maximum number of fixed-function counters that this driver
714		 * is aware of.
715		 */
716		num_ffc = known_ffc_num - 1;
717	}
718
719	mask_ffc = BITMASK_XBITS(width_ffc);
720	control_ffc = BITMASK_XBITS(num_ffc);
721
722	/*
723	 * General Purpose Counters (GPC)
724	 */
725	num_gpc = (cp.cp_eax >> 8) & 0xFF;
726	width_gpc = (cp.cp_eax >> 16) & 0xFF;
727
728	if (num_gpc >= 64)
729		return (-1);
730
731	mask_gpc = BITMASK_XBITS(width_gpc);
732
733	control_gpc = BITMASK_XBITS(num_gpc);
734
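	/*
	 * control_mask mirrors the IA32_PERF_GLOBAL_CTRL/STATUS layout:
	 * GPC bits occupy bits 0 through num_gpc - 1 and FFC bits start
	 * at bit 32.
	 */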
735	control_mask = (control_ffc << 32) | control_gpc;
736
737	total_pmc = num_gpc + num_ffc;
738	if (total_pmc > 64) {
739		/* Too wide for the overflow bitmap */
740		return (-1);
741	}
742
743	/* FFC names */
744	ffc_allnames = kmem_alloc(num_ffc * sizeof (char *), KM_SLEEP);
745	for (i = 0; i < num_ffc; i++) {
746		ffc_allnames[i] = kmem_alloc(
747		    strlen(ffc_names[i]) + strlen(ffc_genericnames[i]) + 2,
748		    KM_SLEEP);
749
750		ffc_allnames[i][0] = '\0';
751		(void) strcat(ffc_allnames[i], ffc_names[i]);
752
753		/* Check if this ffc has a generic name */
754		if (strcmp(ffc_genericnames[i], "") != 0) {
755			(void) strcat(ffc_allnames[i], ",");
756			(void) strcat(ffc_allnames[i], ffc_genericnames[i]);
757		}
758	}
759
760	/* GPC events for Family 6 Models 15, 23 and 29 only */
761	if ((cpuid_getfamily(CPU) == 6) &&
762	    ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23) ||
763	    (cpuid_getmodel(CPU) == 29))) {
764		(void) snprintf(core_impl_name, IMPL_NAME_LEN,
765		    "Core Microarchitecture");
766		pcbe_init_core_uarch();
767		return (0);
768	}
769
770	(void) snprintf(core_impl_name, IMPL_NAME_LEN,
771	    "Intel Arch PerfMon v%d on Family %d Model %d",
772	    versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU));
773
774	/*
775	 * Architectural events
776	 */
777	arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF;
778
779	ASSERT(known_arch_events == arch_events_vector_length);
780
781	/*
782	 * The ASSERT above is a no-op on a non-DEBUG kernel; if the counts
783	 * differ there, clamp both to the smaller value.
784	 */
785	if (known_arch_events > arch_events_vector_length) {
786		known_arch_events = arch_events_vector_length;
787	} else {
788		arch_events_vector_length = known_arch_events;
789	}
790
791	arch_events_vector = cp.cp_ebx &
792	    BITMASK_XBITS(arch_events_vector_length);
793
794	/*
795	 * Process architectural and non-architectural events using GPC
796	 */
797	if (num_gpc > 0) {
798
799		gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
800
801		/* Calculate space required for the architectural gpc events */
802		arch_events_string_length = 0;
803		for (i = 0; i < known_arch_events; i++) {
804			if (((1U << i) & arch_events_vector) == 0) {
805				arch_events_string_length +=
806				    strlen(arch_events_table[i].name) + 1;
807				if (strcmp(arch_genevents_table[i], "") != 0) {
808					arch_events_string_length +=
809					    strlen(arch_genevents_table[i]) + 1;
810				}
811			}
812		}
813
814		/* Non-architectural events list */
815		model = cpuid_getmodel(CPU);
816		stepping = cpuid_getstep(CPU);
817		events_table = core_cpcgen_table(model, stepping);
818
819		for (i = 0; i < num_gpc; i++) {
820
821			/*
822			 * Determine length of all supported event names
823			 * (architectural + non-architectural)
824			 */
825			size = arch_events_string_length;
826			for (j = 0; events_table != NULL &&
827			    events_table[j].eventselect != NT_END;
828			    j++) {
829				if (C(i) & events_table[j].supported_counters) {
830					size += strlen(events_table[j].name) +
831					    1;
832				}
833			}
834
835			/* Allocate memory for this pic's list */
836			gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP);
837			gpc_names[i][0] = '\0';
838			if (size == 0) {
839				continue;
840			}
841
842			/*
843			 * Create the list of all supported events
844			 * (architectural + non-architectural)
845			 */
846			for (j = 0; j < known_arch_events; j++) {
847				if (((1U << j) & arch_events_vector) == 0) {
848					(void) strcat(gpc_names[i],
849					    arch_events_table[j].name);
850					(void) strcat(gpc_names[i], ",");
851					if (strcmp(
852					    arch_genevents_table[j], "")
853					    != 0) {
854						(void) strcat(gpc_names[i],
855						    arch_genevents_table[j]);
856						(void) strcat(gpc_names[i],
857						    ",");
858					}
859				}
860			}
861
862			for (j = 0; events_table != NULL &&
863			    events_table[j].eventselect != NT_END;
864			    j++) {
865				if (C(i) & events_table[j].supported_counters) {
866					(void) strcat(gpc_names[i],
867					    events_table[j].name);
868					(void) strcat(gpc_names[i], ",");
869				}
870			}
871
872			/* Remove trailing comma */
873			gpc_names[i][size - 1] = '\0';
874		}
875	}
876
877	return (0);
878}
879
880static uint_t core_pcbe_ncounters()
881{
882	return (total_pmc);
883}
884
885static const char *core_pcbe_impl_name(void)
886{
887	return (core_impl_name);
888}
889
890static const char *core_pcbe_cpuref(void)
891{
892	return (core_cpuref);
893}
894
895static char *core_pcbe_list_events(uint_t picnum)
896{
897	ASSERT(picnum < cpc_ncounters);
898
899	if (picnum < num_gpc) {
900		return (gpc_names[picnum]);
901	} else {
902		return (ffc_allnames[picnum - num_gpc]);
903	}
904}
905
906static char *core_pcbe_list_attrs(void)
907{
908	if (versionid >= 3) {
909		return ("edge,inv,umask,cmask,anythr");
910	} else {
911		return ("edge,pc,inv,umask,cmask");
912	}
913}
914
915static const struct nametable_core_uarch *
916find_gpcevent_core_uarch(char *name,
917    const struct nametable_core_uarch *nametable)
918{
919	const struct nametable_core_uarch *n;
920	int compare_result = -1;
921
922	for (n = nametable; n->event_num != NT_END; n++) {
923		compare_result = strcmp(name, n->name);
924		if (compare_result <= 0) {
925			break;
926		}
927	}
928
929	if (compare_result == 0) {
930		return (n);
931	}
932
933	return (NULL);
934}
935
936static const struct generic_events *
937find_generic_events(char *name, const struct generic_events *table)
938{
939	const struct generic_events *n;
940
941	for (n = table; n->event_num != NT_END; n++) {
942		if (strcmp(name, n->name) == 0) {
943			return (n);
944		};
945	}
946
947	return (NULL);
948}
949
950static const struct events_table_t *
951find_gpcevent(char *name)
952{
953	int i;
954
955	/* Search architectural events */
956	for (i = 0; i < known_arch_events; i++) {
957		if (strcmp(name, arch_events_table[i].name) == 0 ||
958		    strcmp(name, arch_genevents_table[i]) == 0) {
959			if (((1U << i) & arch_events_vector) == 0) {
960				return (&arch_events_table[i]);
961			}
962		}
963	}
964
965	/* Search non-architectural events */
966	if (events_table != NULL) {
967		for (i = 0; events_table[i].eventselect != NT_END; i++) {
968			if (strcmp(name, events_table[i].name) == 0) {
969				return (&events_table[i]);
970			}
971		}
972	}
973
974	return (NULL);
975}
976
977static uint64_t
978core_pcbe_event_coverage(char *event)
979{
980	uint64_t bitmap;
981	uint64_t bitmask;
982	const struct events_table_t *n;
983	int i;
984
985	bitmap = 0;
986
987	/* Is it an event that a GPC can track? */
988	if (versionid >= 3) {
989		n = find_gpcevent(event);
990		if (n != NULL) {
991			bitmap |= (n->supported_counters &
992			    BITMASK_XBITS(num_gpc));
993		}
994	} else {
995		if (find_generic_events(event, cmn_generic_events) != NULL) {
996			bitmap |= BITMASK_XBITS(num_gpc);
997		} else if (find_generic_events(event, generic_events_pic0) != NULL) {
998			bitmap |= 1ULL;
999		} else if (find_gpcevent_core_uarch(event,
1000		    cmn_gpc_events_core_uarch) != NULL) {
1001			bitmap |= BITMASK_XBITS(num_gpc);
1002		} else if (find_gpcevent_core_uarch(event, pic0_events) !=
1003		    NULL) {
1004			bitmap |= 1ULL;
1005		} else if (find_gpcevent_core_uarch(event, pic1_events) !=
1006		    NULL) {
1007			bitmap |= 1ULL << 1;
1008		}
1009	}
1010
1011	/* Check if the event can be counted in the fixed-function counters */
1012	if (num_ffc > 0) {
1013		bitmask = 1ULL << num_gpc;
1014		for (i = 0; i < num_ffc; i++) {
1015			if (strcmp(event, ffc_names[i]) == 0) {
1016				bitmap |= bitmask;
1017			} else if (strcmp(event, ffc_genericnames[i]) == 0) {
1018				bitmap |= bitmask;
1019			}
1020			bitmask = bitmask << 1;
1021		}
1022	}
1023
1024	return (bitmap);
1025}
1026
1027static uint64_t
1028core_pcbe_overflow_bitmap(void)
1029{
1030	uint64_t interrupt_status;
1031	uint64_t intrbits_ffc;
1032	uint64_t intrbits_gpc;
1033	extern int kcpc_hw_overflow_intr_installed;
1034	uint64_t overflow_bitmap;
1035
1036	RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
1037	WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);
1038
1039	interrupt_status = interrupt_status & control_mask;
1040	intrbits_ffc = (interrupt_status >> 32) & control_ffc;
1041	intrbits_gpc = interrupt_status & control_gpc;
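	/*
	 * Fold the hardware layout (FFC status bits at bit 32 and up) into
	 * kcpc's logical picnum space, where the FFCs immediately follow
	 * the GPCs.  E.g. with 2 GPCs, an overflow of FFC 1 (status bit 33)
	 * maps to logical counter 3.
	 */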
1042	overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;
1043
1044	ASSERT(kcpc_hw_overflow_intr_installed);
1045	(*kcpc_hw_enable_cpc_intr)();
1046
1047	return (overflow_bitmap);
1048}
1049
1050static int
1051check_cpc_securitypolicy(core_pcbe_config_t *conf,
1052    const struct nametable_core_uarch *n)
1053{
1054	if (conf->core_ctl & n->restricted_bits) {
1055		if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1056			return (CPC_ATTR_REQUIRES_PRIVILEGE);
1057		}
1058	}
1059	return (0);
1060}
1061
1062static int
1063configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1064    uint_t nattrs, kcpc_attr_t *attrs, void **data)
1065{
1066	core_pcbe_config_t	conf;
1067	const struct nametable_core_uarch	*n;
1068	const struct generic_events *k = NULL;
1069	const struct nametable_core_uarch	*m;
1070	const struct nametable_core_uarch	*picspecific_events;
1071	struct nametable_core_uarch	nt_raw = { "", 0x0, 0x0 };
1072	uint_t			i;
1073	long			event_num;
1074	const struct events_table_t *eventcode;
1075
1076	if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
1077	    ((preset & BITS_EXTENDED_FROM_31) !=
1078	    BITS_EXTENDED_FROM_31)) {
1079
1080		/*
1081		 * Bits beyond bit-31 in the general-purpose counters can only
1082		 * be written to by extension of bit 31.  We cannot preset
1083		 * these bits to any value other than all 1s or all 0s.
1084		 */
1085		return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1086	}
1087
1088	if (versionid >= 3) {
1089		eventcode = find_gpcevent(event);
1090		if (eventcode != NULL) {
1091			if ((C(picnum) & eventcode->supported_counters) == 0) {
1092				return (CPC_PIC_NOT_CAPABLE);
1093			}
1094			if (nattrs > 0 &&
1095			    (strncmp("PAPI_", event, 5) == 0)) {
1096				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1097			}
1098			conf.core_ctl = eventcode->eventselect;
1099			conf.core_ctl |= eventcode->unitmask <<
1100			    CORE_UMASK_SHIFT;
1101		} else {
1102			/* Event specified as raw event code */
1103			if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
1104				return (CPC_INVALID_EVENT);
1105			}
1106			conf.core_ctl = event_num & 0xFF;
1107		}
1108	} else {
1109		if ((k = find_generic_events(event, cmn_generic_events)) !=
1110		    NULL ||
1111		    (picnum == 0 &&
1112		    (k = find_generic_events(event, generic_events_pic0)) !=
1113		    NULL)) {
1114			if (nattrs > 0) {
1115				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1116			}
1117			conf.core_ctl = k->event_num;
1118			conf.core_ctl |= k->umask << CORE_UMASK_SHIFT;
1119		} else {
1120			/* Not a generic event */
1121
1122			n = find_gpcevent_core_uarch(event,
1123			    cmn_gpc_events_core_uarch);
1124			if (n == NULL) {
1125				switch (picnum) {
1126					case 0:
1127						picspecific_events =
1128						    pic0_events;
1129						break;
1130					case 1:
1131						picspecific_events =
1132						    pic1_events;
1133						break;
1134					default:
1135						picspecific_events = NULL;
1136						break;
1137				}
1138				if (picspecific_events != NULL) {
1139					n = find_gpcevent_core_uarch(event,
1140					    picspecific_events);
1141				}
1142			}
1143			if (n == NULL) {
1144
1145				/*
1146				 * Check if this is a case where the event was
1147				 * specified directly by its event number
1148				 * instead of its name string.
1149				 */
1150				if (ddi_strtol(event, NULL, 0, &event_num) !=
1151				    0) {
1152					return (CPC_INVALID_EVENT);
1153				}
1154
1155				event_num = event_num & 0xFF;
1156
1157				/*
1158				 * Search the event table to find out if the
1159				 * event specified has any privilege
1160				 * requirements.  Currently none of the
1161				 * pic-specific counters have any privilege
1162				 * requirements.  Hence only the table
1163				 * cmn_gpc_events_core_uarch is searched.
1164				 */
1165				for (m = cmn_gpc_events_core_uarch;
1166				    m->event_num != NT_END;
1167				    m++) {
1168					if (event_num == m->event_num) {
1169						break;
1170					}
1171				}
1172				if (m->event_num == NT_END) {
1173					nt_raw.event_num = (uint8_t)event_num;
1174					n = &nt_raw;
1175				} else {
1176					n = m;
1177				}
1178			}
1179			conf.core_ctl = n->event_num; /* Event Select */
1180		}
1181	}
1182
1183
1184	conf.core_picno = picnum;
1185	conf.core_pictype = CORE_GPC;
1186	conf.core_rawpic = preset & mask_gpc;
1187
1188	conf.core_pes = GPC_BASE_PES + picnum;
1189	conf.core_pmc = GPC_BASE_PMC + picnum;
1190
1191	for (i = 0; i < nattrs; i++) {
1192		if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
1193			if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
1194			    CORE_UMASK_MASK) {
1195				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1196			}
1197			/* Clear out the default umask */
1198			conf.core_ctl &= ~ (CORE_UMASK_MASK <<
1199			    CORE_UMASK_SHIFT);
1200			/* Use the user provided umask */
1201			conf.core_ctl |= attrs[i].ka_val <<
1202			    CORE_UMASK_SHIFT;
1203		} else  if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
1204			if (attrs[i].ka_val != 0)
1205				conf.core_ctl |= CORE_EDGE;
1206		} else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
1207			if (attrs[i].ka_val != 0)
1208				conf.core_ctl |= CORE_INV;
1209		} else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
1210			if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
1211			    CORE_CMASK_MASK) {
1212				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1213			}
1214			conf.core_ctl |= attrs[i].ka_val <<
1215			    CORE_CMASK_SHIFT;
1216		} else if (strncmp(attrs[i].ka_name, "anythr", 7) ==
1217		    0) {
1218			if (versionid < 3)
1219				return (CPC_INVALID_ATTRIBUTE);
1220			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1221				return (CPC_ATTR_REQUIRES_PRIVILEGE);
1222			}
1223			if (attrs[i].ka_val != 0)
1224				conf.core_ctl |= CORE_ANYTHR;
1225		} else {
1226			return (CPC_INVALID_ATTRIBUTE);
1227		}
1228	}
1229
1230	if (flags & CPC_COUNT_USER)
1231		conf.core_ctl |= CORE_USR;
1232	if (flags & CPC_COUNT_SYSTEM)
1233		conf.core_ctl |= CORE_OS;
1234	if (flags & CPC_OVF_NOTIFY_EMT)
1235		conf.core_ctl |= CORE_INT;
1236	conf.core_ctl |= CORE_EN;
1237
1238	if (versionid < 3 && k == NULL) {
1239		if (check_cpc_securitypolicy(&conf, n) != 0) {
1240			return (CPC_ATTR_REQUIRES_PRIVILEGE);
1241		}
1242	}
1243
1244	*data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1245	*((core_pcbe_config_t *)*data) = conf;
1246
1247	return (0);
1248}
1249
1250static int
1251configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1252    uint_t nattrs, kcpc_attr_t *attrs, void **data)
1253{
1254	core_pcbe_config_t	*conf;
1255	uint_t			i;
1256
1257	if (picnum - num_gpc >= num_ffc) {
1258		return (CPC_INVALID_PICNUM);
1259	}
1260
1261	if ((strcmp(ffc_names[picnum-num_gpc], event) != 0) &&
1262	    (strcmp(ffc_genericnames[picnum-num_gpc], event) != 0)) {
1263		return (CPC_INVALID_EVENT);
1264	}
1265
1266	if ((versionid < 3) && (nattrs != 0)) {
1267		return (CPC_INVALID_ATTRIBUTE);
1268	}
1269
1270	conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1271	conf->core_ctl = 0;
1272
1273	for (i = 0; i < nattrs; i++) {
1274		if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) {
1275			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1276				kmem_free(conf, sizeof (core_pcbe_config_t));
1277				return (CPC_ATTR_REQUIRES_PRIVILEGE);
1278			}
1279			if (attrs[i].ka_val != 0) {
1280				conf->core_ctl |= CORE_FFC_ANYTHR;
1281			}
1282		} else {
1283			kmem_free(conf, sizeof (core_pcbe_config_t));
1284			return (CPC_INVALID_ATTRIBUTE);
1285		}
1286	}
1287
1288	conf->core_picno = picnum;
1289	conf->core_pictype = CORE_FFC;
1290	conf->core_rawpic = preset & mask_ffc;
1291	conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);
1292
1293	/* All fixed-function counters have the same control register */
1294	conf->core_pes = PERF_FIXED_CTR_CTRL;
1295
1296	if (flags & CPC_COUNT_USER)
1297		conf->core_ctl |= CORE_FFC_USR_EN;
1298	if (flags & CPC_COUNT_SYSTEM)
1299		conf->core_ctl |= CORE_FFC_OS_EN;
1300	if (flags & CPC_OVF_NOTIFY_EMT)
1301		conf->core_ctl |= CORE_FFC_PMI;
1302
1303	*data = conf;
1304	return (0);
1305}
1306
1307/*ARGSUSED*/
1308static int
1309core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
1310    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
1311    void *token)
1312{
1313	int			ret;
1314	core_pcbe_config_t	*conf;
1315
1316	/*
1317	 * If we've been handed an existing configuration, we need only preset
1318	 * the counter value.
1319	 */
1320	if (*data != NULL) {
1321		conf = *data;
1322		ASSERT(conf->core_pictype == CORE_GPC ||
1323		    conf->core_pictype == CORE_FFC);
1324		if (conf->core_pictype == CORE_GPC)
1325			conf->core_rawpic = preset & mask_gpc;
1326		else /* CORE_FFC */
1327			conf->core_rawpic = preset & mask_ffc;
1328		return (0);
1329	}
1330
1331	if (picnum >= total_pmc) {
1332		return (CPC_INVALID_PICNUM);
1333	}
1334
1335	if (picnum < num_gpc) {
1336		ret = configure_gpc(picnum, event, preset, flags,
1337		    nattrs, attrs, data);
1338	} else {
1339		ret = configure_ffc(picnum, event, preset, flags,
1340		    nattrs, attrs, data);
1341	}
1342	return (ret);
1343}
1344
1345static void
1346core_pcbe_program(void *token)
1347{
1348	core_pcbe_config_t	*cfg;
1349	uint64_t		perf_global_ctrl;
1350	uint64_t		perf_fixed_ctr_ctrl;
1351	uint64_t		curcr4;
1352
1353	core_pcbe_allstop();
1354
1355	curcr4 = getcr4();
1356	if (kcpc_allow_nonpriv(token))
1357		/* Allow RDPMC at any ring level */
1358		setcr4(curcr4 | CR4_PCE);
1359	else
1360		/* Allow RDPMC only at ring 0 */
1361		setcr4(curcr4 & ~CR4_PCE);
1362
1363	/* Clear any overflow indicators before programming the counters */
1364	WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);
1365
1366	cfg = NULL;
1367	perf_global_ctrl = 0;
1368	perf_fixed_ctr_ctrl = 0;
1369	cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
1370	while (cfg != NULL) {
1371		ASSERT(cfg->core_pictype == CORE_GPC ||
1372		    cfg->core_pictype == CORE_FFC);
1373
1374		if (cfg->core_pictype == CORE_GPC) {
1375			/*
1376			 * General-purpose counter registers have write
1377			 * restrictions where only the lower 32-bits can be
1378			 * written to.  The rest of the relevant bits are
1379			 * written to by extension from bit 31 (all ZEROs if
1380			 * bit-31 is ZERO and all ONEs if bit-31 is ONE).  This
1381			 * makes it possible to write to the counter register
1382			 * only values that have all ONEs or all ZEROs in the
1383			 * higher bits.
1384			 */
1385			if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
1386			    ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
1387			    BITS_EXTENDED_FROM_31)) {
1388				/*
1389				 * Straightforward case where the higher bits
1390				 * are all ZEROs or all ONEs.
1391				 */
1392				WRMSR(cfg->core_pmc,
1393				    (cfg->core_rawpic & mask_gpc));
1394			} else {
1395				/*
1396				 * The high order bits are not all the same.
1397				 * We save what is currently in the registers
1398				 * and do not write to it.  When we want to do
1399				 * a read from this register later (in
1400				 * core_pcbe_sample()), we subtract the value
1401				 * we save here to get the actual event count.
1402				 *
1403				 * NOTE: As a result, we will not get overflow
1404				 * interrupts as expected.
1405				 */
1406				RDMSR(cfg->core_pmc, cfg->core_rawpic);
1407				cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
1408			}
1409			WRMSR(cfg->core_pes, cfg->core_ctl);
1410			perf_global_ctrl |= 1ull << cfg->core_picno;
1411		} else {
1412			/*
1413			 * Unlike the general-purpose counters, all relevant
1414			 * bits of fixed-function counters can be written to.
1415			 */
1416			WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);
1417
1418			/*
1419			 * Collect the control bits for all the
1420			 * fixed-function counters and write them in one shot
1421			 * later in this function
1422			 */
1423			perf_fixed_ctr_ctrl |= cfg->core_ctl <<
1424			    ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
1425			perf_global_ctrl |=
1426			    1ull << (cfg->core_picno - num_gpc + 32);
1427		}
1428
1429		cfg = (core_pcbe_config_t *)
1430		    kcpc_next_config(token, cfg, NULL);
1431	}
1432
1433	/* Enable all the counters */
1434	WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
1435	WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
1436}
1437
1438static void
1439core_pcbe_allstop(void)
1440{
1441	/* Disable all the counters together */
1442	WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);
1443
1444	setcr4(getcr4() & ~CR4_PCE);
1445}
1446
1447static void
1448core_pcbe_sample(void *token)
1449{
1450	uint64_t		*daddr;
1451	uint64_t		curpic;
1452	core_pcbe_config_t	*cfg;
1453	uint64_t			counter_mask;
1454
1455	cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
1456	while (cfg != NULL) {
1457		ASSERT(cfg->core_pictype == CORE_GPC ||
1458		    cfg->core_pictype == CORE_FFC);
1459
1460		curpic = rdmsr(cfg->core_pmc);
1461
1462		DTRACE_PROBE4(core__pcbe__sample,
1463		    uint64_t, cfg->core_pmc,
1464		    uint64_t, curpic,
1465		    uint64_t, cfg->core_rawpic,
1466		    uint64_t, *daddr);
1467
1468		if (cfg->core_pictype == CORE_GPC) {
1469			counter_mask = mask_gpc;
1470		} else {
1471			counter_mask = mask_ffc;
1472		}
1473		curpic = curpic & counter_mask;
1474		if (curpic >= cfg->core_rawpic) {
1475			*daddr += curpic - cfg->core_rawpic;
1476		} else {
1477			/* Counter overflowed since our last sample */
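			/*
			 * E.g. with a 40-bit counter (counter_mask
			 * 0xFFFFFFFFFF), core_rawpic 0xFFFFFFFFF0 and curpic
			 * 0x10 yield a delta of 0x20: 0x10 counts up to the
			 * wrap plus 0x10 counts after it.
			 */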
1478			*daddr += counter_mask - (cfg->core_rawpic - curpic) +
1479			    1;
1480		}
1481		cfg->core_rawpic = *daddr & counter_mask;
1482
1483		cfg =
1484		    (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
1485	}
1486}
1487
1488static void
1489core_pcbe_free(void *config)
1490{
1491	kmem_free(config, sizeof (core_pcbe_config_t));
1492}
1493
1494static struct modlpcbe core_modlpcbe = {
1495	&mod_pcbeops,
1496	"Core Performance Counters",
1497	&core_pcbe_ops
1498};
1499
1500static struct modlinkage core_modl = {
1501	MODREV_1,
1502	&core_modlpcbe,
1503};
1504
1505int
1506_init(void)
1507{
1508	if (core_pcbe_init() != 0) {
1509		return (ENOTSUP);
1510	}
1511	return (mod_install(&core_modl));
1512}
1513
1514int
1515_fini(void)
1516{
1517	return (mod_remove(&core_modl));
1518}
1519
1520int
1521_info(struct modinfo *mi)
1522{
1523	return (mod_info(&core_modl, mi));
1524}
1525