xref: /illumos-gate/usr/src/uts/intel/pcbe/core_pcbe.c (revision afb806e6e80619e1656bd85fd44a7ab667ed0714)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Performance Counter Back-End for Intel processors supporting Architectural
28  * Performance Monitoring.
29  */
30 
31 #include <sys/cpuvar.h>
32 #include <sys/param.h>
33 #include <sys/cpc_impl.h>
34 #include <sys/cpc_pcbe.h>
35 #include <sys/modctl.h>
36 #include <sys/inttypes.h>
37 #include <sys/systm.h>
38 #include <sys/cmn_err.h>
39 #include <sys/x86_archext.h>
40 #include <sys/sdt.h>
41 #include <sys/archsystm.h>
42 #include <sys/privregs.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/cred.h>
46 #include <sys/policy.h>
47 
48 static int core_pcbe_init(void);
49 static uint_t core_pcbe_ncounters(void);
50 static const char *core_pcbe_impl_name(void);
51 static const char *core_pcbe_cpuref(void);
52 static char *core_pcbe_list_events(uint_t picnum);
53 static char *core_pcbe_list_attrs(void);
54 static uint64_t core_pcbe_event_coverage(char *event);
55 static uint64_t core_pcbe_overflow_bitmap(void);
56 static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
57     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
58     void *token);
59 static void core_pcbe_program(void *token);
60 static void core_pcbe_allstop(void);
61 static void core_pcbe_sample(void *token);
62 static void core_pcbe_free(void *config);
63 
64 #define	FALSE	0
65 #define	TRUE	1
66 
67 /* Counter Type */
68 #define	CORE_GPC	0	/* General-Purpose Counter (GPC) */
69 #define	CORE_FFC	1	/* Fixed-Function Counter (FFC) */
70 
71 /* MSR Addresses */
72 #define	GPC_BASE_PMC		0x00c1	/* First GPC */
73 #define	GPC_BASE_PES		0x0186	/* First GPC Event Select register */
74 #define	FFC_BASE_PMC		0x0309	/* First FFC */
75 #define	PERF_FIXED_CTR_CTRL	0x038d	/* Used to enable/disable FFCs */
76 #define	PERF_GLOBAL_STATUS	0x038e	/* Overflow status register */
77 #define	PERF_GLOBAL_CTRL	0x038f	/* Used to enable/disable counting */
78 #define	PERF_GLOBAL_OVF_CTRL	0x0390	/* Used to clear overflow status */
79 
80 /*
81  * Processor Event Select register fields
82  */
83 #define	CORE_USR	(1ULL << 16)	/* Count while not in ring 0 */
84 #define	CORE_OS		(1ULL << 17)	/* Count while in ring 0 */
85 #define	CORE_EDGE	(1ULL << 18)	/* Enable edge detection */
86 #define	CORE_PC		(1ULL << 19)	/* Enable pin control */
87 #define	CORE_INT	(1ULL << 20)	/* Enable interrupt on overflow */
88 #define	CORE_EN		(1ULL << 22)	/* Enable counting */
89 #define	CORE_INV	(1ULL << 23)	/* Invert the CMASK */
90 #define	CORE_ANYTHR	(1ULL << 21)	/* Count event for any thread on core */
91 
92 #define	CORE_UMASK_SHIFT	8
93 #define	CORE_UMASK_MASK		0xffu
94 #define	CORE_CMASK_SHIFT	24
95 #define	CORE_CMASK_MASK		0xffu
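
/*
 * Illustrative sketch only (nothing below uses these names): configure_gpc()
 * assembles a GPC event-select value from the fields above roughly as
 *
 *	ctl  = event_code;                        event code in bits 7:0
 *	ctl |= umask << CORE_UMASK_SHIFT;         unit mask in bits 15:8
 *	ctl |= cmask << CORE_CMASK_SHIFT;         counter mask in bits 31:24
 *	ctl |= CORE_USR | CORE_OS | CORE_EN;      count in all rings, enable
 *
 * where ctl, event_code, umask and cmask are placeholders for values taken
 * from the caller's request before the result is written to the pic's
 * event-select MSR.
 */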
96 
97 /*
98  * Fixed-function counter attributes
99  */
100 #define	CORE_FFC_OS_EN	(1ULL << 0)	/* Count while in ring 0 */
101 #define	CORE_FFC_USR_EN	(1ULL << 1)	/* Count while not in ring 0 */
102 #define	CORE_FFC_ANYTHR	(1ULL << 2)	/* Count event for any thread on core */
103 #define	CORE_FFC_PMI	(1ULL << 3)	/* Enable interrupt on overflow */
104 
105 /*
106  * Number of bits for specifying each FFC's attributes in the control register
107  */
108 #define	CORE_FFC_ATTR_SIZE	4
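
/*
 * Each FFC is assumed to own one CORE_FFC_ATTR_SIZE-bit field in
 * PERF_FIXED_CTR_CTRL, so the attribute bits for FFC n are shifted into place
 * as (attrs << (n * CORE_FFC_ATTR_SIZE)); for example, enabling user-mode
 * counting on FFC 1 would set CORE_FFC_USR_EN << CORE_FFC_ATTR_SIZE.
 */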
109 
110 /*
111  * CondChgd and OvfBuffer fields of global status and overflow control registers
112  */
113 #define	CONDCHGD	(1ULL << 63)
114 #define	OVFBUFFER	(1ULL << 62)
115 #define	MASK_CONDCHGD_OVFBUFFER	(CONDCHGD | OVFBUFFER)
116 
117 #define	ALL_STOPPED	0ULL
118 
119 #define	BITMASK_XBITS(x)	((1ull << (x)) - 1ull)
120 
121 /*
122  * Only the lower 32-bits can be written to in the general-purpose
123  * counters.  The higher bits are extended from bit 31; all ones if
124  * bit 31 is one and all zeros otherwise.
125  *
126  * The fixed-function counters do not have this restriction.
127  */
128 #define	BITS_EXTENDED_FROM_31	(BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
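
/*
 * Worked example, assuming width_gpc == 40: BITMASK_XBITS(40) covers bits
 * 39:0 and BITMASK_XBITS(31) covers bits 30:0, so BITS_EXTENDED_FROM_31 is
 * bits 39:31.  A preset is only writable when those nine bits are either all
 * ones or all zeros.
 */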
129 
130 #define	WRMSR(msr, value)						\
131 	wrmsr((msr), (value));						\
132 	DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value));
133 
134 #define	RDMSR(msr, value)						\
135 	(value) = rdmsr((msr));						\
136 	DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value));
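
/*
 * The DTRACE_PROBE2() calls above surface as SDT probes, so the MSR traffic
 * generated by this PCBE can be observed from userland, e.g. (illustrative
 * command; 0x38f is PERF_GLOBAL_CTRL):
 *
 *	dtrace -n 'sdt:::wrmsr /arg0 == 0x38f/ { trace(arg1); }'
 */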
137 
138 typedef struct core_pcbe_config {
139 	uint64_t	core_rawpic;
140 	uint64_t	core_ctl;	/* Event Select bits */
141 	uint64_t	core_pmc;	/* Counter register address */
142 	uint64_t	core_pes;	/* Event Select register address */
143 	uint_t		core_picno;
144 	uint8_t		core_pictype;	/* CORE_GPC or CORE_FFC */
145 } core_pcbe_config_t;
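
/*
 * One core_pcbe_config_t is allocated per configured counter request (see
 * configure_gpc() and configure_ffc()) and handed back to the kcpc framework
 * through the *data out-parameter of core_pcbe_configure().
 */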
146 
147 pcbe_ops_t core_pcbe_ops = {
148 	PCBE_VER_1,			/* pcbe_ver */
149 	CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,	/* pcbe_caps */
150 	core_pcbe_ncounters,		/* pcbe_ncounters */
151 	core_pcbe_impl_name,		/* pcbe_impl_name */
152 	core_pcbe_cpuref,		/* pcbe_cpuref */
153 	core_pcbe_list_events,		/* pcbe_list_events */
154 	core_pcbe_list_attrs,		/* pcbe_list_attrs */
155 	core_pcbe_event_coverage,	/* pcbe_event_coverage */
156 	core_pcbe_overflow_bitmap,	/* pcbe_overflow_bitmap */
157 	core_pcbe_configure,		/* pcbe_configure */
158 	core_pcbe_program,		/* pcbe_program */
159 	core_pcbe_allstop,		/* pcbe_allstop */
160 	core_pcbe_sample,		/* pcbe_sample */
161 	core_pcbe_free			/* pcbe_free */
162 };
163 
164 struct nametable_core_uarch {
165 	const char	*name;
166 	uint64_t	restricted_bits;
167 	uint8_t		event_num;
168 };
169 
170 #define	NT_END	0xFF
171 
172 /*
173  * Counting an event for all cores or all bus agents requires the cpc_cpu privilege
174  */
175 #define	ALL_CORES	(1ULL << 15)
176 #define	ALL_AGENTS	(1ULL << 13)
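
/*
 * These restricted bits are enforced by check_cpc_securitypolicy(): a request
 * whose event-select value sets any restricted bit fails with
 * CPC_ATTR_REQUIRES_PRIVILEGE unless secpolicy_cpc_cpu() grants the caller the
 * required privilege.
 */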
177 
178 /*
179  * The events listed in the following table can be counted on all
180  * general-purpose counters on processors of the Penryn and Merom families
181  */
182 static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = {
183 	/* Alphabetical order of event name */
184 
185 	{ "baclears",			0x0,	0xe6 },
186 	{ "bogus_br",			0x0,	0xe4 },
187 	{ "br_bac_missp_exec",		0x0,	0x8a },
188 
189 	{ "br_call_exec",		0x0,	0x92 },
190 	{ "br_call_missp_exec",		0x0,	0x93 },
191 	{ "br_cnd_exec",		0x0,	0x8b },
192 
193 	{ "br_cnd_missp_exec",		0x0,	0x8c },
194 	{ "br_ind_call_exec",		0x0,	0x94 },
195 	{ "br_ind_exec",		0x0,	0x8d },
196 
197 	{ "br_ind_missp_exec",		0x0,	0x8e },
198 	{ "br_inst_decoded",		0x0,	0xe0 },
199 	{ "br_inst_exec",		0x0,	0x88 },
200 
201 	{ "br_inst_retired",		0x0,	0xc4 },
202 	{ "br_inst_retired_mispred",	0x0,	0xc5 },
203 	{ "br_missp_exec",		0x0,	0x89 },
204 
205 	{ "br_ret_bac_missp_exec",	0x0,	0x91 },
206 	{ "br_ret_exec",		0x0,	0x8f },
207 	{ "br_ret_missp_exec",		0x0,	0x90 },
208 
209 	{ "br_tkn_bubble_1",		0x0,	0x97 },
210 	{ "br_tkn_bubble_2",		0x0,	0x98 },
211 	{ "bus_bnr_drv",		ALL_AGENTS,	0x61 },
212 
213 	{ "bus_data_rcv",		ALL_CORES,	0x64 },
214 	{ "bus_drdy_clocks",		ALL_AGENTS,	0x62 },
215 	{ "bus_hit_drv",		ALL_AGENTS,	0x7a },
216 
217 	{ "bus_hitm_drv",		ALL_AGENTS,	0x7b },
218 	{ "bus_io_wait",		ALL_CORES,	0x7f },
219 	{ "bus_lock_clocks",		ALL_CORES | ALL_AGENTS,	0x63 },
220 
221 	{ "bus_request_outstanding",	ALL_CORES | ALL_AGENTS,	0x60 },
222 	{ "bus_trans_any",		ALL_CORES | ALL_AGENTS,	0x70 },
223 	{ "bus_trans_brd",		ALL_CORES | ALL_AGENTS,	0x65 },
224 
225 	{ "bus_trans_burst",		ALL_CORES | ALL_AGENTS,	0x6e },
226 	{ "bus_trans_def",		ALL_CORES | ALL_AGENTS,	0x6d },
227 	{ "bus_trans_ifetch",		ALL_CORES | ALL_AGENTS,	0x68 },
228 
229 	{ "bus_trans_inval",		ALL_CORES | ALL_AGENTS,	0x69 },
230 	{ "bus_trans_io",		ALL_CORES | ALL_AGENTS,	0x6c },
231 	{ "bus_trans_mem",		ALL_CORES | ALL_AGENTS,	0x6f },
232 
233 	{ "bus_trans_p",		ALL_CORES | ALL_AGENTS,	0x6b },
234 	{ "bus_trans_pwr",		ALL_CORES | ALL_AGENTS,	0x6a },
235 	{ "bus_trans_rfo",		ALL_CORES | ALL_AGENTS,	0x66 },
236 
237 	{ "bus_trans_wb",		ALL_CORES | ALL_AGENTS,	0x67 },
238 	{ "busq_empty",			ALL_CORES,	0x7d },
239 	{ "cmp_snoop",			ALL_CORES,	0x78 },
240 
241 	{ "cpu_clk_unhalted",		0x0,	0x3c },
242 	{ "cycles_int",			0x0,	0xc6 },
243 	{ "cycles_l1i_mem_stalled",	0x0,	0x86 },
244 
245 	{ "dtlb_misses",		0x0,	0x08 },
246 	{ "eist_trans",			0x0,	0x3a },
247 	{ "esp",			0x0,	0xab },
248 
249 	{ "ext_snoop",			ALL_AGENTS,	0x77 },
250 	{ "fp_mmx_trans",		0x0,	0xcc },
251 	{ "hw_int_rcv",			0x0,	0xc8 },
252 
253 	{ "ild_stall",			0x0,	0x87 },
254 	{ "inst_queue",			0x0,	0x83 },
255 	{ "inst_retired",		0x0,	0xc0 },
256 
257 	{ "itlb",			0x0,	0x82 },
258 	{ "itlb_miss_retired",		0x0,	0xc9 },
259 	{ "l1d_all_ref",		0x0,	0x43 },
260 
261 	{ "l1d_cache_ld",		0x0,	0x40 },
262 	{ "l1d_cache_lock",		0x0,	0x42 },
263 	{ "l1d_cache_st",		0x0,	0x41 },
264 
265 	{ "l1d_m_evict",		0x0,	0x47 },
266 	{ "l1d_m_repl",			0x0,	0x46 },
267 	{ "l1d_pend_miss",		0x0,	0x48 },
268 
269 	{ "l1d_prefetch",		0x0,	0x4e },
270 	{ "l1d_repl",			0x0,	0x45 },
271 	{ "l1d_split",			0x0,	0x49 },
272 
273 	{ "l1i_misses",			0x0,	0x81 },
274 	{ "l1i_reads",			0x0,	0x80 },
275 	{ "l2_ads",			ALL_CORES,	0x21 },
276 
277 	{ "l2_dbus_busy_rd",		ALL_CORES,	0x23 },
278 	{ "l2_ifetch",			ALL_CORES,	0x28 },
279 	{ "l2_ld",			ALL_CORES,	0x29 },
280 
281 	{ "l2_lines_in",		ALL_CORES,	0x24 },
282 	{ "l2_lines_out",		ALL_CORES,	0x26 },
283 	{ "l2_lock",			ALL_CORES,	0x2b },
284 
285 	{ "l2_m_lines_in",		ALL_CORES,	0x25 },
286 	{ "l2_m_lines_out",		ALL_CORES,	0x27 },
287 	{ "l2_no_req",			ALL_CORES,	0x32 },
288 
289 	{ "l2_reject_busq",		ALL_CORES,	0x30 },
290 	{ "l2_rqsts",			ALL_CORES,	0x2e },
291 	{ "l2_st",			ALL_CORES,	0x2a },
292 
293 	{ "load_block",			0x0,	0x03 },
294 	{ "load_hit_pre",		0x0,	0x4c },
295 	{ "machine_nukes",		0x0,	0xc3 },
296 
297 	{ "macro_insts",		0x0,	0xaa },
298 	{ "memory_disambiguation",	0x0,	0x09 },
299 	{ "misalign_mem_ref",		0x0,	0x05 },
300 	{ "page_walks",			0x0,	0x0c },
301 
302 	{ "pref_rqsts_dn",		0x0,	0xf8 },
303 	{ "pref_rqsts_up",		0x0,	0xf0 },
304 	{ "rat_stalls",			0x0,	0xd2 },
305 
306 	{ "resource_stalls",		0x0,	0xdc },
307 	{ "rs_uops_dispatched",		0x0,	0xa0 },
308 	{ "seg_reg_renames",		0x0,	0xd5 },
309 
310 	{ "seg_rename_stalls",		0x0,	0xd4 },
311 	{ "segment_reg_loads",		0x0,	0x06 },
312 	{ "simd_assist",		0x0,	0xcd },
313 
314 	{ "simd_comp_inst_retired",	0x0,	0xca },
315 	{ "simd_inst_retired",		0x0,	0xc7 },
316 	{ "simd_instr_retired",		0x0,	0xce },
317 
318 	{ "simd_sat_instr_retired",	0x0,	0xcf },
319 	{ "simd_sat_uop_exec",		0x0,	0xb1 },
320 	{ "simd_uop_type_exec",		0x0,	0xb3 },
321 
322 	{ "simd_uops_exec",		0x0,	0xb0 },
323 	{ "snoop_stall_drv",		ALL_CORES | ALL_AGENTS,	0x7e },
324 	{ "sse_pre_exec",		0x0,	0x07 },
325 
326 	{ "sse_pre_miss",		0x0,	0x4b },
327 	{ "store_block",		0x0,	0x04 },
328 	{ "thermal_trip",		0x0,	0x3b },
329 
330 	{ "uops_retired",		0x0,	0xc2 },
331 	{ "x87_ops_retired",		0x0,	0xc1 },
332 	{ "",				0x0,	NT_END }
333 };
334 
335 /*
336  * If any of the pic-specific events require privileges, make sure to add a
337  * check in configure_gpc() to find whether an event hard-coded as a number by
338  * the user has any privilege requirements
339  */
340 static const struct nametable_core_uarch pic0_events[] = {
341 	/* Alphabetical order of event name */
342 
343 	{ "cycles_div_busy",		0x0,	0x14 },
344 	{ "fp_comp_ops_exe",		0x0,	0x10 },
345 	{ "idle_during_div",		0x0,	0x18 },
346 
347 	{ "mem_load_retired",		0x0,	0xcb },
348 	{ "rs_uops_dispatched_port",	0x0,	0xa1 },
349 	{ "",				0x0,	NT_END }
350 };
351 
352 static const struct nametable_core_uarch pic1_events[] = {
353 	/* Alphabetical order of event name */
354 
355 	{ "delayed_bypass",	0x0,	0x19 },
356 	{ "div",		0x0,	0x13 },
357 	{ "fp_assist",		0x0,	0x11 },
358 
359 	{ "mul",		0x0,	0x12 },
360 	{ "",			0x0,	NT_END }
361 };
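
/*
 * The name tables above are kept in alphabetical order because
 * find_gpcevent_core_uarch() stops scanning as soon as strcmp() reports that
 * the requested name sorts at or before the current entry.
 */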
362 
363 /* FFC entries must be in order */
364 char *ffc_names_non_htt[] = {
365 	"instr_retired.any",
366 	"cpu_clk_unhalted.core",
367 	"cpu_clk_unhalted.ref",
368 	NULL
369 };
370 
371 char *ffc_names_htt[] = {
372 	"instr_retired.any",
373 	"cpu_clk_unhalted.thread",
374 	"cpu_clk_unhalted.ref",
375 	NULL
376 };
377 
378 char **ffc_names = NULL;
379 
380 static char	**gpc_names = NULL;
381 static uint32_t	versionid;
382 static uint64_t	num_gpc;
383 static uint64_t	width_gpc;
384 static uint64_t	mask_gpc;
385 static uint64_t	num_ffc;
386 static uint64_t	width_ffc;
387 static uint64_t	mask_ffc;
388 static uint_t	total_pmc;
389 static uint64_t	control_ffc;
390 static uint64_t	control_gpc;
391 static uint64_t	control_mask;
392 static uint32_t	arch_events_vector;
393 
394 #define	IMPL_NAME_LEN 100
395 static char core_impl_name[IMPL_NAME_LEN];
396 
397 static const char *core_cpuref =
398 	"See Appendix A of the \"Intel 64 and IA-32 Architectures Software" \
399 	" Developer's Manual Volume 3B: System Programming Guide, Part 2\"" \
400 	" Order Number: 253669-026US, February 2008";
401 
402 struct events_table_t {
403 	uint8_t		eventselect;
404 	uint8_t		unitmask;
405 	uint64_t	supported_counters;
406 	const char	*name;
407 };
408 
409 /* Used to describe which counters support an event */
410 #define	C(x) (1 << (x))
411 #define	C0 C(0)
412 #define	C1 C(1)
413 #define	C2 C(2)
414 #define	C3 C(3)
415 #define	C_ALL 0xFFFFFFFFFFFFFFFF
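
/*
 * The supported_counters bitmap is tested against C(picnum); an event tagged
 * C0|C1, for example, can only be programmed on GPC 0 or GPC 1, while C_ALL
 * places no restriction.  See configure_gpc() and core_pcbe_event_coverage().
 */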
416 
417 /* Architectural events */
418 #define	ARCH_EVENTS_COMMON					\
419 	{ 0xc0, 0x00, C_ALL, "inst_retired.any_p" },		\
420 	{ 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" },	\
421 	{ 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" },	\
422 	{ 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" },	\
423 	{ 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" },	\
424 	{ 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }
425 
426 const struct events_table_t arch_events_table_non_htt[] = {
427 	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
428 	ARCH_EVENTS_COMMON
429 };
430 
431 const struct events_table_t arch_events_table_htt[] = {
432 	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
433 	ARCH_EVENTS_COMMON
434 };
435 
436 const struct events_table_t *arch_events_table = NULL;
437 static uint64_t known_arch_events;
438 static uint64_t known_ffc_num;
439 
440 #define	EVENTS_FAM6_MOD26						\
441 									\
442 { 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" },			\
443 { 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" },				\
444 { 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" },				\
445 									\
446 { 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" },				\
447 { 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" },				\
448 { 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" },				\
449 									\
450 { 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" },			\
451 { 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" },				\
452 { 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" },				\
453 									\
454 { 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" },				\
455 { 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" },			\
456 { 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" },				\
457 									\
458 { 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" },			\
459 { 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" },				\
460 { 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" },			\
461 									\
462 { 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" },				\
463 { 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" },			\
464 { 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" },			\
465 									\
466 { 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" },			\
467 { 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" },		\
468 { 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" },		\
469 									\
470 { 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" },		\
471 { 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" },			\
472 { 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" },			\
473 									\
474 { 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" },		\
475 { 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" },			\
476 { 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" },			\
477 									\
478 { 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" },			\
479 { 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" },			\
480 { 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" },		\
481 									\
482 { 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" },		\
483 { 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" },		\
484 { 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" },			\
485 									\
486 { 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" },			\
487 { 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" },		\
488 { 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" },			\
489 									\
490 { 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" },			\
491 { 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" },			\
492 { 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" },			\
493 									\
494 { 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" },			\
495 { 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" },		\
496 { 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" },			\
497 									\
498 { 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" },			\
499 { 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" },		\
500 { 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" },			\
501 									\
502 { 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" },				\
503 { 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" },				\
504 { 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" },				\
505 									\
506 { 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" },			\
507 { 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" },			\
508 { 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" },			\
509 									\
510 { 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" },			\
511 { 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" },			\
512 { 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" },			\
513 									\
514 { 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" },			\
515 { 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" },			\
516 { 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" },			\
517 									\
518 { 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" },		\
519 { 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" },			\
520 { 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" },				\
521 									\
522 { 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" },			\
523 { 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" },				\
524 { 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" },			\
525 									\
526 { 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" },	\
527 { 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" },			\
528 { 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" },		\
529 									\
530 { 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" },		\
531 { 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" },	\
532 { 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" },		\
533 									\
534 { 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" },			\
535 { 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" },			\
536 { 0x14, 0x02, C0|C1|C2|C3, "arith.mul" },				\
537 									\
538 { 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" },			\
539 { 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" },		\
540 { 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" },		\
541 									\
542 { 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" },			\
543 { 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" },		\
544 { 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" },		\
545 									\
546 { 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" },			\
547 { 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" },			\
548 { 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" },		\
549 									\
550 { 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" },		\
551 { 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" },			\
552 { 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" },		\
553 									\
554 { 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" },		\
555 { 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" },			\
556 { 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" },			\
557 									\
558 { 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" },			\
559 { 0x40, 0x04, C0|C1, "l1d_cache_ld.e_state" },				\
560 { 0x40, 0x01, C0|C1, "l1d_cache_ld.i_state" },				\
561 									\
562 { 0x40, 0x08, C0|C1, "l1d_cache_ld.m_state" },				\
563 { 0x40, 0x0F, C0|C1, "l1d_cache_ld.mesi" },				\
564 { 0x40, 0x02, C0|C1, "l1d_cache_ld.s_state" },				\
565 									\
566 { 0x41, 0x04, C0|C1, "l1d_cache_st.e_state" },				\
567 { 0x41, 0x08, C0|C1, "l1d_cache_st.m_state" },				\
568 { 0x41, 0x0F, C0|C1, "l1d_cache_st.mesi" },				\
569 									\
570 { 0x41, 0x02, C0|C1, "l1d_cache_st.s_state" },				\
571 { 0x42, 0x04, C0|C1, "l1d_cache_lock.e_state" },			\
572 { 0x42, 0x01, C0|C1, "l1d_cache_lock.hit" },				\
573 									\
574 { 0x42, 0x08, C0|C1, "l1d_cache_lock.m_state" },			\
575 { 0x42, 0x02, C0|C1, "l1d_cache_lock.s_state" },			\
576 { 0x43, 0x01, C0|C1, "l1d_all_ref.any" },				\
577 									\
578 { 0x43, 0x02, C0|C1, "l1d_all_ref.cacheable" },				\
579 { 0x4B, 0x01, C0|C1, "mmx2_mem_exec.nta" },				\
580 { 0x4C, 0x01, C0|C1, "load_hit_pre" },					\
581 									\
582 { 0x4E, 0x02, C0|C1, "l1d_prefetch.miss" },				\
583 { 0x4E, 0x01, C0|C1, "l1d_prefetch.requests" },				\
584 { 0x4E, 0x04, C0|C1, "l1d_prefetch.triggers" },				\
585 									\
586 { 0x51, 0x04, C0|C1, "l1d.m_evict" },					\
587 { 0x51, 0x02, C0|C1, "l1d.m_repl" },					\
588 { 0x51, 0x08, C0|C1, "l1d.m_snoop_evict" },				\
589 									\
590 { 0x51, 0x01, C0|C1, "l1d.repl" },					\
591 { 0x52, 0x01, C0|C1, "l1d_cache_prefetch_lock_fb_hit" },		\
592 { 0x53, 0x01, C0|C1, "l1d_cache_lock_fb_hit" },				\
593 									\
594 { 0x63, 0x02, C0|C1, "cache_lock_cycles.l1d" },				\
595 { 0x63, 0x01, C0|C1, "cache_lock_cycles.l1d_l2" },			\
596 { 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" },			\
597 									\
598 { 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" },			\
599 { 0x06, 0x01, C0|C1|C2|C3, "store_blocks.not_sta" },			\
600 { 0x06, 0x02, C0|C1|C2|C3, "store_blocks.sta" },			\
601 									\
602 { 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" },			\
603 { 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" },			\
604 { 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" },			\
605 									\
606 { 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" },		\
607 { 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" },			\
608 { 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" },		\
609 									\
610 { 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" },		\
611 { 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" },				\
612 { 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" },			\
613 									\
614 { 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" },		\
615 { 0x4F, 0x02, C0|C1|C2|C3, "ept.epde_miss" },				\
616 { 0x4F, 0x08, C0|C1|C2|C3, "ept.epdpe_miss" },				\
617 									\
618 { 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" },				\
619 { 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" },		\
620 { 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" },				\
621 									\
622 { 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" },			\
623 { 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" },			\
624 { 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" },			\
625 									\
626 { 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" },			\
627 { 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" },				\
628 { 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" },			\
629 									\
630 { 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" },				\
631 { 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" },			\
632 { 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" },			\
633 									\
634 { 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" },			\
635 { 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" },			\
636 { 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" },			\
637 									\
638 { 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" },				\
639 { 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" },			\
640 { 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" },		\
641 									\
642 { 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" },		\
643 { 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" },		\
644 { 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" },		\
645 									\
646 { 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" },		\
647 { 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" },		\
648 { 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" },		\
649 									\
650 { 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" },		\
651 { 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" },		\
652 { 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" },		\
653 									\
654 { 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" },			\
655 { 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" },			\
656 { 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" },			\
657 									\
658 { 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" },			\
659 { 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" },			\
660 { 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" },			\
661 									\
662 { 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" },			\
663 { 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" },			\
664 { 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" },			\
665 									\
666 { 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" },			\
667 { 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" },			\
668 { 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" },			\
669 									\
670 { 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" },			\
671 { 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" },			\
672 { 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" },			\
673 									\
674 { 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" },			\
675 { 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" },		\
676 { 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" },			\
677 									\
678 { 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" },			\
679 { 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" },				\
680 { 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" },			\
681 									\
682 { 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" },			\
683 { 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" },			\
684 { 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" },		\
685 									\
686 { 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" },		\
687 { 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" },		\
688 { 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" },				\
689 									\
690 { 0xB0, 0x80, C0|C1|C2|C3, "offcore_requests.any" },			\
691 { 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" },		\
692 { 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" },		\
693 									\
694 { 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" },			\
695 { 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" },			\
696 { 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" },			\
697 									\
698 { 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" },			\
699 { 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" },			\
700 { 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" },			\
701 									\
702 { 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" },			\
703 { 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" },			\
704 { 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" },		\
705 									\
706 { 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" },		\
707 { 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" },		\
708 { 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" },		\
709 									\
710 { 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" },	\
711 { 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" },			\
712 { 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" },		\
713 									\
714 { 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" },			\
715 { 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" },		\
716 { 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" },		\
717 									\
718 { 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" },		\
719 { 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" },			\
720 { 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" },		\
721 									\
722 { 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" },	\
723 { 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" },	\
724 { 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm" },	\
725 									\
726 { 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit" },\
727 { 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" },		\
728 { 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" },		\
729 									\
730 { 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" },		\
731 { 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" },		\
732 { 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" },		\
733 									\
734 { 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" },		\
735 { 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" },				\
736 { 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" },				\
737 									\
738 { 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" },				\
739 { 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" },			\
740 { 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" },			\
741 									\
742 { 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" },			\
743 { 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" },			\
744 { 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" }
745 
746 #define	EVENTS_FAM6_MOD28						\
747 	{ 0x2,  0x81, C0|C1, "store_forwards.good" },                   \
748 	{ 0x6,  0x0,  C0|C1, "segment_reg_loads.any" },                 \
749 	{ 0x7,  0x1,  C0|C1, "prefetch.prefetcht0" },                   \
750 	{ 0x7,  0x6,  C0|C1, "prefetch.sw_l2" },                        \
751 	{ 0x7,  0x8,  C0|C1, "prefetch.prefetchnta" },                  \
752 	{ 0x8,  0x7,  C0|C1, "data_tlb_misses.dtlb_miss" },             \
753 	{ 0x8,  0x5,  C0|C1, "data_tlb_misses.dtlb_miss_ld" },          \
754 	{ 0x8,  0x9,  C0|C1, "data_tlb_misses.l0_dtlb_miss_ld" },	\
755 	{ 0x8,  0x6,  C0|C1, "data_tlb_misses.dtlb_miss_st" },          \
756 	{ 0xC,  0x3,  C0|C1, "page_walks.cycles" },                     \
757 	{ 0x10, 0x1,  C0|C1, "x87_comp_ops_exe.any.s" },                \
758 	{ 0x10, 0x81, C0|C1, "x87_comp_ops_exe.any.ar" },               \
759 	{ 0x11, 0x1,  C0|C1, "fp_assist" },                             \
760 	{ 0x11, 0x81, C0|C1, "fp_assist.ar" },                          \
761 	{ 0x12, 0x1,  C0|C1, "mul.s" },                                 \
762 	{ 0x12, 0x81, C0|C1, "mul.ar" },                                \
763 	{ 0x13, 0x1,  C0|C1, "div.s" },                                 \
764 	{ 0x13, 0x81, C0|C1, "div.ar" },                                \
765 	{ 0x14, 0x1,  C0|C1, "cycles_div_busy" },                       \
766 	{ 0x21, 0x0,  C0|C1, "l2_ads" },                      		\
767 	{ 0x22, 0x0,  C0|C1, "l2_dbus_busy" },                		\
768 	{ 0x24, 0x0,  C0|C1, "l2_lines_in" },   			\
769 	{ 0x25, 0x0,  C0|C1, "l2_m_lines_in" },               		\
770 	{ 0x26, 0x0,  C0|C1, "l2_lines_out" },  			\
771 	{ 0x27, 0x0,  C0|C1, "l2_m_lines_out" },			\
772 	{ 0x28, 0x0,  C0|C1, "l2_ifetch" },  				\
773 	{ 0x29, 0x0,  C0|C1, "l2_ld" },					\
774 	{ 0x2A, 0x0,  C0|C1, "l2_st" },      				\
775 	{ 0x2B, 0x0,  C0|C1, "l2_lock" },    				\
776 	{ 0x2E, 0x0,  C0|C1, "l2_rqsts" },             			\
777 	{ 0x2E, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" },		\
778 	{ 0x2E, 0x4F, C0|C1, "l2_rqsts.self.demand.mesi" },		\
779 	{ 0x30, 0x0,  C0|C1, "l2_reject_bus_q" },			\
780 	{ 0x32, 0x0,  C0|C1, "l2_no_req" },                   		\
781 	{ 0x3A, 0x0,  C0|C1, "eist_trans" },                            \
782 	{ 0x3B, 0xC0, C0|C1, "thermal_trip" },                          \
783 	{ 0x3C, 0x0,  C0|C1, "cpu_clk_unhalted.core_p" },               \
784 	{ 0x3C, 0x1,  C0|C1, "cpu_clk_unhalted.bus" },                  \
785 	{ 0x3C, 0x2,  C0|C1, "cpu_clk_unhalted.no_other" },             \
786 	{ 0x40, 0x21, C0|C1, "l1d_cache.ld" },                          \
787 	{ 0x40, 0x22, C0|C1, "l1d_cache.st" },                          \
788 	{ 0x60, 0x0,  C0|C1, "bus_request_outstanding" },		\
789 	{ 0x61, 0x0,  C0|C1, "bus_bnr_drv" },                		\
790 	{ 0x62, 0x0,  C0|C1, "bus_drdy_clocks" },            		\
791 	{ 0x63, 0x0,  C0|C1, "bus_lock_clocks" },  			\
792 	{ 0x64, 0x0,  C0|C1, "bus_data_rcv" },                		\
793 	{ 0x65, 0x0,  C0|C1, "bus_trans_brd" },    			\
794 	{ 0x66, 0x0,  C0|C1, "bus_trans_rfo" },    			\
795 	{ 0x67, 0x0,  C0|C1, "bus_trans_wb" },     			\
796 	{ 0x68, 0x0,  C0|C1, "bus_trans_ifetch" }, 			\
797 	{ 0x69, 0x0,  C0|C1, "bus_trans_inval" },  			\
798 	{ 0x6A, 0x0,  C0|C1, "bus_trans_pwr" },				\
799 	{ 0x6B, 0x0,  C0|C1, "bus_trans_p" },      			\
800 	{ 0x6C, 0x0,  C0|C1, "bus_trans_io" },     			\
801 	{ 0x6D, 0x0,  C0|C1, "bus_trans_def" },    			\
802 	{ 0x6E, 0x0,  C0|C1, "bus_trans_burst" },  			\
803 	{ 0x6F, 0x0,  C0|C1, "bus_trans_mem" },    			\
804 	{ 0x70, 0x0,  C0|C1, "bus_trans_any" },    			\
805 	{ 0x77, 0x0,  C0|C1, "ext_snoop" },     			\
806 	{ 0x7A, 0x0,  C0|C1, "bus_hit_drv" },                		\
807 	{ 0x7B, 0x0,  C0|C1, "bus_hitm_drv" },               		\
808 	{ 0x7D, 0x0,  C0|C1, "busq_empty" },                  		\
809 	{ 0x7E, 0x0,  C0|C1, "snoop_stall_drv" },  			\
810 	{ 0x7F, 0x0,  C0|C1, "bus_io_wait" },				\
811 	{ 0x80, 0x3,  C0|C1, "icache.accesses" },                       \
812 	{ 0x80, 0x2,  C0|C1, "icache.misses" },                         \
813 	{ 0x82, 0x4,  C0|C1, "itlb.flush" },                            \
814 	{ 0x82, 0x2,  C0|C1, "itlb.misses" },                           \
815 	{ 0xAA, 0x2,  C0|C1, "macro_insts.cisc_decoded" },              \
816 	{ 0xAA, 0x3,  C0|C1, "macro_insts.all_decoded" },               \
817 	{ 0xB0, 0x0,  C0|C1, "simd_uops_exec.s" },                      \
818 	{ 0xB0, 0x80, C0|C1, "simd_uops_exec.ar" },                     \
819 	{ 0xB1, 0x0,  C0|C1, "simd_sat_uop_exec.s" },                   \
820 	{ 0xB1, 0x80, C0|C1, "simd_sat_uop_exec.ar" },                  \
821 	{ 0xB3, 0x1,  C0|C1, "simd_uop_type_exec.mul.s" },              \
822 	{ 0xB3, 0x81, C0|C1, "simd_uop_type_exec.mul.ar" },             \
823 	{ 0xB3, 0x02, C0|C1, "simd_uop_type_exec.shift.s" },            \
824 	{ 0xB3, 0x82, C0|C1, "simd_uop_type_exec.shift.ar" },           \
825 	{ 0xB3, 0x04, C0|C1, "simd_uop_type_exec.pack.s" },             \
826 	{ 0xB3, 0x84, C0|C1, "simd_uop_type_exec.pack.ar" },            \
827 	{ 0xB3, 0x08, C0|C1, "simd_uop_type_exec.unpack.s" },           \
828 	{ 0xB3, 0x88, C0|C1, "simd_uop_type_exec.unpack.ar" },          \
829 	{ 0xB3, 0x10, C0|C1, "simd_uop_type_exec.logical.s" },          \
830 	{ 0xB3, 0x90, C0|C1, "simd_uop_type_exec.logical.ar" },         \
831 	{ 0xB3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic.s" },       \
832 	{ 0xB3, 0xA0, C0|C1, "simd_uop_type_exec.arithmetic.ar" },      \
833 	{ 0xC2, 0x10, C0|C1, "uops_retired.any" },                      \
834 	{ 0xC3, 0x1,  C0|C1, "machine_clears.smc" },                    \
835 	{ 0xC4, 0x0,  C0|C1, "br_inst_retired.any" },                   \
836 	{ 0xC4, 0x1,  C0|C1, "br_inst_retired.pred_not_taken" },        \
837 	{ 0xC4, 0x2,  C0|C1, "br_inst_retired.mispred_not_taken" },     \
838 	{ 0xC4, 0x4,  C0|C1, "br_inst_retired.pred_taken" },            \
839 	{ 0xC4, 0x8,  C0|C1, "br_inst_retired.mispred_taken" },         \
840 	{ 0xC4, 0xA,  C0|C1, "br_inst_retired.mispred" },               \
841 	{ 0xC4, 0xC,  C0|C1, "br_inst_retired.taken" },                 \
842 	{ 0xC4, 0xF,  C0|C1, "br_inst_retired.any1" },                  \
843 	{ 0xC6, 0x1,  C0|C1, "cycles_int_masked.cycles_int_masked" },   \
844 	{ 0xC6, 0x2,  C0|C1,						\
845 		"cycles_int_masked.cycles_int_pending_and_masked" },	\
846 	{ 0xC7, 0x1,  C0|C1, "simd_inst_retired.packed_single" },       \
847 	{ 0xC7, 0x2,  C0|C1, "simd_inst_retired.scalar_single" },      	\
848 	{ 0xC7, 0x4,  C0|C1, "simd_inst_retired.packed_double" },       \
849 	{ 0xC7, 0x8,  C0|C1, "simd_inst_retired.scalar_double" },       \
850 	{ 0xC7, 0x10, C0|C1, "simd_inst_retired.vector" },              \
851 	{ 0xC7, 0x1F, C0|C1, "simd_inst_retired.any" },                 \
852 	{ 0xC8, 0x00, C0|C1, "hw_int_rcv" },                            \
853 	{ 0xCA, 0x1,  C0|C1, "simd_comp_inst_retired.packed_single" },  \
854 	{ 0xCA, 0x2,  C0|C1, "simd_comp_inst_retired.scalar_single" }, 	\
855 	{ 0xCA, 0x4,  C0|C1, "simd_comp_inst_retired.packed_double" },  \
856 	{ 0xCA, 0x8,  C0|C1, "simd_comp_inst_retired.scalar_double" },  \
857 	{ 0xCB, 0x1,  C0|C1, "mem_load_retired.l2_hit" },               \
858 	{ 0xCB, 0x2,  C0|C1, "mem_load_retired.l2_miss" },              \
859 	{ 0xCB, 0x4,  C0|C1, "mem_load_retired.dtlb_miss" },           	\
860 	{ 0xCD, 0x0,  C0|C1, "simd_assist" },                           \
861 	{ 0xCE, 0x0,  C0|C1, "simd_instr_retired" },                    \
862 	{ 0xCF, 0x0,  C0|C1, "simd_sat_instr_retired" },                \
863 	{ 0xE0, 0x1,  C0|C1, "br_inst_decoded" },                       \
864 	{ 0xE4, 0x1,  C0|C1, "bogus_br" },                             	\
865 	{ 0xE6, 0x1,  C0|C1, "baclears.any" }
866 
867 static const struct events_table_t *events_table = NULL;
868 
869 const struct events_table_t events_fam6_mod26[] = {
870 	EVENTS_FAM6_MOD26,
871 	{ NT_END, 0, 0, "" }
872 };
873 
874 const struct events_table_t events_fam6_mod28[] = {
875 	EVENTS_FAM6_MOD28,
876 	{ NT_END, 0, 0, "" }
877 };
878 
879 /*
880  * Initialize string containing list of supported general-purpose counter
881  * events for processors of the Penryn and Merom families
882  */
883 static void
884 pcbe_init_core_uarch()
885 {
886 	const struct nametable_core_uarch	*n;
887 	const struct nametable_core_uarch	*picspecific_events;
888 	size_t			common_size;
889 	size_t			size;
890 	uint64_t		i;
891 
892 	gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
893 
894 	/* Calculate space needed to save all the common event names */
895 	common_size = 0;
896 	for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; n++) {
897 		common_size += strlen(n->name) + 1;
898 	}
899 
900 	for (i = 0; i < num_gpc; i++) {
901 		size = 0;
902 		switch (i) {
903 			case 0:
904 				picspecific_events = pic0_events;
905 				break;
906 			case 1:
907 				picspecific_events = pic1_events;
908 				break;
909 			default:
910 				picspecific_events = NULL;
911 				break;
912 		}
913 		if (picspecific_events != NULL) {
914 			for (n = picspecific_events;
915 			    n->event_num != NT_END;
916 			    n++) {
917 				size += strlen(n->name) + 1;
918 			}
919 		}
920 
921 		gpc_names[i] =
922 		    kmem_alloc(size + common_size + 1, KM_SLEEP);
923 
924 		gpc_names[i][0] = '\0';
925 		if (picspecific_events != NULL) {
926 			for (n = picspecific_events;
927 			    n->event_num != NT_END;
928 			    n++) {
929 				(void) strcat(gpc_names[i], n->name);
930 				(void) strcat(gpc_names[i], ",");
931 			}
932 		}
933 		for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END;
934 		    n++) {
935 			(void) strcat(gpc_names[i], n->name);
936 			(void) strcat(gpc_names[i], ",");
937 		}
938 		/*
939 		 * Remove trailing comma.
940 		 */
941 		gpc_names[i][common_size + size - 1] = '\0';
942 	}
943 }
944 
945 static int
946 core_pcbe_init(void)
947 {
948 	struct cpuid_regs	cp;
949 	size_t			size;
950 	uint64_t		i;
951 	uint64_t		j;
952 	uint64_t		arch_events_vector_length;
953 	size_t			arch_events_string_length;
954 
955 	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
956 		return (-1);
957 
958 	/* Obtain Basic CPUID information */
959 	cp.cp_eax = 0x0;
960 	(void) __cpuid_insn(&cp);
961 
962 	/* No Architectural Performance Monitoring Leaf returned by CPUID */
963 	if (cp.cp_eax < 0xa) {
964 		return (-1);
965 	}
966 
967 	/* Obtain the Architectural Performance Monitoring Leaf */
968 	cp.cp_eax = 0xa;
969 	(void) __cpuid_insn(&cp);
970 
971 	versionid = cp.cp_eax & 0xFF;
972 
973 	/*
974 	 * Fixed-Function Counters (FFC)
975 	 *
976 	 * All Family 6 Model 15 and Model 23 processors have fixed-function
977 	 * counters.  These counters were made Architectural with
978 	 * Family 6 Model 15 Stepping 9.
979 	 */
980 	switch (versionid) {
981 
982 		case 0:
983 			return (-1);
984 
985 		case 2:
986 			num_ffc = cp.cp_edx & 0x1F;
987 			width_ffc = (cp.cp_edx >> 5) & 0xFF;
988 
989 			/*
990 			 * Some processors have an errata (AW34) where
991 			 * versionid is reported as 2 when actually 1.
992 			 * In this case, fixed-function counters are
993 			 * model-specific as in Version 1.
994 			 */
995 			if (num_ffc != 0) {
996 				break;
997 			}
998 			/* FALLTHROUGH */
999 		case 1:
1000 			num_ffc = 3;
1001 			width_ffc = 40;
1002 			versionid = 1;
1003 			break;
1004 
1005 		default:
1006 			num_ffc = cp.cp_edx & 0x1F;
1007 			width_ffc = (cp.cp_edx >> 5) & 0xFF;
1008 			break;
1009 	}
1010 
1011 
1012 	if (num_ffc >= 64)
1013 		return (-1);
1014 
1015 	/* Set HTT-specific names of architectural & FFC events */
1016 	if (x86_feature & X86_HTT) {
1017 		ffc_names = ffc_names_htt;
1018 		arch_events_table = arch_events_table_htt;
1019 		known_arch_events =
1020 		    sizeof (arch_events_table_htt) /
1021 		    sizeof (struct events_table_t);
1022 		known_ffc_num =
1023 		    sizeof (ffc_names_htt) / sizeof (char *);
1024 	} else {
1025 		ffc_names = ffc_names_non_htt;
1026 		arch_events_table = arch_events_table_non_htt;
1027 		known_arch_events =
1028 		    sizeof (arch_events_table_non_htt) /
1029 		    sizeof (struct events_table_t);
1030 		known_ffc_num =
1031 		    sizeof (ffc_names_non_htt) / sizeof (char *);
1032 	}
1033 
1034 	if (num_ffc >= known_ffc_num) {
1035 		/*
1036 		 * The system seems to have more fixed-function counters than
1037 		 * what this PCBE is able to handle correctly.  Default to the
1038 		 * maximum number of fixed-function counters that this driver
1039 		 * is aware of.
1040 		 */
1041 		num_ffc = known_ffc_num - 1;
1042 	}
1043 
1044 	mask_ffc = BITMASK_XBITS(width_ffc);
1045 	control_ffc = BITMASK_XBITS(num_ffc);
1046 
1047 	/*
1048 	 * General Purpose Counters (GPC)
1049 	 */
1050 	num_gpc = (cp.cp_eax >> 8) & 0xFF;
1051 	width_gpc = (cp.cp_eax >> 16) & 0xFF;
1052 
1053 	if (num_gpc >= 64)
1054 		return (-1);
1055 
1056 	mask_gpc = BITMASK_XBITS(width_gpc);
1057 
1058 	control_gpc = BITMASK_XBITS(num_gpc);
1059 
1060 	control_mask = (control_ffc << 32) | control_gpc;
1061 
1062 	total_pmc = num_gpc + num_ffc;
1063 	if (total_pmc > 64) {
1064 		/* Too wide for the overflow bitmap */
1065 		return (-1);
1066 	}
1067 
1068 	/* GPC events for Family 6 Models 15, 23 and 29 only */
1069 	if ((cpuid_getfamily(CPU) == 6) &&
1070 	    ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23) ||
1071 	    (cpuid_getmodel(CPU) == 29))) {
1072 		(void) snprintf(core_impl_name, IMPL_NAME_LEN,
1073 		    "Core Microarchitecture");
1074 		pcbe_init_core_uarch();
1075 		return (0);
1076 	}
1077 
1078 	(void) snprintf(core_impl_name, IMPL_NAME_LEN,
1079 	    "Intel Arch PerfMon v%d on Family %d Model %d",
1080 	    versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU));
1081 
1082 	/*
1083 	 * Architectural events
1084 	 */
1085 	arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF;
1086 
1087 	ASSERT(known_arch_events == arch_events_vector_length);
1088 
1089 	/*
1090 	 * The ASSERT above is compiled out on non-DEBUG kernels, so clamp to
1091 	 * the smaller of the two lengths if they disagree.
1092 	 */
1093 	if (known_arch_events > arch_events_vector_length) {
1094 		known_arch_events = arch_events_vector_length;
1095 	} else {
1096 		arch_events_vector_length = known_arch_events;
1097 	}
1098 
1099 	arch_events_vector = cp.cp_ebx &
1100 	    BITMASK_XBITS(arch_events_vector_length);
1101 
1102 	/*
1103 	 * Process architectural and non-architectural events using GPC
1104 	 */
1105 	if (num_gpc > 0) {
1106 
1107 		gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
1108 
1109 		/* Calculate space required for the architectural gpc events */
1110 		arch_events_string_length = 0;
1111 		for (i = 0; i < known_arch_events; i++) {
1112 			if (((1U << i) & arch_events_vector) == 0) {
1113 				arch_events_string_length +=
1114 				    strlen(arch_events_table[i].name) + 1;
1115 			}
1116 		}
1117 
1118 		/* Non-architectural events list */
1119 		if (cpuid_getmodel(CPU) == 26) {
1120 			events_table = events_fam6_mod26;
1121 		} else if (cpuid_getmodel(CPU) == 28) {
1122 			events_table = events_fam6_mod28;
1123 		}
1124 
1125 		for (i = 0; i < num_gpc; i++) {
1126 
1127 			/*
1128 			 * Determine length of all supported event names
1129 			 * (architectural + non-architectural)
1130 			 */
1131 			size = arch_events_string_length;
1132 			for (j = 0; events_table != NULL &&
1133 			    events_table[j].eventselect != NT_END;
1134 			    j++) {
1135 				if (C(i) & events_table[j].supported_counters) {
1136 					size += strlen(events_table[j].name) +
1137 					    1;
1138 				}
1139 			}
1140 
1141 			/* Allocate memory for this pics list */
1142 			gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP);
1143 			gpc_names[i][0] = '\0';
1144 			if (size == 0) {
1145 				continue;
1146 			}
1147 
1148 			/*
1149 			 * Create the list of all supported events
1150 			 * (architectural + non-architectural)
1151 			 */
1152 			for (j = 0; j < known_arch_events; j++) {
1153 				if (((1U << j) & arch_events_vector) == 0) {
1154 					(void) strcat(gpc_names[i],
1155 					    arch_events_table[j].name);
1156 					(void) strcat(gpc_names[i], ",");
1157 				}
1158 			}
1159 
1160 			for (j = 0; events_table != NULL &&
1161 			    events_table[j].eventselect != NT_END;
1162 			    j++) {
1163 				if (C(i) & events_table[j].supported_counters) {
1164 					(void) strcat(gpc_names[i],
1165 					    events_table[j].name);
1166 					(void) strcat(gpc_names[i], ",");
1167 				}
1168 			}
1169 
1170 			/* Remove trailing comma */
1171 			gpc_names[i][size - 1] = '\0';
1172 		}
1173 	}
1174 	/*
1175 	 * Fixed-function Counters (FFC) are already listed individually in
1176 	 * ffc_names[]
1177 	 */
1178 	return (0);
1179 }
1180 
1181 static uint_t core_pcbe_ncounters()
1182 {
1183 	return (total_pmc);
1184 }
1185 
1186 static const char *core_pcbe_impl_name(void)
1187 {
1188 	return (core_impl_name);
1189 }
1190 
1191 static const char *core_pcbe_cpuref(void)
1192 {
1193 	return (core_cpuref);
1194 }
1195 
1196 static char *core_pcbe_list_events(uint_t picnum)
1197 {
1198 	ASSERT(picnum < cpc_ncounters);
1199 
1200 	if (picnum < num_gpc) {
1201 		return (gpc_names[picnum]);
1202 	} else {
1203 		return (ffc_names[picnum - num_gpc]);
1204 	}
1205 }
1206 
1207 static char *core_pcbe_list_attrs(void)
1208 {
1209 	if (versionid >= 3) {
1210 		return ("edge,inv,umask,cmask,anythr");
1211 	} else {
1212 		return ("edge,pc,inv,umask,cmask");
1213 	}
1214 }
1215 
1216 static const struct nametable_core_uarch *
1217 find_gpcevent_core_uarch(char *name,
1218     const struct nametable_core_uarch *nametable)
1219 {
1220 	const struct nametable_core_uarch *n;
1221 	int compare_result = -1;
1222 
1223 	for (n = nametable; n->event_num != NT_END; n++) {
1224 		compare_result = strcmp(name, n->name);
1225 		if (compare_result <= 0) {
1226 			break;
1227 		}
1228 	}
1229 
1230 	if (compare_result == 0) {
1231 		return (n);
1232 	}
1233 
1234 	return (NULL);
1235 }
1236 
1237 static const struct events_table_t *
1238 find_gpcevent(char *name)
1239 {
1240 	int i;
1241 
1242 	/* Search architectural events */
1243 	for (i = 0; i < known_arch_events; i++) {
1244 		if (strcmp(name, arch_events_table[i].name) == 0) {
1245 			if (((1U << i) & arch_events_vector) == 0) {
1246 				return (&arch_events_table[i]);
1247 			}
1248 		}
1249 	}
1250 
1251 	/* Search non-architectural events */
1252 	if (events_table != NULL) {
1253 		for (i = 0; events_table[i].eventselect != NT_END; i++) {
1254 			if (strcmp(name, events_table[i].name) == 0) {
1255 				return (&events_table[i]);
1256 			}
1257 		}
1258 	}
1259 
1260 	return (NULL);
1261 }
1262 static uint64_t
1263 core_pcbe_event_coverage(char *event)
1264 {
1265 	uint64_t bitmap;
1266 	uint64_t bitmask;
1267 	const struct events_table_t *n;
1268 	int i;
1269 
1270 	bitmap = 0;
1271 
1272 	/* Is it an event that a GPC can track? */
1273 	if (versionid >= 3) {
1274 		n = find_gpcevent(event);
1275 		if (n != NULL) {
1276 			bitmap |= (n->supported_counters &
1277 			    BITMASK_XBITS(num_gpc));
1278 		}
1279 	} else {
1280 		if (find_gpcevent_core_uarch(event, cmn_gpc_events_core_uarch)
1281 		    != NULL) {
1282 			bitmap |= BITMASK_XBITS(num_gpc);
1283 		} else if (find_gpcevent_core_uarch(event, pic0_events) !=
1284 		    NULL) {
1285 			bitmap |= 1ULL;
1286 		} else if (find_gpcevent_core_uarch(event, pic1_events) !=
1287 		    NULL) {
1288 			bitmap |= 1ULL << 1;
1289 		}
1290 	}
1291 
1292 	/* Check if the event can be counted in the fixed-function counters */
1293 	if (num_ffc > 0) {
1294 		bitmask = 1ULL << num_gpc;
1295 		for (i = 0; i < num_ffc; i++) {
1296 			if (strcmp(event, ffc_names[i]) == 0) {
1297 				bitmap |= bitmask;
1298 			}
1299 			bitmask = bitmask << 1;
1300 		}
1301 	}
1302 
1303 	return (bitmap);
1304 }
1305 
1306 static uint64_t
1307 core_pcbe_overflow_bitmap(void)
1308 {
1309 	uint64_t interrupt_status;
1310 	uint64_t intrbits_ffc;
1311 	uint64_t intrbits_gpc;
1312 	extern int kcpc_hw_overflow_intr_installed;
1313 	uint64_t overflow_bitmap;
1314 
1315 	RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
1316 	WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);
1317 
1318 	interrupt_status = interrupt_status & control_mask;
1319 	intrbits_ffc = (interrupt_status >> 32) & control_ffc;
1320 	intrbits_gpc = interrupt_status & control_gpc;
1321 	overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;
1322 
1323 	ASSERT(kcpc_hw_overflow_intr_installed);
1324 	(*kcpc_hw_enable_cpc_intr)();
1325 
1326 	return (overflow_bitmap);
1327 }
1328 
1329 static int
1330 check_cpc_securitypolicy(core_pcbe_config_t *conf,
1331     const struct nametable_core_uarch *n)
1332 {
1333 	if (conf->core_ctl & n->restricted_bits) {
1334 		if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1335 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
1336 		}
1337 	}
1338 	return (0);
1339 }
1340 
1341 static int
1342 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1343     uint_t nattrs, kcpc_attr_t *attrs, void **data)
1344 {
1345 	core_pcbe_config_t	conf;
1346 	const struct nametable_core_uarch	*n;
1347 	const struct nametable_core_uarch	*m;
1348 	const struct nametable_core_uarch	*picspecific_events;
1349 	struct nametable_core_uarch	nt_raw = { "", 0x0, 0x0 };
1350 	uint_t			i;
1351 	long			event_num;
1352 	const struct events_table_t *eventcode;
1353 
1354 	if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
1355 	    ((preset & BITS_EXTENDED_FROM_31) !=
1356 	    BITS_EXTENDED_FROM_31)) {
1357 
1358 		/*
1359 		 * Bits beyond bit-31 in the general-purpose counters can only
1360 		 * be written to by extension of bit 31.  We cannot preset
1361 		 * these bits to any value other than all 1s or all 0s.
1362 		 */
1363 		return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1364 	}
1365 
1366 	if (versionid >= 3) {
1367 		eventcode = find_gpcevent(event);
1368 		if (eventcode != NULL) {
1369 			if ((C(picnum) & eventcode->supported_counters) == 0) {
1370 				return (CPC_PIC_NOT_CAPABLE);
1371 			}
1372 			conf.core_ctl = eventcode->eventselect;
1373 			conf.core_ctl |= eventcode->unitmask <<
1374 			    CORE_UMASK_SHIFT;
1375 		} else {
1376 			/* Event specified as raw event code */
1377 			if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
1378 				return (CPC_INVALID_EVENT);
1379 			}
1380 			conf.core_ctl = event_num & 0xFF;
1381 		}
1382 	} else {
1383 		n = find_gpcevent_core_uarch(event, cmn_gpc_events_core_uarch);
1384 		if (n == NULL) {
1385 			switch (picnum) {
1386 				case 0:
1387 					picspecific_events = pic0_events;
1388 					break;
1389 				case 1:
1390 					picspecific_events = pic1_events;
1391 					break;
1392 				default:
1393 					picspecific_events = NULL;
1394 					break;
1395 			}
1396 			if (picspecific_events != NULL) {
1397 				n = find_gpcevent_core_uarch(event,
1398 				    picspecific_events);
1399 			}
1400 		}
1401 		if (n == NULL) {
1402 			/*
1403 			 * Check if this is a case where the event was
1404 			 * specified directly by its event number instead of
1405 			 * its name string.
1406 			 */
1407 			if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
1408 				return (CPC_INVALID_EVENT);
1409 			}
1410 
1411 			event_num = event_num & 0xFF;
1412 
1413 			/*
1414 			 * Search the event table to find out if the event
1415 			 * specified has any privilege requirements.  Currently
1416 			 * none of the pic-specific events have any privilege
1417 			 * requirements.  Hence only the table
1418 			 * cmn_gpc_events_core_uarch is searched.
1419 			 */
1420 			for (m = cmn_gpc_events_core_uarch;
1421 			    m->event_num != NT_END;
1422 			    m++) {
1423 				if (event_num == m->event_num) {
1424 					break;
1425 				}
1426 			}
1427 			if (m->event_num == NT_END) {
1428 				nt_raw.event_num = (uint8_t)event_num;
1429 				n = &nt_raw;
1430 			} else {
1431 				n = m;
1432 			}
1433 		}
1434 		conf.core_ctl = n->event_num; /* Event Select */
1435 	}
1436 
1437 
1438 	conf.core_picno = picnum;
1439 	conf.core_pictype = CORE_GPC;
1440 	conf.core_rawpic = preset & mask_gpc;
1441 
1442 	conf.core_pes = GPC_BASE_PES + picnum;
1443 	conf.core_pmc = GPC_BASE_PMC + picnum;
1444 
1445 	for (i = 0; i < nattrs; i++) {
1446 		if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
1447 			if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
1448 			    CORE_UMASK_MASK) {
1449 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1450 			}
1451 			/* Clear out the default umask */
1452 			conf.core_ctl &= ~ (CORE_UMASK_MASK <<
1453 			    CORE_UMASK_SHIFT);
1454 			/* Use the user provided umask */
1455 			conf.core_ctl |= attrs[i].ka_val <<
1456 			    CORE_UMASK_SHIFT;
1457 		} else  if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
1458 			if (attrs[i].ka_val != 0)
1459 				conf.core_ctl |= CORE_EDGE;
1460 		} else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
1461 			if (attrs[i].ka_val != 0)
1462 				conf.core_ctl |= CORE_INV;
1463 		} else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
1464 			if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
1465 			    CORE_CMASK_MASK) {
1466 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1467 			}
1468 			conf.core_ctl |= attrs[i].ka_val <<
1469 			    CORE_CMASK_SHIFT;
1470 		} else if (strncmp(attrs[i].ka_name, "anythr", 7) ==
1471 		    0) {
1472 			if (versionid < 3)
1473 				return (CPC_INVALID_ATTRIBUTE);
1474 			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1475 				return (CPC_ATTR_REQUIRES_PRIVILEGE);
1476 			}
1477 			if (attrs[i].ka_val != 0)
1478 				conf.core_ctl |= CORE_ANYTHR;
1479 		} else {
1480 			return (CPC_INVALID_ATTRIBUTE);
1481 		}
1482 	}
1483 
1484 	if (flags & CPC_COUNT_USER)
1485 		conf.core_ctl |= CORE_USR;
1486 	if (flags & CPC_COUNT_SYSTEM)
1487 		conf.core_ctl |= CORE_OS;
1488 	if (flags & CPC_OVF_NOTIFY_EMT)
1489 		conf.core_ctl |= CORE_INT;
1490 	conf.core_ctl |= CORE_EN;
1491 
1492 	if (versionid < 3) {
1493 		if (check_cpc_securitypolicy(&conf, n) != 0) {
1494 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
1495 		}
1496 	}
1497 
1498 	*data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1499 	*((core_pcbe_config_t *)*data) = conf;
1500 
1501 	return (0);
1502 }
1503 
1504 static int
1505 configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1506     uint_t nattrs, kcpc_attr_t *attrs, void **data)
1507 {
1508 	core_pcbe_config_t	*conf;
1509 	uint_t			i;
1510 
1511 	if (picnum - num_gpc >= num_ffc) {
1512 		return (CPC_INVALID_PICNUM);
1513 	}
1514 
1515 	if (strcmp(ffc_names[picnum-num_gpc], event) != 0) {
1516 		return (CPC_INVALID_EVENT);
1517 	}
1518 
1519 	if ((versionid < 3) && (nattrs != 0)) {
1520 		return (CPC_INVALID_ATTRIBUTE);
1521 	}
1522 
1523 	conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1524 	conf->core_ctl = 0;
1525 
1526 	for (i = 0; i < nattrs; i++) {
1527 		if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) {
1528 			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
				/* Avoid leaking conf on the error path */
				kmem_free(conf, sizeof (core_pcbe_config_t));
1529 				return (CPC_ATTR_REQUIRES_PRIVILEGE);
1530 			}
1531 			if (attrs[i].ka_val != 0) {
1532 				conf->core_ctl |= CORE_FFC_ANYTHR;
1533 			}
1534 		} else {
1535 			kmem_free(conf, sizeof (core_pcbe_config_t));
1536 			return (CPC_INVALID_ATTRIBUTE);
1537 		}
1538 	}
1539 
1540 	conf->core_picno = picnum;
1541 	conf->core_pictype = CORE_FFC;
1542 	conf->core_rawpic = preset & mask_ffc;
1543 	conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);
1544 
1545 	/* All fixed-function counters have the same control register */
1546 	conf->core_pes = PERF_FIXED_CTR_CTRL;
1547 
1548 	if (flags & CPC_COUNT_USER)
1549 		conf->core_ctl |= CORE_FFC_USR_EN;
1550 	if (flags & CPC_COUNT_SYSTEM)
1551 		conf->core_ctl |= CORE_FFC_OS_EN;
1552 	if (flags & CPC_OVF_NOTIFY_EMT)
1553 		conf->core_ctl |= CORE_FFC_PMI;
1554 
1555 	*data = conf;
1556 	return (0);
1557 }
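
/*
 * Illustrative example (added commentary): all fixed-function counters share
 * the single PERF_FIXED_CTR_CTRL (IA32_FIXED_CTR_CTRL) register, which
 * carries a 4-bit control field per counter (bit 0 enables counting in
 * ring 0, bit 1 counting outside ring 0, bit 2 AnyThread on version 3 and
 * later, bit 3 the overflow PMI).  Assuming the CORE_FFC_* flags used above
 * map onto those bits, an FFC configured with
 * CPC_COUNT_USER | CPC_COUNT_SYSTEM | CPC_OVF_NOTIFY_EMT ends up with
 * core_ctl == 0xb, which core_pcbe_program() later shifts into place by
 * (core_picno - num_gpc) * CORE_FFC_ATTR_SIZE.
 */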
1558 
1559 /*ARGSUSED*/
1560 static int
1561 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
1562     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
1563     void *token)
1564 {
1565 	int			ret;
1566 	core_pcbe_config_t	*conf;
1567 
1568 	/*
1569 	 * If we've been handed an existing configuration, we need only preset
1570 	 * the counter value.
1571 	 */
1572 	if (*data != NULL) {
1573 		conf = *data;
1574 		ASSERT(conf->core_pictype == CORE_GPC ||
1575 		    conf->core_pictype == CORE_FFC);
1576 		if (conf->core_pictype == CORE_GPC)
1577 			conf->core_rawpic = preset & mask_gpc;
1578 		else /* CORE_FFC */
1579 			conf->core_rawpic = preset & mask_ffc;
1580 		return (0);
1581 	}
1582 
1583 	if (picnum >= total_pmc) {
1584 		return (CPC_INVALID_PICNUM);
1585 	}
1586 
1587 	if (picnum < num_gpc) {
1588 		ret = configure_gpc(picnum, event, preset, flags,
1589 		    nattrs, attrs, data);
1590 	} else {
1591 		ret = configure_ffc(picnum, event, preset, flags,
1592 		    nattrs, attrs, data);
1593 	}
1594 	return (ret);
1595 }
1596 
1597 static void
1598 core_pcbe_program(void *token)
1599 {
1600 	core_pcbe_config_t	*cfg;
1601 	uint64_t		perf_global_ctrl;
1602 	uint64_t		perf_fixed_ctr_ctrl;
1603 	uint64_t		curcr4;
1604 
1605 	core_pcbe_allstop();
1606 
1607 	curcr4 = getcr4();
1608 	if (kcpc_allow_nonpriv(token))
1609 		/* Allow RDPMC at any ring level */
1610 		setcr4(curcr4 | CR4_PCE);
1611 	else
1612 		/* Allow RDPMC only at ring 0 */
1613 		setcr4(curcr4 & ~CR4_PCE);
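
	/*
	 * Illustrative note (added commentary): with CR4.PCE set, the thread
	 * that owns these counters can read them from user level with the
	 * RDPMC instruction instead of a system call, e.g.
	 *
	 *	uint32_t lo, hi;
	 *	__asm__ volatile ("rdpmc" : "=a" (lo), "=d" (hi) : "c" (0));
	 *
	 * where ECX selects GPC 0; fixed-function counter j is selected by
	 * ECX = 0x40000000 | j.  With CR4.PCE clear, RDPMC from ring 3
	 * raises #GP.
	 */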
1614 
1615 	/* Clear any overflow indicators before programming the counters */
1616 	WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);
1617 
1618 	cfg = NULL;
1619 	perf_global_ctrl = 0;
1620 	perf_fixed_ctr_ctrl = 0;
1621 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
1622 	while (cfg != NULL) {
1623 		ASSERT(cfg->core_pictype == CORE_GPC ||
1624 		    cfg->core_pictype == CORE_FFC);
1625 
1626 		if (cfg->core_pictype == CORE_GPC) {
1627 			/*
1628 			 * General-purpose counter registers have a write
1629 			 * restriction: only the lower 32 bits can be written
1630 			 * directly.  The remaining bits are sign-extended
1631 			 * from bit 31 (all ZEROs if bit 31 is ZERO and all
1632 			 * ONEs if bit 31 is ONE).  Consequently, the only
1633 			 * values that can be written to the counter register
1634 			 * are those whose upper bits are all ONEs or all
1635 			 * ZEROs.
1636 			 */
1637 			if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
1638 			    ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
1639 			    BITS_EXTENDED_FROM_31)) {
1640 				/*
1641 				 * Straightforward case where the higher bits
1642 				 * are all ZEROs or all ONEs.
1643 				 */
1644 				WRMSR(cfg->core_pmc,
1645 				    (cfg->core_rawpic & mask_gpc));
1646 			} else {
1647 				/*
1648 				 * The high order bits are not all the same.
1649 				 * We save what is currently in the register
1650 				 * and do not write to it.  When we read this
1651 				 * register later (in core_pcbe_sample()), we
1652 				 * subtract the value we saved here to get the
1653 				 * actual event count.
1654 				 *
1655 				 * NOTE: As a result, we will not get overflow
1656 				 * interrupts as expected.
1657 				 */
1658 				RDMSR(cfg->core_pmc, cfg->core_rawpic);
1659 				cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
1660 			}
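			/*
			 * Illustrative example (added commentary) for the
			 * preset handling above, assuming a 48-bit wide GPC:
			 * a preset of 0x7fffffff (bits 47:31 all ZERO) or
			 * 0xffffc0000000 (bits 47:31 all ONE) takes the
			 * direct-write path, while a preset such as
			 * 0x100000000 has mixed values in those bits and
			 * falls into the read-and-subtract path.
			 */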
1661 			WRMSR(cfg->core_pes, cfg->core_ctl);
1662 			perf_global_ctrl |= 1ull << cfg->core_picno;
1663 		} else {
1664 			/*
1665 			 * Unlike the general-purpose counters, all relevant
1666 			 * bits of fixed-function counters can be written to.
1667 			 */
1668 			WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);
1669 
1670 			/*
1671 			 * Collect the control bits for all the
1672 			 * fixed-function counters and write them out in one
1673 			 * shot later in this function.
1674 			 */
1675 			perf_fixed_ctr_ctrl |= cfg->core_ctl <<
1676 			    ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
1677 			perf_global_ctrl |=
1678 			    1ull << (cfg->core_picno - num_gpc + 32);
1679 		}
1680 
1681 		cfg = (core_pcbe_config_t *)
1682 		    kcpc_next_config(token, cfg, NULL);
1683 	}
1684 
1685 	/* Enable all the counters */
1686 	WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
1687 	WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
1688 }
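
/*
 * Illustrative example (added commentary): if a request programs GPC 0, GPC 1
 * and FFC 0, the loop above accumulates
 *
 *	perf_global_ctrl    = (1 << 0) | (1 << 1) | (1ULL << 32)
 *	                    = 0x100000003
 *	perf_fixed_ctr_ctrl = FFC 0's control field in bits 3:0
 *
 * (CORE_FFC_ATTR_SIZE is assumed to be 4, matching the 4-bit per-counter
 * fields of IA32_FIXED_CTR_CTRL).  Writing PERF_GLOBAL_CTRL last starts all
 * of the requested counters together.
 */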
1689 
1690 static void
1691 core_pcbe_allstop(void)
1692 {
1693 	/* Disable all the counters together */
1694 	WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);
1695 
1696 	setcr4(getcr4() & ~CR4_PCE);
1697 }
1698 
1699 static void
1700 core_pcbe_sample(void *token)
1701 {
1702 	uint64_t		*daddr;
1703 	uint64_t		curpic;
1704 	core_pcbe_config_t	*cfg;
1705 	uint64_t		counter_mask;
1706 
1707 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
1708 	while (cfg != NULL) {
1709 		ASSERT(cfg->core_pictype == CORE_GPC ||
1710 		    cfg->core_pictype == CORE_FFC);
1711 
1712 		curpic = rdmsr(cfg->core_pmc);
1713 
1714 		DTRACE_PROBE4(core__pcbe__sample,
1715 		    uint64_t, cfg->core_pmc,
1716 		    uint64_t, curpic,
1717 		    uint64_t, cfg->core_rawpic,
1718 		    uint64_t, *daddr);
1719 
1720 		if (cfg->core_pictype == CORE_GPC) {
1721 			counter_mask = mask_gpc;
1722 		} else {
1723 			counter_mask = mask_ffc;
1724 		}
1725 		curpic = curpic & counter_mask;
1726 		if (curpic >= cfg->core_rawpic) {
1727 			*daddr += curpic - cfg->core_rawpic;
1728 		} else {
1729 			/* Counter overflowed since our last sample */
1730 			*daddr += counter_mask - (cfg->core_rawpic - curpic) +
1731 			    1;
1732 		}
1733 		cfg->core_rawpic = *daddr & counter_mask;
1734 
1735 		cfg =
1736 		    (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
1737 	}
1738 }
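
/*
 * Illustrative example (added commentary) of the wrap-around arithmetic
 * above, assuming a 48-bit counter (counter_mask == 0xffffffffffff): if the
 * value saved at the last sample was core_rawpic == 0xfffffffffff0 and the
 * current read is curpic == 0x5, the counter has wrapped, so the sample adds
 *
 *	0xffffffffffff - (0xfffffffffff0 - 0x5) + 1 == 0x15
 *
 * i.e. 21 events: 15 before the wrap, 1 for the wrap itself and 5 after it.
 */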
1739 
1740 static void
1741 core_pcbe_free(void *config)
1742 {
1743 	kmem_free(config, sizeof (core_pcbe_config_t));
1744 }
1745 
1746 static struct modlpcbe core_modlpcbe = {
1747 	&mod_pcbeops,
1748 	"Core Performance Counters",
1749 	&core_pcbe_ops
1750 };
1751 
1752 static struct modlinkage core_modl = {
1753 	MODREV_1,
1754 	&core_modlpcbe,
1755 };
1756 
1757 int
1758 _init(void)
1759 {
1760 	if (core_pcbe_init() != 0) {
1761 		return (ENOTSUP);
1762 	}
1763 	return (mod_install(&core_modl));
1764 }
1765 
1766 int
1767 _fini(void)
1768 {
1769 	return (mod_remove(&core_modl));
1770 }
1771 
1772 int
1773 _info(struct modinfo *mi)
1774 {
1775 	return (mod_info(&core_modl, mi));
1776 }
1777