xref: /illumos-gate/usr/src/uts/intel/pcbe/core_pcbe.c (revision 7417cfdecea1902cef03c0d61a72df97d945925d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * This file contains preset event names from the Performance Application
27  * Programming Interface v3.5 which included the following notice:
28  *
29  *                             Copyright (c) 2005,6
30  *                           Innovative Computing Labs
31  *                         Computer Science Department,
32  *                            University of Tennessee,
33  *                                 Knoxville, TN.
34  *                              All Rights Reserved.
35  *
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions are met:
39  *
40  *    * Redistributions of source code must retain the above copyright notice,
41  *      this list of conditions and the following disclaimer.
42  *    * Redistributions in binary form must reproduce the above copyright
43  *      notice, this list of conditions and the following disclaimer in the
44  *      documentation and/or other materials provided with the distribution.
45  *    * Neither the name of the University of Tennessee nor the names of its
46  *      contributors may be used to endorse or promote products derived from
47  *      this software without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
50  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
53  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59  * POSSIBILITY OF SUCH DAMAGE.
60  *
61  *
62  * This open source software license conforms to the BSD License template.
63  */
64 
65 
66 /*
67  * Performance Counter Back-End for Intel processors supporting Architectural
68  * Performance Monitoring.
69  */
70 
71 #include <sys/cpuvar.h>
72 #include <sys/param.h>
73 #include <sys/cpc_impl.h>
74 #include <sys/cpc_pcbe.h>
75 #include <sys/modctl.h>
76 #include <sys/inttypes.h>
77 #include <sys/systm.h>
78 #include <sys/cmn_err.h>
79 #include <sys/x86_archext.h>
80 #include <sys/sdt.h>
81 #include <sys/archsystm.h>
82 #include <sys/privregs.h>
83 #include <sys/ddi.h>
84 #include <sys/sunddi.h>
85 #include <sys/cred.h>
86 #include <sys/policy.h>
87 
/*
 * Forward declarations of this back end's entry points.  Every function
 * except core_pcbe_init() is installed in the core_pcbe_ops vector below.
 */
88 static int core_pcbe_init(void);
89 static uint_t core_pcbe_ncounters(void);
90 static const char *core_pcbe_impl_name(void);
91 static const char *core_pcbe_cpuref(void);
92 static char *core_pcbe_list_events(uint_t picnum);
93 static char *core_pcbe_list_attrs(void);
94 static uint64_t core_pcbe_event_coverage(char *event);
95 static uint64_t core_pcbe_overflow_bitmap(void);
96 static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
97     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
98     void *token);
99 static void core_pcbe_program(void *token);
100 static void core_pcbe_allstop(void);
101 static void core_pcbe_sample(void *token);
102 static void core_pcbe_free(void *config);
103 
104 #define	FALSE	0
105 #define	TRUE	1
106 
107 /* Counter type; the value kept in core_pcbe_config_t's core_pictype */
108 #define	CORE_GPC	0	/* General-Purpose Counter (GPC) */
109 #define	CORE_FFC	1	/* Fixed-Function Counter (FFC) */
110 
111 /* MSR addresses; each *_BASE_* value is the register of the first counter */
112 #define	GPC_BASE_PMC		0x00c1	/* First GPC */
113 #define	GPC_BASE_PES		0x0186	/* First GPC Event Select register */
114 #define	FFC_BASE_PMC		0x0309	/* First FFC */
115 #define	PERF_FIXED_CTR_CTRL	0x038d	/* Used to enable/disable FFCs */
116 #define	PERF_GLOBAL_STATUS	0x038e	/* Overflow status register */
117 #define	PERF_GLOBAL_CTRL	0x038f	/* Used to enable/disable counting */
118 #define	PERF_GLOBAL_OVF_CTRL	0x0390	/* Used to clear overflow status */
119 
120 /*
121  * Processor Event Select register fields.  The single-bit flags come first
 * (CORE_ANYTHR, bit 21, is listed out of bit order), followed by the
 * position and width of the two multi-bit fields.
122  */
123 #define	CORE_USR	(1ULL << 16)	/* Count while not in ring 0 */
124 #define	CORE_OS		(1ULL << 17)	/* Count while in ring 0 */
125 #define	CORE_EDGE	(1ULL << 18)	/* Enable edge detection */
126 #define	CORE_PC		(1ULL << 19)	/* Enable pin control */
127 #define	CORE_INT	(1ULL << 20)	/* Enable interrupt on overflow */
128 #define	CORE_EN		(1ULL << 22)	/* Enable counting */
129 #define	CORE_INV	(1ULL << 23)	/* Invert the CMASK */
130 #define	CORE_ANYTHR	(1ULL << 21)	/* Count event for any thread on core */
131 
132 #define	CORE_UMASK_SHIFT	8	/* Bit position of the unit mask */
133 #define	CORE_UMASK_MASK		0xffu	/* Unit mask is eight bits wide */
134 #define	CORE_CMASK_SHIFT	24	/* Bit position of the CMASK */
135 #define	CORE_CMASK_MASK		0xffu	/* CMASK is eight bits wide */
136 
137 /*
138  * Fixed-function counter attributes.  OS and USR have the same meaning as
 * CORE_OS and CORE_USR have for the general-purpose counters.
139  */
140 #define	CORE_FFC_OS_EN	(1ULL << 0)	/* Count while in ring 0 */
141 #define	CORE_FFC_USR_EN	(1ULL << 1)	/* Count while not in ring 0 */
142 #define	CORE_FFC_ANYTHR	(1ULL << 2)	/* Count event for any thread on core */
143 #define	CORE_FFC_PMI	(1ULL << 3)	/* Enable interrupt on overflow */
144 
145 /*
146  * Number of bits for specifying each FFC's attributes in the control register
147  */
148 #define	CORE_FFC_ATTR_SIZE	4
149 
150 /*
151  * CondChgd and OvfBuffer fields of global status and overflow control registers
152  */
153 #define	CONDCHGD	(1ULL << 63)
154 #define	OVFBUFFER	(1ULL << 62)
155 #define	MASK_CONDCHGD_OVFBUFFER	(CONDCHGD | OVFBUFFER)
156 
/* No bits set.  Presumably the PERF_GLOBAL_CTRL value that stops all counters. */
157 #define	ALL_STOPPED	0ULL
158 
/* Mask with the low x bits set.  x must be less than 64. */
159 #define	BITMASK_XBITS(x)	((1ull << (x)) - 1ull)
160 
161 /*
162  * Only the lower 32-bits can be written to in the general-purpose
163  * counters.  The higher bits are extended from bit 31; all ones if
164  * bit 31 is one and all zeros otherwise.
165  *
166  * The fixed-function counters do not have this restriction.
 *
 * The mask below selects bits 31 through (width_gpc - 1) of a counter value,
 * i.e. bit 31 itself plus every bit that is extended from it.
167  */
168 #define	BITS_EXTENDED_FROM_31	(BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
169 
/*
 * MSR access wrappers that also fire a DTrace probe carrying the MSR address
 * and the value written (WRMSR) or read (RDMSR).
 *
 * Each wrapper expands to a single do { } while (0) statement, so it can be
 * used as the unbraced body of an if/else or a loop without the probe
 * escaping the condition.  The caller supplies the terminating semicolon.
 *
 * Both arguments are evaluated more than once, so they must be free of side
 * effects.  The second argument of RDMSR must be an lvalue; it receives the
 * value that was read.
 */
#define	WRMSR(msr, value)						\
	do {								\
		wrmsr((msr), (value));					\
		DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value)); \
	} while (0)

#define	RDMSR(msr, value)						\
	do {								\
		(value) = rdmsr((msr));					\
		DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value)); \
	} while (0)
177 
/*
 * Configuration of a single counter, which is either a general-purpose or a
 * fixed-function counter (see core_pictype).
 */
178 typedef struct core_pcbe_config {
179 	uint64_t	core_rawpic;	/* Presumably the raw counter value */
180 	uint64_t	core_ctl;	/* Event Select bits */
181 	uint64_t	core_pmc;	/* Counter register address */
182 	uint64_t	core_pes;	/* Event Select register address */
183 	uint_t		core_picno;	/* Presumably the counter's index */
184 	uint8_t		core_pictype;	/* CORE_GPC or CORE_FFC */
185 } core_pcbe_config_t;
186 
/*
 * Operations vector for this back end.  The initializer is positional; the
 * comment on each line names the pcbe_ops_t member that the value fills.
 */
187 pcbe_ops_t core_pcbe_ops = {
188 	PCBE_VER_1,			/* pcbe_ver */
189 	CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,	/* pcbe_caps */
190 	core_pcbe_ncounters,		/* pcbe_ncounters */
191 	core_pcbe_impl_name,		/* pcbe_impl_name */
192 	core_pcbe_cpuref,		/* pcbe_cpuref */
193 	core_pcbe_list_events,		/* pcbe_list_events */
194 	core_pcbe_list_attrs,		/* pcbe_list_attrs */
195 	core_pcbe_event_coverage,	/* pcbe_event_coverage */
196 	core_pcbe_overflow_bitmap,	/* pcbe_overflow_bitmap */
197 	core_pcbe_configure,		/* pcbe_configure */
198 	core_pcbe_program,		/* pcbe_program */
199 	core_pcbe_allstop,		/* pcbe_allstop */
200 	core_pcbe_sample,		/* pcbe_sample */
201 	core_pcbe_free			/* pcbe_free */
202 };
203 
/*
 * One entry of an event name table for the Core microarchitecture: the event
 * name, the event-select bits that need extra privilege (a combination of
 * ALL_CORES and ALL_AGENTS, or 0x0), and the event number.
 */
204 struct nametable_core_uarch {
205 	const char	*name;
206 	uint64_t	restricted_bits;
207 	uint8_t		event_num;
208 };
209 
/* Event number of the entry that terminates each event table */
210 #define	NT_END	0xFF
211 
212 /*
213  * Counting an event for all cores or all bus agents requires cpc_cpu privileges
214  */
215 #define	ALL_CORES	(1ULL << 15)
216 #define	ALL_AGENTS	(1ULL << 13)
217 
/*
 * One generic (PAPI) event: its name and the event number and unit mask of
 * the event it maps to.
 */
218 struct generic_events {
219 	const char	*name;
220 	uint8_t		event_num;
221 	uint8_t		umask;
222 };
223 
/*
 * Generic (PAPI) events and the event number/unit mask each one maps to.  The
 * trailing comment on an entry names the corresponding native event.  The
 * entry whose event number is NT_END terminates the table.
 */
224 static const struct generic_events cmn_generic_events[] = {
225 	{ "PAPI_tot_cyc", 0x3c, 0x00 }, /* cpu_clk_unhalted.thread_p/core */
226 	{ "PAPI_tot_ins", 0xc0, 0x00 }, /* inst_retired.any_p		  */
227 	{ "PAPI_br_ins",  0xc4, 0x0c }, /* br_inst_retired.taken	  */
228 	{ "PAPI_br_msp",  0xc5, 0x00 }, /* br_inst_retired.mispred	  */
229 	{ "PAPI_br_ntk",  0xc4, 0x03 },
230 			/* br_inst_retired.pred_not_taken|mispred_not_taken */
231 	{ "PAPI_br_prc",  0xc4, 0x05 },
232 				/* br_inst_retired.pred_not_taken|pred_taken */
233 	{ "PAPI_hw_int",  0xc8, 0x00 }, /* hw_int_rcv			  */
234 	{ "PAPI_tot_iis", 0xaa, 0x01 }, /* macro_insts.decoded		  */
235 	{ "PAPI_l1_dca",  0x43, 0x01 }, /* l1d_all_ref			  */
236 	{ "PAPI_l1_icm",  0x81, 0x00 }, /* l1i_misses			  */
237 	{ "PAPI_l1_icr",  0x80, 0x00 }, /* l1i_reads			  */
238 	{ "PAPI_l1_tcw",  0x41, 0x0f }, /* l1d_cache_st.mesi		  */
239 	{ "PAPI_l2_stm",  0x2a, 0x41 }, /* l2_st.self.i_state		  */
240 	{ "PAPI_l2_tca",  0x2e, 0x4f }, /* l2_rqsts.self.demand.mesi	  */
241 	{ "PAPI_l2_tch",  0x2e, 0x4e }, /* l2_rqsts.mes			  */
242 	{ "PAPI_l2_tcm",  0x2e, 0x41 }, /* l2_rqsts.self.demand.i_state   */
243 	{ "PAPI_l2_tcw",  0x2a, 0x4f }, /* l2_st.self.mesi		  */
244 	{ "PAPI_ld_ins",  0xc0, 0x01 }, /* inst_retired.loads		  */
245 	{ "PAPI_lst_ins", 0xc0, 0x03 }, /* inst_retired.loads|stores	  */
246 	{ "PAPI_sr_ins",  0xc0, 0x02 }, /* inst_retired.stores		  */
247 	{ "PAPI_tlb_dm",  0x08, 0x01 }, /* dtlb_misses.any		  */
248 	{ "PAPI_tlb_im",  0x82, 0x12 }, /* itlb.small_miss|large_miss	  */
249 	{ "PAPI_tlb_tl",  0x0c, 0x03 }, /* page_walks			  */
250 	{ "",		  NT_END, 0  }
251 };
252 
/*
 * Generic events kept apart from cmn_generic_events.  The one entry maps to
 * event 0xcb, which is mem_load_retired in pic0_events.
 * NOTE(review): judging by the name this table applies to counter 0 only -
 * confirm against the code that consults it.
 */
253 static const struct generic_events generic_events_pic0[] = {
254 	{ "PAPI_l1_dcm",  0xcb, 0x01 }, /* mem_load_retired.l1d_miss */
255 	{ "",		  NT_END, 0  }
256 };
257 
258 /*
259  * The events listed in the following table can be counted on all
260  * general-purpose counters of processors in the Penryn and Merom families.
 *
 * Each entry gives the event name, the bits that require cpc_cpu privileges
 * (ALL_CORES and/or ALL_AGENTS, or 0x0 for none) and the event number.  The
 * entry whose event number is NT_END terminates the table.
261  */
262 static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = {
263 	/* Alphabetical order of event name */
264 
265 	{ "baclears",			0x0,	0xe6 },
266 	{ "bogus_br",			0x0,	0xe4 },
267 	{ "br_bac_missp_exec",		0x0,	0x8a },
268 
269 	{ "br_call_exec",		0x0,	0x92 },
270 	{ "br_call_missp_exec",		0x0,	0x93 },
271 	{ "br_cnd_exec",		0x0,	0x8b },
272 
273 	{ "br_cnd_missp_exec",		0x0,	0x8c },
274 	{ "br_ind_call_exec",		0x0,	0x94 },
275 	{ "br_ind_exec",		0x0,	0x8d },
276 
277 	{ "br_ind_missp_exec",		0x0,	0x8e },
278 	{ "br_inst_decoded",		0x0,	0xe0 },
279 	{ "br_inst_exec",		0x0,	0x88 },
280 
281 	{ "br_inst_retired",		0x0,	0xc4 },
282 	{ "br_inst_retired_mispred",	0x0,	0xc5 },
283 	{ "br_missp_exec",		0x0,	0x89 },
284 
285 	{ "br_ret_bac_missp_exec",	0x0,	0x91 },
286 	{ "br_ret_exec",		0x0,	0x8f },
287 	{ "br_ret_missp_exec",		0x0,	0x90 },
288 
289 	{ "br_tkn_bubble_1",		0x0,	0x97 },
290 	{ "br_tkn_bubble_2",		0x0,	0x98 },
291 	{ "bus_bnr_drv",		ALL_AGENTS,	0x61 },
292 
293 	{ "bus_data_rcv",		ALL_CORES,	0x64 },
294 	{ "bus_drdy_clocks",		ALL_AGENTS,	0x62 },
295 	{ "bus_hit_drv",		ALL_AGENTS,	0x7a },
296 
297 	{ "bus_hitm_drv",		ALL_AGENTS,	0x7b },
298 	{ "bus_io_wait",		ALL_CORES,	0x7f },
299 	{ "bus_lock_clocks",		ALL_CORES | ALL_AGENTS,	0x63 },
300 
301 	{ "bus_request_outstanding",	ALL_CORES | ALL_AGENTS,	0x60 },
302 	{ "bus_trans_any",		ALL_CORES | ALL_AGENTS,	0x70 },
303 	{ "bus_trans_brd",		ALL_CORES | ALL_AGENTS,	0x65 },
304 
305 	{ "bus_trans_burst",		ALL_CORES | ALL_AGENTS,	0x6e },
306 	{ "bus_trans_def",		ALL_CORES | ALL_AGENTS,	0x6d },
307 	{ "bus_trans_ifetch",		ALL_CORES | ALL_AGENTS,	0x68 },
308 
309 	{ "bus_trans_inval",		ALL_CORES | ALL_AGENTS,	0x69 },
310 	{ "bus_trans_io",		ALL_CORES | ALL_AGENTS,	0x6c },
311 	{ "bus_trans_mem",		ALL_CORES | ALL_AGENTS,	0x6f },
312 
313 	{ "bus_trans_p",		ALL_CORES | ALL_AGENTS,	0x6b },
314 	{ "bus_trans_pwr",		ALL_CORES | ALL_AGENTS,	0x6a },
315 	{ "bus_trans_rfo",		ALL_CORES | ALL_AGENTS,	0x66 },
316 
317 	{ "bus_trans_wb",		ALL_CORES | ALL_AGENTS,	0x67 },
318 	{ "busq_empty",			ALL_CORES,	0x7d },
319 	{ "cmp_snoop",			ALL_CORES,	0x78 },
320 
321 	{ "cpu_clk_unhalted",		0x0,	0x3c },
322 	{ "cycles_int",			0x0,	0xc6 },
323 	{ "cycles_l1i_mem_stalled",	0x0,	0x86 },
324 
325 	{ "dtlb_misses",		0x0,	0x08 },
326 	{ "eist_trans",			0x0,	0x3a },
327 	{ "esp",			0x0,	0xab },
328 
329 	{ "ext_snoop",			ALL_AGENTS,	0x77 },
330 	{ "fp_mmx_trans",		0x0,	0xcc },
331 	{ "hw_int_rcv",			0x0,	0xc8 },
332 
333 	{ "ild_stall",			0x0,	0x87 },
334 	{ "inst_queue",			0x0,	0x83 },
335 	{ "inst_retired",		0x0,	0xc0 },
336 
337 	{ "itlb",			0x0,	0x82 },
338 	{ "itlb_miss_retired",		0x0,	0xc9 },
339 	{ "l1d_all_ref",		0x0,	0x43 },
340 
341 	{ "l1d_cache_ld",		0x0,	0x40 },
342 	{ "l1d_cache_lock",		0x0,	0x42 },
343 	{ "l1d_cache_st",		0x0,	0x41 },
344 
345 	{ "l1d_m_evict",		0x0,	0x47 },
346 	{ "l1d_m_repl",			0x0,	0x46 },
347 	{ "l1d_pend_miss",		0x0,	0x48 },
348 
349 	{ "l1d_prefetch",		0x0,	0x4e },
350 	{ "l1d_repl",			0x0,	0x45 },
351 	{ "l1d_split",			0x0,	0x49 },
352 
353 	{ "l1i_misses",			0x0,	0x81 },
354 	{ "l1i_reads",			0x0,	0x80 },
355 	{ "l2_ads",			ALL_CORES,	0x21 },
356 
357 	{ "l2_dbus_busy_rd",		ALL_CORES,	0x23 },
358 	{ "l2_ifetch",			ALL_CORES,	0x28 },
359 	{ "l2_ld",			ALL_CORES,	0x29 },
360 
361 	{ "l2_lines_in",		ALL_CORES,	0x24 },
362 	{ "l2_lines_out",		ALL_CORES,	0x26 },
363 	{ "l2_lock",			ALL_CORES,	0x2b },
364 
365 	{ "l2_m_lines_in",		ALL_CORES,	0x25 },
366 	{ "l2_m_lines_out",		ALL_CORES,	0x27 },
367 	{ "l2_no_req",			ALL_CORES,	0x32 },
368 
369 	{ "l2_reject_busq",		ALL_CORES,	0x30 },
370 	{ "l2_rqsts",			ALL_CORES,	0x2e },
371 	{ "l2_st",			ALL_CORES,	0x2a },
372 
373 	{ "load_block",			0x0,	0x03 },
374 	{ "load_hit_pre",		0x0,	0x4c },
375 	{ "machine_nukes",		0x0,	0xc3 },
376 
377 	{ "macro_insts",		0x0,	0xaa },
378 	{ "memory_disambiguation",	0x0,	0x09 },
379 	{ "misalign_mem_ref",		0x0,	0x05 },
380 	{ "page_walks",			0x0,	0x0c },
381 
382 	{ "pref_rqsts_dn",		0x0,	0xf8 },
383 	{ "pref_rqsts_up",		0x0,	0xf0 },
384 	{ "rat_stalls",			0x0,	0xd2 },
385 
386 	{ "resource_stalls",		0x0,	0xdc },
387 	{ "rs_uops_dispatched",		0x0,	0xa0 },
388 	{ "seg_reg_renames",		0x0,	0xd5 },
389 
390 	{ "seg_rename_stalls",		0x0,	0xd4 },
391 	{ "segment_reg_loads",		0x0,	0x06 },
392 	{ "simd_assist",		0x0,	0xcd },
393 
394 	{ "simd_comp_inst_retired",	0x0,	0xca },
395 	{ "simd_inst_retired",		0x0,	0xc7 },
396 	{ "simd_instr_retired",		0x0,	0xce },
397 
398 	{ "simd_sat_instr_retired",	0x0,	0xcf },
399 	{ "simd_sat_uop_exec",		0x0,	0xb1 },
400 	{ "simd_uop_type_exec",		0x0,	0xb3 },
401 
402 	{ "simd_uops_exec",		0x0,	0xb0 },
403 	{ "snoop_stall_drv",		ALL_CORES | ALL_AGENTS,	0x7e },
404 	{ "sse_pre_exec",		0x0,	0x07 },
405 
406 	{ "sse_pre_miss",		0x0,	0x4b },
407 	{ "store_block",		0x0,	0x04 },
408 	{ "thermal_trip",		0x0,	0x3b },
409 
410 	{ "uops_retired",		0x0,	0xc2 },
411 	{ "x87_ops_retired",		0x0,	0xc1 },
412 	{ "",				0x0,	NT_END }
413 };
414 
415 /*
416  * If any of the pic-specific events require privileges, make sure to add a
417  * check in configure_gpc() to find whether an event hard-coded as a number by
418  * the user has any privilege requirements.
 *
 * No entry in pic0_events currently sets restricted_bits.  The entry whose
 * event number is NT_END terminates the table.
419  */
420 static const struct nametable_core_uarch pic0_events[] = {
421 	/* Alphabetical order of event name */
422 
423 	{ "cycles_div_busy",		0x0,	0x14 },
424 	{ "fp_comp_ops_exe",		0x0,	0x10 },
425 	{ "idle_during_div",		0x0,	0x18 },
426 
427 	{ "mem_load_retired",		0x0,	0xcb },
428 	{ "rs_uops_dispatched_port",	0x0,	0xa1 },
429 	{ "",				0x0,	NT_END }
430 };
431 
/*
 * Second pic-specific event table, in the same format as pic0_events.  No
 * entry sets restricted_bits; the NT_END entry terminates the table.
 * NOTE(review): judging by the name these events belong to counter 1 only -
 * confirm against the code that consults the table.
 */
432 static const struct nametable_core_uarch pic1_events[] = {
433 	/* Alphabetical order of event name */
434 
435 	{ "delayed_bypass",	0x0,	0x19 },
436 	{ "div",		0x0,	0x13 },
437 	{ "fp_assist",		0x0,	0x11 },
438 
439 	{ "mul",		0x0,	0x12 },
440 	{ "",			0x0,	NT_END }
441 };
442 
443 /* FFC entries must be in order */
/*
 * Names of the fixed-function counter events, NULL-terminated.  The non-HTT
 * and HTT variants differ only in their second entry
 * (cpu_clk_unhalted.core versus cpu_clk_unhalted.thread).
 */
444 static char *ffc_names_non_htt[] = {
445 	"instr_retired.any",
446 	"cpu_clk_unhalted.core",
447 	"cpu_clk_unhalted.ref",
448 	NULL
449 };
450 
451 static char *ffc_names_htt[] = {
452 	"instr_retired.any",
453 	"cpu_clk_unhalted.thread",
454 	"cpu_clk_unhalted.ref",
455 	NULL
456 };
457 
/*
 * Generic (PAPI) names for the fixed-function counter events,
 * NULL-terminated.
 * NOTE(review): looks parallel to the ffc_names_* tables above, with ""
 * where an event has no generic name - confirm against the code that uses it.
 */
458 static char *ffc_genericnames[] = {
459 	"PAPI_tot_ins",
460 	"PAPI_tot_cyc",
461 	"",
462 	NULL
463 };
464 
/*
 * File-scope state describing the performance-monitoring hardware.  None of
 * these variables is assigned in this part of the file.
 * NOTE(review): the remarks below marked "presumably" are inferred from the
 * names only; confirm them against the initialization code.
 */
465 static char	**ffc_names = NULL;	/* Presumably one of ffc_names_*htt */
466 static char	**ffc_allnames = NULL;
467 static char	**gpc_names = NULL;
468 static uint32_t	versionid;
469 static uint64_t	num_gpc;	/* Presumably the number of GPCs */
470 static uint64_t	width_gpc;	/* Bit count used by BITS_EXTENDED_FROM_31 */
471 static uint64_t	mask_gpc;
472 static uint64_t	num_ffc;	/* Presumably the number of FFCs */
473 static uint64_t	width_ffc;
474 static uint64_t	mask_ffc;
475 static uint_t	total_pmc;
476 static uint64_t	control_ffc;
477 static uint64_t	control_gpc;
478 static uint64_t	control_mask;
479 static uint32_t	arch_events_vector;
480 
/* Size of, and storage for, the implementation name string */
481 #define	IMPL_NAME_LEN 100
482 static char core_impl_name[IMPL_NAME_LEN];
483 
/*
 * Reference to the vendor documentation that describes the events.
 * Presumably this is the string handed out by core_pcbe_cpuref().
 */
static const char *core_cpuref =
	"See Appendix A of the \"Intel 64 and IA-32 Architectures Software" \
	" Developer's Manual Volume 3B: System Programming Guide, Part 2\"" \
	" Order Number: 253669-026US, February 2008";
488 
/*
 * One event table entry: the event select and unit mask values of the event,
 * a bitmask of the counters that support it (built with the C() macros that
 * follow) and the event's name.
 */
489 struct events_table_t {
490 	uint8_t		eventselect;
491 	uint8_t		unitmask;
492 	uint64_t	supported_counters;
493 	const char	*name;
494 };
495 
/*
 * Used to describe which counters support an event (the supported_counters
 * member of struct events_table_t).
 *
 * C(x) is a 64-bit mask with only bit x set, matching the width of
 * supported_counters and of C_ALL; x must be less than 64.  C0..C3 name the
 * first four counters and C_ALL selects every counter.
 */
#define	C(x) (1ULL << (x))
#define	C0 C(0)
#define	C1 C(1)
#define	C2 C(2)
#define	C3 C(3)
#define	C_ALL 0xFFFFFFFFFFFFFFFF
503 
504 /* Architectural events */
/*
 * ARCH_EVENTS_COMMON holds the entries shared by the two tables below, each
 * in events_table_t order: { event select, unit mask, supported counters,
 * name }.  Every entry is marked as supported on all counters (C_ALL).  The
 * HTT and non-HTT tables differ only in the name of their first entry
 * (event 0x3c, unit mask 0x00).
 */
505 #define	ARCH_EVENTS_COMMON					\
506 	{ 0xc0, 0x00, C_ALL, "inst_retired.any_p" },		\
507 	{ 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" },	\
508 	{ 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" },	\
509 	{ 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" },	\
510 	{ 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" },	\
511 	{ 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }
512 
513 static const struct events_table_t arch_events_table_non_htt[] = {
514 	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
515 	ARCH_EVENTS_COMMON
516 };
517 
518 static const struct events_table_t arch_events_table_htt[] = {
519 	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
520 	ARCH_EVENTS_COMMON
521 };
522 
/*
 * Generic (PAPI) names for the architectural events.  As the trailing
 * comments show, entry i corresponds to entry i of the arch_events_table_*
 * tables above.
 * NOTE(review): "" presumably marks an event that has no generic name -
 * confirm against the code that uses this table.
 */
523 static char *arch_genevents_table[] = {
524 	"PAPI_tot_cyc", /* cpu_clk_unhalted.thread_p/core */
525 	"PAPI_tot_ins", /* inst_retired.any_p		  */
526 	"",		/* cpu_clk_unhalted.ref_p	  */
527 	"",		/* longest_lat_cache.reference	  */
528 	"",		/* longest_lat_cache.miss	  */
529 	"",		/* br_inst_retired.all_branches	  */
530 	"",		/* br_misp_retired.all_branches	  */
531 };
532 
/*
 * NOTE(review): none of these is assigned in this part of the file.
 * arch_events_table presumably ends up pointing at arch_events_table_htt or
 * arch_events_table_non_htt; confirm the meaning of the two counts against
 * the initialization code.
 */
533 static const struct events_table_t *arch_events_table = NULL;
534 static uint64_t known_arch_events;
535 static uint64_t known_ffc_num;
536 
/*
 * Generic (PAPI) events of the FAM6_NHM event set (NHM presumably stands for
 * Nehalem), as a list of events_table_t initializers:
 * { event select, unit mask, supported counters, name }.  The trailing
 * comment on each entry names the native event(s) the generic event maps to.
 *
 * PAPI_l1_dcr and PAPI_l1_dcw are limited to C0|C1 so that they agree with
 * the native l1d_cache_ld.mesi (0x40/0x0f) and l1d_cache_st.mesi (0x41/0x0f)
 * entries of EVENTS_FAM6_NHM, which are available on counters 0 and 1 only.
 *
 * NOTE(review): PAPI_l1_dcm uses unit mask 0x03, which EVENTS_FAM6_NHM names
 * l2_rqsts.loads; RFOs (l2_rqsts.rfos) are 0x0c.  Confirm whether loads alone
 * or loads plus RFOs (0x0f) is intended.
 */
#define	GENERICEVENTS_FAM6_NHM						       \
{ 0xc4, 0x01, C0|C1|C2|C3, "PAPI_br_cn" },   /* br_inst_retired.conditional */ \
{ 0x1d, 0x01, C0|C1|C2|C3, "PAPI_hw_int" },  /* hw_int.rcv		    */ \
{ 0x17, 0x01, C0|C1|C2|C3, "PAPI_tot_iis" }, /* inst_queue_writes	    */ \
{ 0x43, 0x01, C0|C1,	   "PAPI_l1_dca" },  /* l1d_all_ref.any		    */ \
{ 0x24, 0x03, C0|C1|C2|C3, "PAPI_l1_dcm" },  /* l2_rqsts.loads (see NOTE)   */ \
{ 0x40, 0x0f, C0|C1,	   "PAPI_l1_dcr" },  /* l1d_cache_ld.mesi	    */ \
{ 0x41, 0x0f, C0|C1,	   "PAPI_l1_dcw" },  /* l1d_cache_st.mesi	    */ \
{ 0x80, 0x03, C0|C1|C2|C3, "PAPI_l1_ica" },  /* l1i.reads		    */ \
{ 0x80, 0x01, C0|C1|C2|C3, "PAPI_l1_ich" },  /* l1i.hits		    */ \
{ 0x80, 0x02, C0|C1|C2|C3, "PAPI_l1_icm" },  /* l1i.misses		    */ \
{ 0x80, 0x03, C0|C1|C2|C3, "PAPI_l1_icr" },  /* l1i.reads		    */ \
{ 0x24, 0x33, C0|C1|C2|C3, "PAPI_l1_ldm" },  /* l2_rqsts. loads and ifetches */\
{ 0x24, 0xff, C0|C1|C2|C3, "PAPI_l1_tcm" },  /* l2_rqsts.references	    */ \
{ 0x24, 0x02, C0|C1|C2|C3, "PAPI_l2_ldm" },  /* l2_rqsts.ld_miss	    */ \
{ 0x24, 0x08, C0|C1|C2|C3, "PAPI_l2_stm" },  /* l2_rqsts.rfo_miss	    */ \
{ 0x24, 0x3f, C0|C1|C2|C3, "PAPI_l2_tca" },				       \
				/* l2_rqsts. loads, rfos and ifetches */       \
{ 0x24, 0x15, C0|C1|C2|C3, "PAPI_l2_tch" },				       \
				/* l2_rqsts. ld_hit, rfo_hit and ifetch_hit */ \
{ 0x24, 0x2a, C0|C1|C2|C3, "PAPI_l2_tcm" },				       \
			/* l2_rqsts. ld_miss, rfo_miss and ifetch_miss */      \
{ 0x24, 0x33, C0|C1|C2|C3, "PAPI_l2_tcr" },  /* l2_rqsts. loads and ifetches */\
{ 0x24, 0x0c, C0|C1|C2|C3, "PAPI_l2_tcw" },  /* l2_rqsts.rfos		    */ \
{ 0x2e, 0x4f, C0|C1|C2|C3, "PAPI_l3_tca" },  /* l3_lat_cache.reference	    */ \
{ 0x2e, 0x41, C0|C1|C2|C3, "PAPI_l3_tcm" },  /* l3_lat_cache.misses	    */ \
{ 0x0b, 0x01, C0|C1|C2|C3, "PAPI_ld_ins" },  /* mem_inst_retired.loads	    */ \
{ 0x0b, 0x03, C0|C1|C2|C3, "PAPI_lst_ins" },				       \
				/* mem_inst_retired.loads and stores	    */ \
{ 0x26, 0xf0, C0|C1|C2|C3, "PAPI_prf_dm" },  /* l2_data_rqsts.prefetch.mesi */ \
{ 0x0b, 0x02, C0|C1|C2|C3, "PAPI_sr_ins" },  /* mem_inst_retired.stores	    */ \
{ 0x49, 0x01, C0|C1|C2|C3, "PAPI_tlb_dm" },  /* dtlb_misses.any		    */ \
{ 0x85, 0x01, C0|C1|C2|C3, "PAPI_tlb_im" }   /* itlb_misses.any		    */
570 
571 
572 #define	EVENTS_FAM6_NHM							\
573 									\
574 { 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" },			\
575 { 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" },				\
576 { 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" },				\
577 									\
578 { 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" },				\
579 { 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" },				\
580 { 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" },				\
581 									\
582 { 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" },			\
583 { 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" },				\
584 { 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" },				\
585 									\
586 { 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" },				\
587 { 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" },			\
588 { 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" },				\
589 									\
590 { 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" },			\
591 { 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" },				\
592 { 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" },			\
593 									\
594 { 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" },				\
595 { 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" },			\
596 { 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" },			\
597 									\
598 { 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" },			\
599 { 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" },		\
600 { 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" },		\
601 									\
602 { 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" },		\
603 { 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" },			\
604 { 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" },			\
605 									\
606 { 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" },		\
607 { 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" },			\
608 { 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" },			\
609 									\
610 { 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" },			\
611 { 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" },			\
612 { 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" },		\
613 									\
614 { 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" },		\
615 { 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" },		\
616 { 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" },			\
617 									\
618 { 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" },			\
619 { 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" },		\
620 { 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" },			\
621 									\
622 { 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" },			\
623 { 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" },			\
624 { 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" },			\
625 									\
626 { 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" },			\
627 { 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" },		\
628 { 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" },			\
629 									\
630 { 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" },			\
631 { 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" },		\
632 { 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" },			\
633 									\
634 { 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" },				\
635 { 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" },				\
636 { 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" },				\
637 									\
638 { 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" },			\
639 { 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" },			\
640 { 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" },			\
641 									\
642 { 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" },			\
643 { 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" },			\
644 { 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" },			\
645 									\
646 { 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" },			\
647 { 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" },			\
648 { 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" },			\
649 									\
650 { 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" },		\
651 { 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" },			\
652 { 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" },				\
653 									\
654 { 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" },			\
655 { 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" },				\
656 { 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" },			\
657 									\
658 { 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" },	\
659 { 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" },			\
660 { 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" },		\
661 									\
662 { 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" },		\
663 { 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" },	\
664 { 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" },		\
665 									\
666 { 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" },			\
667 { 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" },			\
668 { 0x14, 0x02, C0|C1|C2|C3, "arith.mul" },				\
669 									\
670 { 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" },			\
671 { 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" },		\
672 { 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" },		\
673 									\
674 { 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" },			\
675 { 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" },		\
676 { 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" },		\
677 									\
678 { 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" },			\
679 { 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" },			\
680 { 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" },		\
681 									\
682 { 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" },		\
683 { 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" },			\
684 { 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" },		\
685 									\
686 { 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" },		\
687 { 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" },			\
688 { 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" },			\
689 									\
690 { 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" },			\
691 { 0x40, 0x04, C0|C1, "l1d_cache_ld.e_state" },				\
692 { 0x40, 0x01, C0|C1, "l1d_cache_ld.i_state" },				\
693 									\
694 { 0x40, 0x08, C0|C1, "l1d_cache_ld.m_state" },				\
695 { 0x40, 0x0F, C0|C1, "l1d_cache_ld.mesi" },				\
696 { 0x40, 0x02, C0|C1, "l1d_cache_ld.s_state" },				\
697 									\
698 { 0x41, 0x04, C0|C1, "l1d_cache_st.e_state" },				\
699 { 0x41, 0x08, C0|C1, "l1d_cache_st.m_state" },				\
700 { 0x41, 0x0F, C0|C1, "l1d_cache_st.mesi" },				\
701 									\
702 { 0x41, 0x02, C0|C1, "l1d_cache_st.s_state" },				\
703 { 0x42, 0x04, C0|C1, "l1d_cache_lock.e_state" },			\
704 { 0x42, 0x01, C0|C1, "l1d_cache_lock.hit" },				\
705 									\
706 { 0x42, 0x08, C0|C1, "l1d_cache_lock.m_state" },			\
707 { 0x42, 0x02, C0|C1, "l1d_cache_lock.s_state" },			\
708 { 0x43, 0x01, C0|C1, "l1d_all_ref.any" },				\
709 									\
710 { 0x43, 0x02, C0|C1, "l1d_all_ref.cacheable" },				\
711 { 0x4B, 0x01, C0|C1, "mmx2_mem_exec.nta" },				\
712 { 0x4C, 0x01, C0|C1, "load_hit_pre" },					\
713 									\
714 { 0x4E, 0x02, C0|C1, "l1d_prefetch.miss" },				\
715 { 0x4E, 0x01, C0|C1, "l1d_prefetch.requests" },				\
716 { 0x4E, 0x04, C0|C1, "l1d_prefetch.triggers" },				\
717 									\
718 { 0x51, 0x04, C0|C1, "l1d.m_evict" },					\
719 { 0x51, 0x02, C0|C1, "l1d.m_repl" },					\
720 { 0x51, 0x08, C0|C1, "l1d.m_snoop_evict" },				\
721 									\
722 { 0x51, 0x01, C0|C1, "l1d.repl" },					\
723 { 0x52, 0x01, C0|C1, "l1d_cache_prefetch_lock_fb_hit" },		\
724 { 0x53, 0x01, C0|C1, "l1d_cache_lock_fb_hit" },				\
725 									\
726 { 0x63, 0x02, C0|C1, "cache_lock_cycles.l1d" },				\
727 { 0x63, 0x01, C0|C1, "cache_lock_cycles.l1d_l2" },			\
728 { 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" },			\
729 									\
730 { 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" },			\
731 { 0x06, 0x01, C0|C1|C2|C3, "store_blocks.not_sta" },			\
732 { 0x06, 0x02, C0|C1|C2|C3, "store_blocks.sta" },			\
733 									\
734 { 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" },			\
735 { 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" },			\
736 { 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" },			\
737 									\
738 { 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" },		\
739 { 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" },			\
740 { 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" },		\
741 									\
742 { 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" },		\
743 { 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" },				\
744 { 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" },			\
745 									\
746 { 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" },		\
747 { 0x4F, 0x02, C0|C1|C2|C3, "ept.epde_miss" },				\
748 { 0x4F, 0x08, C0|C1|C2|C3, "ept.epdpe_miss" },				\
749 									\
750 { 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" },				\
751 { 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" },		\
752 { 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" },				\
753 									\
754 { 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" },			\
755 { 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" },			\
756 { 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" },			\
757 									\
758 { 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" },			\
759 { 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" },				\
760 { 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" },			\
761 									\
762 { 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" },				\
763 { 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" },			\
764 { 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" },			\
765 									\
766 { 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" },			\
767 { 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" },			\
768 { 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" },			\
769 									\
770 { 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" },				\
771 { 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" },			\
772 { 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" },		\
773 									\
774 { 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" },		\
775 { 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" },		\
776 { 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" },		\
777 									\
778 { 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" },		\
779 { 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" },		\
780 { 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" },		\
781 									\
782 { 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" },		\
783 { 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" },		\
784 { 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" },		\
785 									\
786 { 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" },			\
787 { 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" },			\
788 { 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" },			\
789 									\
790 { 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" },			\
791 { 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" },			\
792 { 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" },			\
793 									\
794 { 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" },			\
795 { 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" },			\
796 { 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" },			\
797 									\
798 { 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" },			\
799 { 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" },			\
800 { 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" },			\
801 									\
802 { 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" },			\
803 { 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" },			\
804 { 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" },			\
805 									\
806 { 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" },			\
807 { 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" },		\
808 { 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" },			\
809 									\
810 { 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" },			\
811 { 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" },				\
812 { 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" },			\
813 									\
814 { 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" },			\
815 { 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" },			\
816 { 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" },		\
817 									\
818 { 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" },		\
819 { 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" },		\
820 { 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" },				\
821 									\
822 { 0xB0, 0x80, C0|C1|C2|C3, "offcore_requests.any" },			\
823 { 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" },		\
824 { 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" },		\
825 									\
826 { 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" },			\
827 { 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" },			\
828 { 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" },			\
829 									\
830 { 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" },			\
831 { 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" },			\
832 { 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" },			\
833 									\
834 { 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" },			\
835 { 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" },			\
836 { 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" },		\
837 									\
838 { 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" },		\
839 { 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" },		\
840 { 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" },		\
841 									\
842 { 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" },	\
843 { 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" },			\
844 { 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" },		\
845 									\
846 { 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" },			\
847 { 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" },		\
848 { 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" },		\
849 									\
850 { 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" },		\
851 { 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" },			\
852 { 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" },		\
853 									\
854 { 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" },	\
855 { 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" },	\
856 { 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm" },	\
857 									\
858 { 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit" },\
859 { 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" },		\
860 { 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" },		\
861 									\
862 { 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" },		\
863 { 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" },		\
864 { 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" },		\
865 									\
866 { 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" },		\
867 { 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" },				\
868 { 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" },				\
869 									\
870 { 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" },				\
871 { 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" },			\
872 { 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" },			\
873 									\
874 { 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" },			\
875 { 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" },			\
876 { 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" }
877 
/*
 * Generic (PAPI_*) event aliases for Family 6 Model 28.  Each initializer is
 * { event select, unit mask, supported-counter mask, name }; the comment
 * above an entry names the native event(s) the alias corresponds to.
 */
#define	GENERICEVENTS_FAM6_MOD28 \
	/* br_inst_retired.any */ \
	{ 0xc4, 0x00, C0|C1, "PAPI_br_ins" }, \
	/* br_inst_retired.mispred */ \
	{ 0xc5, 0x00, C0|C1, "PAPI_br_msp" }, \
	/* br_inst_retired.pred_not_taken|mispred_not_taken */ \
	{ 0xc4, 0x03, C0|C1, "PAPI_br_ntk" }, \
	/* br_inst_retired.pred_not_taken|pred_taken */ \
	{ 0xc4, 0x05, C0|C1, "PAPI_br_prc" }, \
	/* hw_int_rcv */ \
	{ 0xc8, 0x00, C0|C1, "PAPI_hw_int" }, \
	/* macro_insts.all_decoded */ \
	{ 0xaa, 0x03, C0|C1, "PAPI_tot_iis" }, \
	/* l1d_cache.ld|st */ \
	{ 0x40, 0x23, C0|C1, "PAPI_l1_dca" }, \
	/* l2_st.self.i_state */ \
	{ 0x2a, 0x41, C0|C1, "PAPI_l2_stm" }, \
	/* longest_lat_cache.reference */ \
	{ 0x2e, 0x4f, C0|C1, "PAPI_l2_tca" }, \
	/* l2_rqsts.mes */ \
	{ 0x2e, 0x4e, C0|C1, "PAPI_l2_tch" }, \
	/* longest_lat_cache.miss */ \
	{ 0x2e, 0x41, C0|C1, "PAPI_l2_tcm" }, \
	/* l2_st.self.mesi */ \
	{ 0x2a, 0x4f, C0|C1, "PAPI_l2_tcw" }, \
	/* data_tlb_misses.dtlb_miss */ \
	{ 0x08, 0x07, C0|C1, "PAPI_tlb_dm" }, \
	/* itlb.misses */ \
	{ 0x82, 0x02, C0|C1, "PAPI_tlb_im" }
895 
896 
/*
 * Native (non-architectural) events for Family 6 Model 28.  Each initializer
 * is { event select, unit mask, supported-counter mask, event name }; every
 * event listed here may be programmed on counter 0 or counter 1.
 */
#define	EVENTS_FAM6_MOD28 \
	{ 0x02, 0x81, C0|C1, "store_forwards.good" }, \
	{ 0x06, 0x00, C0|C1, "segment_reg_loads.any" }, \
	{ 0x07, 0x01, C0|C1, "prefetch.prefetcht0" }, \
	{ 0x07, 0x06, C0|C1, "prefetch.sw_l2" }, \
	{ 0x07, 0x08, C0|C1, "prefetch.prefetchnta" }, \
	{ 0x08, 0x07, C0|C1, "data_tlb_misses.dtlb_miss" }, \
	{ 0x08, 0x05, C0|C1, "data_tlb_misses.dtlb_miss_ld" }, \
	{ 0x08, 0x09, C0|C1, "data_tlb_misses.l0_dtlb_miss_ld" }, \
	{ 0x08, 0x06, C0|C1, "data_tlb_misses.dtlb_miss_st" }, \
	{ 0x0C, 0x03, C0|C1, "page_walks.cycles" }, \
	{ 0x10, 0x01, C0|C1, "x87_comp_ops_exe.any.s" }, \
	{ 0x10, 0x81, C0|C1, "x87_comp_ops_exe.any.ar" }, \
	{ 0x11, 0x01, C0|C1, "fp_assist" }, \
	{ 0x11, 0x81, C0|C1, "fp_assist.ar" }, \
	{ 0x12, 0x01, C0|C1, "mul.s" }, \
	{ 0x12, 0x81, C0|C1, "mul.ar" }, \
	{ 0x13, 0x01, C0|C1, "div.s" }, \
	{ 0x13, 0x81, C0|C1, "div.ar" }, \
	{ 0x14, 0x01, C0|C1, "cycles_div_busy" }, \
	{ 0x21, 0x00, C0|C1, "l2_ads" }, \
	{ 0x22, 0x00, C0|C1, "l2_dbus_busy" }, \
	{ 0x24, 0x00, C0|C1, "l2_lines_in" }, \
	{ 0x25, 0x00, C0|C1, "l2_m_lines_in" }, \
	{ 0x26, 0x00, C0|C1, "l2_lines_out" }, \
	{ 0x27, 0x00, C0|C1, "l2_m_lines_out" }, \
	{ 0x28, 0x00, C0|C1, "l2_ifetch" }, \
	{ 0x29, 0x00, C0|C1, "l2_ld" }, \
	{ 0x2A, 0x00, C0|C1, "l2_st" }, \
	{ 0x2B, 0x00, C0|C1, "l2_lock" }, \
	{ 0x2E, 0x00, C0|C1, "l2_rqsts" }, \
	{ 0x2E, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" }, \
	{ 0x2E, 0x4F, C0|C1, "l2_rqsts.self.demand.mesi" }, \
	{ 0x30, 0x00, C0|C1, "l2_reject_bus_q" }, \
	{ 0x32, 0x00, C0|C1, "l2_no_req" }, \
	{ 0x3A, 0x00, C0|C1, "eist_trans" }, \
	{ 0x3B, 0xC0, C0|C1, "thermal_trip" }, \
	{ 0x3C, 0x00, C0|C1, "cpu_clk_unhalted.core_p" }, \
	{ 0x3C, 0x01, C0|C1, "cpu_clk_unhalted.bus" }, \
	{ 0x3C, 0x02, C0|C1, "cpu_clk_unhalted.no_other" }, \
	{ 0x40, 0x21, C0|C1, "l1d_cache.ld" }, \
	{ 0x40, 0x22, C0|C1, "l1d_cache.st" }, \
	{ 0x60, 0x00, C0|C1, "bus_request_outstanding" }, \
	{ 0x61, 0x00, C0|C1, "bus_bnr_drv" }, \
	{ 0x62, 0x00, C0|C1, "bus_drdy_clocks" }, \
	{ 0x63, 0x00, C0|C1, "bus_lock_clocks" }, \
	{ 0x64, 0x00, C0|C1, "bus_data_rcv" }, \
	{ 0x65, 0x00, C0|C1, "bus_trans_brd" }, \
	{ 0x66, 0x00, C0|C1, "bus_trans_rfo" }, \
	{ 0x67, 0x00, C0|C1, "bus_trans_wb" }, \
	{ 0x68, 0x00, C0|C1, "bus_trans_ifetch" }, \
	{ 0x69, 0x00, C0|C1, "bus_trans_inval" }, \
	{ 0x6A, 0x00, C0|C1, "bus_trans_pwr" }, \
	{ 0x6B, 0x00, C0|C1, "bus_trans_p" }, \
	{ 0x6C, 0x00, C0|C1, "bus_trans_io" }, \
	{ 0x6D, 0x00, C0|C1, "bus_trans_def" }, \
	{ 0x6E, 0x00, C0|C1, "bus_trans_burst" }, \
	{ 0x6F, 0x00, C0|C1, "bus_trans_mem" }, \
	{ 0x70, 0x00, C0|C1, "bus_trans_any" }, \
	{ 0x77, 0x00, C0|C1, "ext_snoop" }, \
	{ 0x7A, 0x00, C0|C1, "bus_hit_drv" }, \
	{ 0x7B, 0x00, C0|C1, "bus_hitm_drv" }, \
	{ 0x7D, 0x00, C0|C1, "busq_empty" }, \
	{ 0x7E, 0x00, C0|C1, "snoop_stall_drv" }, \
	{ 0x7F, 0x00, C0|C1, "bus_io_wait" }, \
	{ 0x80, 0x03, C0|C1, "icache.accesses" }, \
	{ 0x80, 0x02, C0|C1, "icache.misses" }, \
	{ 0x82, 0x04, C0|C1, "itlb.flush" }, \
	{ 0x82, 0x02, C0|C1, "itlb.misses" }, \
	{ 0xAA, 0x02, C0|C1, "macro_insts.cisc_decoded" }, \
	{ 0xAA, 0x03, C0|C1, "macro_insts.all_decoded" }, \
	{ 0xB0, 0x00, C0|C1, "simd_uops_exec.s" }, \
	{ 0xB0, 0x80, C0|C1, "simd_uops_exec.ar" }, \
	{ 0xB1, 0x00, C0|C1, "simd_sat_uop_exec.s" }, \
	{ 0xB1, 0x80, C0|C1, "simd_sat_uop_exec.ar" }, \
	{ 0xB3, 0x01, C0|C1, "simd_uop_type_exec.mul.s" }, \
	{ 0xB3, 0x81, C0|C1, "simd_uop_type_exec.mul.ar" }, \
	{ 0xB3, 0x02, C0|C1, "simd_uop_type_exec.shift.s" }, \
	{ 0xB3, 0x82, C0|C1, "simd_uop_type_exec.shift.ar" }, \
	{ 0xB3, 0x04, C0|C1, "simd_uop_type_exec.pack.s" }, \
	{ 0xB3, 0x84, C0|C1, "simd_uop_type_exec.pack.ar" }, \
	{ 0xB3, 0x08, C0|C1, "simd_uop_type_exec.unpack.s" }, \
	{ 0xB3, 0x88, C0|C1, "simd_uop_type_exec.unpack.ar" }, \
	{ 0xB3, 0x10, C0|C1, "simd_uop_type_exec.logical.s" }, \
	{ 0xB3, 0x90, C0|C1, "simd_uop_type_exec.logical.ar" }, \
	{ 0xB3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic.s" }, \
	{ 0xB3, 0xA0, C0|C1, "simd_uop_type_exec.arithmetic.ar" }, \
	{ 0xC2, 0x10, C0|C1, "uops_retired.any" }, \
	{ 0xC3, 0x01, C0|C1, "machine_clears.smc" }, \
	{ 0xC4, 0x00, C0|C1, "br_inst_retired.any" }, \
	{ 0xC4, 0x01, C0|C1, "br_inst_retired.pred_not_taken" }, \
	{ 0xC4, 0x02, C0|C1, "br_inst_retired.mispred_not_taken" }, \
	{ 0xC4, 0x04, C0|C1, "br_inst_retired.pred_taken" }, \
	{ 0xC4, 0x08, C0|C1, "br_inst_retired.mispred_taken" }, \
	{ 0xC4, 0x0A, C0|C1, "br_inst_retired.mispred" }, \
	{ 0xC4, 0x0C, C0|C1, "br_inst_retired.taken" }, \
	{ 0xC4, 0x0F, C0|C1, "br_inst_retired.any1" }, \
	{ 0xC6, 0x01, C0|C1, "cycles_int_masked.cycles_int_masked" }, \
	{ 0xC6, 0x02, C0|C1, \
	    "cycles_int_masked.cycles_int_pending_and_masked" }, \
	{ 0xC7, 0x01, C0|C1, "simd_inst_retired.packed_single" }, \
	{ 0xC7, 0x02, C0|C1, "simd_inst_retired.scalar_single" }, \
	{ 0xC7, 0x04, C0|C1, "simd_inst_retired.packed_double" }, \
	{ 0xC7, 0x08, C0|C1, "simd_inst_retired.scalar_double" }, \
	{ 0xC7, 0x10, C0|C1, "simd_inst_retired.vector" }, \
	{ 0xC7, 0x1F, C0|C1, "simd_inst_retired.any" }, \
	{ 0xC8, 0x00, C0|C1, "hw_int_rcv" }, \
	{ 0xCA, 0x01, C0|C1, "simd_comp_inst_retired.packed_single" }, \
	{ 0xCA, 0x02, C0|C1, "simd_comp_inst_retired.scalar_single" }, \
	{ 0xCA, 0x04, C0|C1, "simd_comp_inst_retired.packed_double" }, \
	{ 0xCA, 0x08, C0|C1, "simd_comp_inst_retired.scalar_double" }, \
	{ 0xCB, 0x01, C0|C1, "mem_load_retired.l2_hit" }, \
	{ 0xCB, 0x02, C0|C1, "mem_load_retired.l2_miss" }, \
	{ 0xCB, 0x04, C0|C1, "mem_load_retired.dtlb_miss" }, \
	{ 0xCD, 0x00, C0|C1, "simd_assist" }, \
	{ 0xCE, 0x00, C0|C1, "simd_instr_retired" }, \
	{ 0xCF, 0x00, C0|C1, "simd_sat_instr_retired" }, \
	{ 0xE0, 0x01, C0|C1, "br_inst_decoded" }, \
	{ 0xE4, 0x01, C0|C1, "bogus_br" }, \
	{ 0xE6, 0x01, C0|C1, "baclears.any" }
1017 
/*
 * Model-specific (non-architectural) event table in use on this system.  It
 * is selected by core_pcbe_init() from the CPU model and stays NULL when the
 * model has no such table, so every user must check for NULL first.
 */
static const struct events_table_t *events_table = NULL;
1019 
1020 const struct events_table_t events_fam6_nhm[] = {
1021 	GENERICEVENTS_FAM6_NHM,
1022 	EVENTS_FAM6_NHM,
1023 	{ NT_END, 0, 0, "" }
1024 };
1025 
1026 const struct events_table_t events_fam6_mod28[] = {
1027 	GENERICEVENTS_FAM6_MOD28,
1028 	EVENTS_FAM6_MOD28,
1029 	{ NT_END, 0, 0, "" }
1030 };
1031 
1032 /*
1033  * Initialize string containing list of supported general-purpose counter
1034  * events for processors of Penryn and Merom Family
1035  */
1036 static void
1037 pcbe_init_core_uarch()
1038 {
1039 	const struct nametable_core_uarch	*n;
1040 	const struct generic_events		*k;
1041 	const struct nametable_core_uarch	*picspecific_events;
1042 	const struct generic_events		*picspecific_genericevents;
1043 	size_t			common_size;
1044 	size_t			size;
1045 	uint64_t		i;
1046 
1047 	gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
1048 
1049 	/* Calculate space needed to save all the common event names */
1050 	common_size = 0;
1051 	for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; n++) {
1052 		common_size += strlen(n->name) + 1;
1053 	}
1054 
1055 	for (k = cmn_generic_events; k->event_num != NT_END; k++) {
1056 		common_size += strlen(k->name) + 1;
1057 	}
1058 
1059 	for (i = 0; i < num_gpc; i++) {
1060 		size = 0;
1061 		picspecific_genericevents = NULL;
1062 
1063 		switch (i) {
1064 			case 0:
1065 				picspecific_events = pic0_events;
1066 				picspecific_genericevents = generic_events_pic0;
1067 				break;
1068 			case 1:
1069 				picspecific_events = pic1_events;
1070 				break;
1071 			default:
1072 				picspecific_events = NULL;
1073 				break;
1074 		}
1075 		if (picspecific_events != NULL) {
1076 			for (n = picspecific_events;
1077 			    n->event_num != NT_END;
1078 			    n++) {
1079 				size += strlen(n->name) + 1;
1080 			}
1081 		}
1082 		if (picspecific_genericevents != NULL) {
1083 			for (k = picspecific_genericevents;
1084 			    k->event_num != NT_END; k++) {
1085 				size += strlen(k->name) + 1;
1086 			}
1087 		}
1088 
1089 		gpc_names[i] =
1090 		    kmem_alloc(size + common_size + 1, KM_SLEEP);
1091 
1092 		gpc_names[i][0] = '\0';
1093 		if (picspecific_events != NULL) {
1094 			for (n = picspecific_events;
1095 			    n->event_num != NT_END; n++) {
1096 				(void) strcat(gpc_names[i], n->name);
1097 				(void) strcat(gpc_names[i], ",");
1098 			}
1099 		}
1100 		if (picspecific_genericevents != NULL) {
1101 			for (k = picspecific_genericevents;
1102 			    k->event_num != NT_END; k++) {
1103 				(void) strcat(gpc_names[i], k->name);
1104 				(void) strcat(gpc_names[i], ",");
1105 			}
1106 		}
1107 		for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END;
1108 		    n++) {
1109 			(void) strcat(gpc_names[i], n->name);
1110 			(void) strcat(gpc_names[i], ",");
1111 		}
1112 		for (k = cmn_generic_events; k->event_num != NT_END; k++) {
1113 			(void) strcat(gpc_names[i], k->name);
1114 			(void) strcat(gpc_names[i], ",");
1115 		}
1116 
1117 		/*
1118 		 * Remove trailing comma.
1119 		 */
1120 		gpc_names[i][common_size + size - 1] = '\0';
1121 	}
1122 }
1123 
1124 static int
1125 core_pcbe_init(void)
1126 {
1127 	struct cpuid_regs	cp;
1128 	size_t			size;
1129 	uint64_t		i;
1130 	uint64_t		j;
1131 	uint64_t		arch_events_vector_length;
1132 	size_t			arch_events_string_length;
1133 	uint_t			model;
1134 
1135 	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
1136 		return (-1);
1137 
1138 	/* Obtain Basic CPUID information */
1139 	cp.cp_eax = 0x0;
1140 	(void) __cpuid_insn(&cp);
1141 
1142 	/* No Architectural Performance Monitoring Leaf returned by CPUID */
1143 	if (cp.cp_eax < 0xa) {
1144 		return (-1);
1145 	}
1146 
1147 	/* Obtain the Architectural Performance Monitoring Leaf */
1148 	cp.cp_eax = 0xa;
1149 	(void) __cpuid_insn(&cp);
1150 
1151 	versionid = cp.cp_eax & 0xFF;
1152 
1153 	/*
1154 	 * Fixed-Function Counters (FFC)
1155 	 *
1156 	 * All Family 6 Model 15 and Model 23 processors have fixed-function
1157 	 * counters.  These counters were made Architectural with
1158 	 * Family 6 Model 15 Stepping 9.
1159 	 */
1160 	switch (versionid) {
1161 
1162 		case 0:
1163 			return (-1);
1164 
1165 		case 2:
1166 			num_ffc = cp.cp_edx & 0x1F;
1167 			width_ffc = (cp.cp_edx >> 5) & 0xFF;
1168 
1169 			/*
1170 			 * Some processors have an errata (AW34) where
1171 			 * versionid is reported as 2 when actually 1.
1172 			 * In this case, fixed-function counters are
1173 			 * model-specific as in Version 1.
1174 			 */
1175 			if (num_ffc != 0) {
1176 				break;
1177 			}
1178 			/* FALLTHROUGH */
1179 		case 1:
1180 			num_ffc = 3;
1181 			width_ffc = 40;
1182 			versionid = 1;
1183 			break;
1184 
1185 		default:
1186 			num_ffc = cp.cp_edx & 0x1F;
1187 			width_ffc = (cp.cp_edx >> 5) & 0xFF;
1188 			break;
1189 	}
1190 
1191 
1192 	if (num_ffc >= 64)
1193 		return (-1);
1194 
1195 	/* Set HTT-specific names of architectural & FFC events */
1196 	if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
1197 		ffc_names = ffc_names_htt;
1198 		arch_events_table = arch_events_table_htt;
1199 		known_arch_events =
1200 		    sizeof (arch_events_table_htt) /
1201 		    sizeof (struct events_table_t);
1202 		known_ffc_num =
1203 		    sizeof (ffc_names_htt) / sizeof (char *);
1204 	} else {
1205 		ffc_names = ffc_names_non_htt;
1206 		arch_events_table = arch_events_table_non_htt;
1207 		known_arch_events =
1208 		    sizeof (arch_events_table_non_htt) /
1209 		    sizeof (struct events_table_t);
1210 		known_ffc_num =
1211 		    sizeof (ffc_names_non_htt) / sizeof (char *);
1212 	}
1213 
1214 	if (num_ffc >= known_ffc_num) {
1215 		/*
1216 		 * The system seems to have more fixed-function counters than
1217 		 * what this PCBE is able to handle correctly.  Default to the
1218 		 * maximum number of fixed-function counters that this driver
1219 		 * is aware of.
1220 		 */
1221 		num_ffc = known_ffc_num - 1;
1222 	}
1223 
1224 	mask_ffc = BITMASK_XBITS(width_ffc);
1225 	control_ffc = BITMASK_XBITS(num_ffc);
1226 
1227 	/*
1228 	 * General Purpose Counters (GPC)
1229 	 */
1230 	num_gpc = (cp.cp_eax >> 8) & 0xFF;
1231 	width_gpc = (cp.cp_eax >> 16) & 0xFF;
1232 
1233 	if (num_gpc >= 64)
1234 		return (-1);
1235 
1236 	mask_gpc = BITMASK_XBITS(width_gpc);
1237 
1238 	control_gpc = BITMASK_XBITS(num_gpc);
1239 
1240 	control_mask = (control_ffc << 32) | control_gpc;
1241 
1242 	total_pmc = num_gpc + num_ffc;
1243 	if (total_pmc > 64) {
1244 		/* Too wide for the overflow bitmap */
1245 		return (-1);
1246 	}
1247 
1248 	/* FFC names */
1249 	ffc_allnames = kmem_alloc(num_ffc * sizeof (char *), KM_SLEEP);
1250 	for (i = 0; i < num_ffc; i++) {
1251 		ffc_allnames[i] = kmem_alloc(
1252 		    strlen(ffc_names[i]) + strlen(ffc_genericnames[i]) + 2,
1253 		    KM_SLEEP);
1254 
1255 		ffc_allnames[i][0] = '\0';
1256 		(void) strcat(ffc_allnames[i], ffc_names[i]);
1257 
1258 		/* Check if this ffc has a generic name */
1259 		if (strcmp(ffc_genericnames[i], "") != 0) {
1260 			(void) strcat(ffc_allnames[i], ",");
1261 			(void) strcat(ffc_allnames[i], ffc_genericnames[i]);
1262 		}
1263 	}
1264 
1265 	/* GPC events for Family 6 Models 15, 23 and 29 only */
1266 	if ((cpuid_getfamily(CPU) == 6) &&
1267 	    ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23) ||
1268 	    (cpuid_getmodel(CPU) == 29))) {
1269 		(void) snprintf(core_impl_name, IMPL_NAME_LEN,
1270 		    "Core Microarchitecture");
1271 		pcbe_init_core_uarch();
1272 		return (0);
1273 	}
1274 
1275 	(void) snprintf(core_impl_name, IMPL_NAME_LEN,
1276 	    "Intel Arch PerfMon v%d on Family %d Model %d",
1277 	    versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU));
1278 
1279 	/*
1280 	 * Architectural events
1281 	 */
1282 	arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF;
1283 
1284 	ASSERT(known_arch_events == arch_events_vector_length);
1285 
1286 	/*
1287 	 * To handle the case where a new performance monitoring setup is run
1288 	 * on a non-debug kernel
1289 	 */
1290 	if (known_arch_events > arch_events_vector_length) {
1291 		known_arch_events = arch_events_vector_length;
1292 	} else {
1293 		arch_events_vector_length = known_arch_events;
1294 	}
1295 
1296 	arch_events_vector = cp.cp_ebx &
1297 	    BITMASK_XBITS(arch_events_vector_length);
1298 
1299 	/*
1300 	 * Process architectural and non-architectural events using GPC
1301 	 */
1302 	if (num_gpc > 0) {
1303 
1304 		gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
1305 
1306 		/* Calculate space required for the architectural gpc events */
1307 		arch_events_string_length = 0;
1308 		for (i = 0; i < known_arch_events; i++) {
1309 			if (((1U << i) & arch_events_vector) == 0) {
1310 				arch_events_string_length +=
1311 				    strlen(arch_events_table[i].name) + 1;
1312 				if (strcmp(arch_genevents_table[i], "") != 0) {
1313 					arch_events_string_length +=
1314 					    strlen(arch_genevents_table[i]) + 1;
1315 				}
1316 			}
1317 		}
1318 
1319 		/* Non-architectural events list */
1320 		model = cpuid_getmodel(CPU);
1321 		switch (model) {
1322 			/* Nehalem */
1323 			case 26:
1324 			case 30:
1325 			case 31:
1326 			/* Westmere */
1327 			case 37:
1328 			case 44:
1329 			/* Nehalem-EX */
1330 			case 46:
1331 			case 47:
1332 				events_table = events_fam6_nhm;
1333 				break;
1334 			case 28:
1335 				events_table = events_fam6_mod28;
1336 				break;
1337 		}
1338 
1339 		for (i = 0; i < num_gpc; i++) {
1340 
1341 			/*
1342 			 * Determine length of all supported event names
1343 			 * (architectural + non-architectural)
1344 			 */
1345 			size = arch_events_string_length;
1346 			for (j = 0; events_table != NULL &&
1347 			    events_table[j].eventselect != NT_END;
1348 			    j++) {
1349 				if (C(i) & events_table[j].supported_counters) {
1350 					size += strlen(events_table[j].name) +
1351 					    1;
1352 				}
1353 			}
1354 
1355 			/* Allocate memory for this pics list */
1356 			gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP);
1357 			gpc_names[i][0] = '\0';
1358 			if (size == 0) {
1359 				continue;
1360 			}
1361 
1362 			/*
1363 			 * Create the list of all supported events
1364 			 * (architectural + non-architectural)
1365 			 */
1366 			for (j = 0; j < known_arch_events; j++) {
1367 				if (((1U << j) & arch_events_vector) == 0) {
1368 					(void) strcat(gpc_names[i],
1369 					    arch_events_table[j].name);
1370 					(void) strcat(gpc_names[i], ",");
1371 					if (strcmp(
1372 					    arch_genevents_table[j], "")
1373 					    != 0) {
1374 						(void) strcat(gpc_names[i],
1375 						    arch_genevents_table[j]);
1376 						(void) strcat(gpc_names[i],
1377 						    ",");
1378 					}
1379 				}
1380 			}
1381 
1382 			for (j = 0; events_table != NULL &&
1383 			    events_table[j].eventselect != NT_END;
1384 			    j++) {
1385 				if (C(i) & events_table[j].supported_counters) {
1386 					(void) strcat(gpc_names[i],
1387 					    events_table[j].name);
1388 					(void) strcat(gpc_names[i], ",");
1389 				}
1390 			}
1391 
1392 			/* Remove trailing comma */
1393 			gpc_names[i][size - 1] = '\0';
1394 		}
1395 	}
1396 
1397 	return (0);
1398 }
1399 
1400 static uint_t core_pcbe_ncounters()
1401 {
1402 	return (total_pmc);
1403 }
1404 
1405 static const char *core_pcbe_impl_name(void)
1406 {
1407 	return (core_impl_name);
1408 }
1409 
1410 static const char *core_pcbe_cpuref(void)
1411 {
1412 	return (core_cpuref);
1413 }
1414 
1415 static char *core_pcbe_list_events(uint_t picnum)
1416 {
1417 	ASSERT(picnum < cpc_ncounters);
1418 
1419 	if (picnum < num_gpc) {
1420 		return (gpc_names[picnum]);
1421 	} else {
1422 		return (ffc_allnames[picnum - num_gpc]);
1423 	}
1424 }
1425 
1426 static char *core_pcbe_list_attrs(void)
1427 {
1428 	if (versionid >= 3) {
1429 		return ("edge,inv,umask,cmask,anythr");
1430 	} else {
1431 		return ("edge,pc,inv,umask,cmask");
1432 	}
1433 }
1434 
1435 static const struct nametable_core_uarch *
1436 find_gpcevent_core_uarch(char *name,
1437     const struct nametable_core_uarch *nametable)
1438 {
1439 	const struct nametable_core_uarch *n;
1440 	int compare_result = -1;
1441 
1442 	for (n = nametable; n->event_num != NT_END; n++) {
1443 		compare_result = strcmp(name, n->name);
1444 		if (compare_result <= 0) {
1445 			break;
1446 		}
1447 	}
1448 
1449 	if (compare_result == 0) {
1450 		return (n);
1451 	}
1452 
1453 	return (NULL);
1454 }
1455 
1456 static const struct generic_events *
1457 find_generic_events(char *name, const struct generic_events *table)
1458 {
1459 	const struct generic_events *n;
1460 
1461 	for (n = table; n->event_num != NT_END; n++) {
1462 		if (strcmp(name, n->name) == 0) {
1463 			return (n);
1464 		};
1465 	}
1466 
1467 	return (NULL);
1468 }
1469 
1470 static const struct events_table_t *
1471 find_gpcevent(char *name)
1472 {
1473 	int i;
1474 
1475 	/* Search architectural events */
1476 	for (i = 0; i < known_arch_events; i++) {
1477 		if (strcmp(name, arch_events_table[i].name) == 0 ||
1478 		    strcmp(name, arch_genevents_table[i]) == 0) {
1479 			if (((1U << i) & arch_events_vector) == 0) {
1480 				return (&arch_events_table[i]);
1481 			}
1482 		}
1483 	}
1484 
1485 	/* Search non-architectural events */
1486 	if (events_table != NULL) {
1487 		for (i = 0; events_table[i].eventselect != NT_END; i++) {
1488 			if (strcmp(name, events_table[i].name) == 0) {
1489 				return (&events_table[i]);
1490 			}
1491 		}
1492 	}
1493 
1494 	return (NULL);
1495 }
1496 
1497 static uint64_t
1498 core_pcbe_event_coverage(char *event)
1499 {
1500 	uint64_t bitmap;
1501 	uint64_t bitmask;
1502 	const struct events_table_t *n;
1503 	int i;
1504 
1505 	bitmap = 0;
1506 
1507 	/* Is it an event that a GPC can track? */
1508 	if (versionid >= 3) {
1509 		n = find_gpcevent(event);
1510 		if (n != NULL) {
1511 			bitmap |= (n->supported_counters &
1512 			    BITMASK_XBITS(num_gpc));
1513 		}
1514 	} else {
1515 		if (find_generic_events(event, cmn_generic_events) != NULL) {
1516 			bitmap |= BITMASK_XBITS(num_gpc);
1517 		} if (find_generic_events(event, generic_events_pic0) != NULL) {
1518 			bitmap |= 1ULL;
1519 		} else if (find_gpcevent_core_uarch(event,
1520 		    cmn_gpc_events_core_uarch) != NULL) {
1521 			bitmap |= BITMASK_XBITS(num_gpc);
1522 		} else if (find_gpcevent_core_uarch(event, pic0_events) !=
1523 		    NULL) {
1524 			bitmap |= 1ULL;
1525 		} else if (find_gpcevent_core_uarch(event, pic1_events) !=
1526 		    NULL) {
1527 			bitmap |= 1ULL << 1;
1528 		}
1529 	}
1530 
1531 	/* Check if the event can be counted in the fixed-function counters */
1532 	if (num_ffc > 0) {
1533 		bitmask = 1ULL << num_gpc;
1534 		for (i = 0; i < num_ffc; i++) {
1535 			if (strcmp(event, ffc_names[i]) == 0) {
1536 				bitmap |= bitmask;
1537 			} else if (strcmp(event, ffc_genericnames[i]) == 0) {
1538 				bitmap |= bitmask;
1539 			}
1540 			bitmask = bitmask << 1;
1541 		}
1542 	}
1543 
1544 	return (bitmap);
1545 }
1546 
1547 static uint64_t
1548 core_pcbe_overflow_bitmap(void)
1549 {
1550 	uint64_t interrupt_status;
1551 	uint64_t intrbits_ffc;
1552 	uint64_t intrbits_gpc;
1553 	extern int kcpc_hw_overflow_intr_installed;
1554 	uint64_t overflow_bitmap;
1555 
1556 	RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
1557 	WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);
1558 
1559 	interrupt_status = interrupt_status & control_mask;
1560 	intrbits_ffc = (interrupt_status >> 32) & control_ffc;
1561 	intrbits_gpc = interrupt_status & control_gpc;
1562 	overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;
1563 
1564 	ASSERT(kcpc_hw_overflow_intr_installed);
1565 	(*kcpc_hw_enable_cpc_intr)();
1566 
1567 	return (overflow_bitmap);
1568 }
1569 
1570 static int
1571 check_cpc_securitypolicy(core_pcbe_config_t *conf,
1572     const struct nametable_core_uarch *n)
1573 {
1574 	if (conf->core_ctl & n->restricted_bits) {
1575 		if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1576 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
1577 		}
1578 	}
1579 	return (0);
1580 }
1581 
1582 static int
1583 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1584     uint_t nattrs, kcpc_attr_t *attrs, void **data)
1585 {
1586 	core_pcbe_config_t	conf;
1587 	const struct nametable_core_uarch	*n;
1588 	const struct generic_events *k = NULL;
1589 	const struct nametable_core_uarch	*m;
1590 	const struct nametable_core_uarch	*picspecific_events;
1591 	struct nametable_core_uarch	nt_raw = { "", 0x0, 0x0 };
1592 	uint_t			i;
1593 	long			event_num;
1594 	const struct events_table_t *eventcode;
1595 
1596 	if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
1597 	    ((preset & BITS_EXTENDED_FROM_31) !=
1598 	    BITS_EXTENDED_FROM_31)) {
1599 
1600 		/*
1601 		 * Bits beyond bit-31 in the general-purpose counters can only
1602 		 * be written to by extension of bit 31.  We cannot preset
1603 		 * these bits to any value other than all 1s or all 0s.
1604 		 */
1605 		return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1606 	}
1607 
1608 	if (versionid >= 3) {
1609 		eventcode = find_gpcevent(event);
1610 		if (eventcode != NULL) {
1611 			if ((C(picnum) & eventcode->supported_counters) == 0) {
1612 				return (CPC_PIC_NOT_CAPABLE);
1613 			}
1614 			if (nattrs > 0 &&
1615 			    (strncmp("PAPI_", event, 5) == 0)) {
1616 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1617 			}
1618 			conf.core_ctl = eventcode->eventselect;
1619 			conf.core_ctl |= eventcode->unitmask <<
1620 			    CORE_UMASK_SHIFT;
1621 		} else {
1622 			/* Event specified as raw event code */
1623 			if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
1624 				return (CPC_INVALID_EVENT);
1625 			}
1626 			conf.core_ctl = event_num & 0xFF;
1627 		}
1628 	} else {
1629 		if ((k = find_generic_events(event, cmn_generic_events)) !=
1630 		    NULL ||
1631 		    (picnum == 0 &&
1632 		    (k = find_generic_events(event, generic_events_pic0)) !=
1633 		    NULL)) {
1634 			if (nattrs > 0) {
1635 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1636 			}
1637 			conf.core_ctl = k->event_num;
1638 			conf.core_ctl |= k->umask << CORE_UMASK_SHIFT;
1639 		} else {
1640 			/* Not a generic event */
1641 
1642 			n = find_gpcevent_core_uarch(event,
1643 			    cmn_gpc_events_core_uarch);
1644 			if (n == NULL) {
1645 				switch (picnum) {
1646 					case 0:
1647 						picspecific_events =
1648 						    pic0_events;
1649 						break;
1650 					case 1:
1651 						picspecific_events =
1652 						    pic1_events;
1653 						break;
1654 					default:
1655 						picspecific_events = NULL;
1656 						break;
1657 				}
1658 				if (picspecific_events != NULL) {
1659 					n = find_gpcevent_core_uarch(event,
1660 					    picspecific_events);
1661 				}
1662 			}
1663 			if (n == NULL) {
1664 
1665 				/*
1666 				 * Check if this is a case where the event was
1667 				 * specified directly by its event number
1668 				 * instead of its name string.
1669 				 */
1670 				if (ddi_strtol(event, NULL, 0, &event_num) !=
1671 				    0) {
1672 					return (CPC_INVALID_EVENT);
1673 				}
1674 
1675 				event_num = event_num & 0xFF;
1676 
1677 				/*
1678 				 * Search the event table to find out if the
1679 				 * event specified has an privilege
1680 				 * requirements.  Currently none of the
1681 				 * pic-specific counters have any privilege
1682 				 * requirements.  Hence only the table
1683 				 * cmn_gpc_events_core_uarch is searched.
1684 				 */
1685 				for (m = cmn_gpc_events_core_uarch;
1686 				    m->event_num != NT_END;
1687 				    m++) {
1688 					if (event_num == m->event_num) {
1689 						break;
1690 					}
1691 				}
1692 				if (m->event_num == NT_END) {
1693 					nt_raw.event_num = (uint8_t)event_num;
1694 					n = &nt_raw;
1695 				} else {
1696 					n = m;
1697 				}
1698 			}
1699 			conf.core_ctl = n->event_num; /* Event Select */
1700 		}
1701 	}
1702 
1703 
1704 	conf.core_picno = picnum;
1705 	conf.core_pictype = CORE_GPC;
1706 	conf.core_rawpic = preset & mask_gpc;
1707 
1708 	conf.core_pes = GPC_BASE_PES + picnum;
1709 	conf.core_pmc = GPC_BASE_PMC + picnum;
1710 
1711 	for (i = 0; i < nattrs; i++) {
1712 		if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
1713 			if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
1714 			    CORE_UMASK_MASK) {
1715 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1716 			}
1717 			/* Clear out the default umask */
1718 			conf.core_ctl &= ~ (CORE_UMASK_MASK <<
1719 			    CORE_UMASK_SHIFT);
1720 			/* Use the user provided umask */
1721 			conf.core_ctl |= attrs[i].ka_val <<
1722 			    CORE_UMASK_SHIFT;
1723 		} else  if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
1724 			if (attrs[i].ka_val != 0)
1725 				conf.core_ctl |= CORE_EDGE;
1726 		} else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
1727 			if (attrs[i].ka_val != 0)
1728 				conf.core_ctl |= CORE_INV;
1729 		} else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
1730 			if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
1731 			    CORE_CMASK_MASK) {
1732 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1733 			}
1734 			conf.core_ctl |= attrs[i].ka_val <<
1735 			    CORE_CMASK_SHIFT;
1736 		} else if (strncmp(attrs[i].ka_name, "anythr", 7) ==
1737 		    0) {
1738 			if (versionid < 3)
1739 				return (CPC_INVALID_ATTRIBUTE);
1740 			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1741 				return (CPC_ATTR_REQUIRES_PRIVILEGE);
1742 			}
1743 			if (attrs[i].ka_val != 0)
1744 				conf.core_ctl |= CORE_ANYTHR;
1745 		} else {
1746 			return (CPC_INVALID_ATTRIBUTE);
1747 		}
1748 	}
1749 
1750 	if (flags & CPC_COUNT_USER)
1751 		conf.core_ctl |= CORE_USR;
1752 	if (flags & CPC_COUNT_SYSTEM)
1753 		conf.core_ctl |= CORE_OS;
1754 	if (flags & CPC_OVF_NOTIFY_EMT)
1755 		conf.core_ctl |= CORE_INT;
1756 	conf.core_ctl |= CORE_EN;
1757 
1758 	if (versionid < 3 && k == NULL) {
1759 		if (check_cpc_securitypolicy(&conf, n) != 0) {
1760 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
1761 		}
1762 	}
1763 
1764 	*data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1765 	*((core_pcbe_config_t *)*data) = conf;
1766 
1767 	return (0);
1768 }
1769 
1770 static int
1771 configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1772     uint_t nattrs, kcpc_attr_t *attrs, void **data)
1773 {
1774 	core_pcbe_config_t	*conf;
1775 	uint_t			i;
1776 
1777 	if (picnum - num_gpc >= num_ffc) {
1778 		return (CPC_INVALID_PICNUM);
1779 	}
1780 
1781 	if ((strcmp(ffc_names[picnum-num_gpc], event) != 0) &&
1782 	    (strcmp(ffc_genericnames[picnum-num_gpc], event) != 0)) {
1783 		return (CPC_INVALID_EVENT);
1784 	}
1785 
1786 	if ((versionid < 3) && (nattrs != 0)) {
1787 		return (CPC_INVALID_ATTRIBUTE);
1788 	}
1789 
1790 	conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1791 	conf->core_ctl = 0;
1792 
1793 	for (i = 0; i < nattrs; i++) {
1794 		if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) {
1795 			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1796 				return (CPC_ATTR_REQUIRES_PRIVILEGE);
1797 			}
1798 			if (attrs[i].ka_val != 0) {
1799 				conf->core_ctl |= CORE_FFC_ANYTHR;
1800 			}
1801 		} else {
1802 			kmem_free(conf, sizeof (core_pcbe_config_t));
1803 			return (CPC_INVALID_ATTRIBUTE);
1804 		}
1805 	}
1806 
1807 	conf->core_picno = picnum;
1808 	conf->core_pictype = CORE_FFC;
1809 	conf->core_rawpic = preset & mask_ffc;
1810 	conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);
1811 
1812 	/* All fixed-function counters have the same control register */
1813 	conf->core_pes = PERF_FIXED_CTR_CTRL;
1814 
1815 	if (flags & CPC_COUNT_USER)
1816 		conf->core_ctl |= CORE_FFC_USR_EN;
1817 	if (flags & CPC_COUNT_SYSTEM)
1818 		conf->core_ctl |= CORE_FFC_OS_EN;
1819 	if (flags & CPC_OVF_NOTIFY_EMT)
1820 		conf->core_ctl |= CORE_FFC_PMI;
1821 
1822 	*data = conf;
1823 	return (0);
1824 }
1825 
1826 /*ARGSUSED*/
1827 static int
1828 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
1829     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
1830     void *token)
1831 {
1832 	int			ret;
1833 	core_pcbe_config_t	*conf;
1834 
1835 	/*
1836 	 * If we've been handed an existing configuration, we need only preset
1837 	 * the counter value.
1838 	 */
1839 	if (*data != NULL) {
1840 		conf = *data;
1841 		ASSERT(conf->core_pictype == CORE_GPC ||
1842 		    conf->core_pictype == CORE_FFC);
1843 		if (conf->core_pictype == CORE_GPC)
1844 			conf->core_rawpic = preset & mask_gpc;
1845 		else /* CORE_FFC */
1846 			conf->core_rawpic = preset & mask_ffc;
1847 		return (0);
1848 	}
1849 
1850 	if (picnum >= total_pmc) {
1851 		return (CPC_INVALID_PICNUM);
1852 	}
1853 
1854 	if (picnum < num_gpc) {
1855 		ret = configure_gpc(picnum, event, preset, flags,
1856 		    nattrs, attrs, data);
1857 	} else {
1858 		ret = configure_ffc(picnum, event, preset, flags,
1859 		    nattrs, attrs, data);
1860 	}
1861 	return (ret);
1862 }
1863 
1864 static void
1865 core_pcbe_program(void *token)
1866 {
1867 	core_pcbe_config_t	*cfg;
1868 	uint64_t		perf_global_ctrl;
1869 	uint64_t		perf_fixed_ctr_ctrl;
1870 	uint64_t		curcr4;
1871 
1872 	core_pcbe_allstop();
1873 
1874 	curcr4 = getcr4();
1875 	if (kcpc_allow_nonpriv(token))
1876 		/* Allow RDPMC at any ring level */
1877 		setcr4(curcr4 | CR4_PCE);
1878 	else
1879 		/* Allow RDPMC only at ring 0 */
1880 		setcr4(curcr4 & ~CR4_PCE);
1881 
1882 	/* Clear any overflow indicators before programming the counters */
1883 	WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);
1884 
1885 	cfg = NULL;
1886 	perf_global_ctrl = 0;
1887 	perf_fixed_ctr_ctrl = 0;
1888 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
1889 	while (cfg != NULL) {
1890 		ASSERT(cfg->core_pictype == CORE_GPC ||
1891 		    cfg->core_pictype == CORE_FFC);
1892 
1893 		if (cfg->core_pictype == CORE_GPC) {
1894 			/*
1895 			 * General-purpose counter registers have write
1896 			 * restrictions where only the lower 32-bits can be
1897 			 * written to.  The rest of the relevant bits are
1898 			 * written to by extension from bit 31 (all ZEROS if
1899 			 * bit-31 is ZERO and all ONE if bit-31 is ONE).  This
1900 			 * makes it possible to write to the counter register
1901 			 * only values that have all ONEs or all ZEROs in the
1902 			 * higher bits.
1903 			 */
1904 			if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
1905 			    ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
1906 			    BITS_EXTENDED_FROM_31)) {
1907 				/*
1908 				 * Straighforward case where the higher bits
1909 				 * are all ZEROs or all ONEs.
1910 				 */
1911 				WRMSR(cfg->core_pmc,
1912 				    (cfg->core_rawpic & mask_gpc));
1913 			} else {
1914 				/*
1915 				 * The high order bits are not all the same.
1916 				 * We save what is currently in the registers
1917 				 * and do not write to it.  When we want to do
1918 				 * a read from this register later (in
1919 				 * core_pcbe_sample()), we subtract the value
1920 				 * we save here to get the actual event count.
1921 				 *
1922 				 * NOTE: As a result, we will not get overflow
1923 				 * interrupts as expected.
1924 				 */
1925 				RDMSR(cfg->core_pmc, cfg->core_rawpic);
1926 				cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
1927 			}
1928 			WRMSR(cfg->core_pes, cfg->core_ctl);
1929 			perf_global_ctrl |= 1ull << cfg->core_picno;
1930 		} else {
1931 			/*
1932 			 * Unlike the general-purpose counters, all relevant
1933 			 * bits of fixed-function counters can be written to.
1934 			 */
1935 			WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);
1936 
1937 			/*
1938 			 * Collect the control bits for all the
1939 			 * fixed-function counters and write it at one shot
1940 			 * later in this function
1941 			 */
1942 			perf_fixed_ctr_ctrl |= cfg->core_ctl <<
1943 			    ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
1944 			perf_global_ctrl |=
1945 			    1ull << (cfg->core_picno - num_gpc + 32);
1946 		}
1947 
1948 		cfg = (core_pcbe_config_t *)
1949 		    kcpc_next_config(token, cfg, NULL);
1950 	}
1951 
1952 	/* Enable all the counters */
1953 	WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
1954 	WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
1955 }
1956 
1957 static void
1958 core_pcbe_allstop(void)
1959 {
1960 	/* Disable all the counters together */
1961 	WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);
1962 
1963 	setcr4(getcr4() & ~CR4_PCE);
1964 }
1965 
1966 static void
1967 core_pcbe_sample(void *token)
1968 {
1969 	uint64_t		*daddr;
1970 	uint64_t		curpic;
1971 	core_pcbe_config_t	*cfg;
1972 	uint64_t			counter_mask;
1973 
1974 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
1975 	while (cfg != NULL) {
1976 		ASSERT(cfg->core_pictype == CORE_GPC ||
1977 		    cfg->core_pictype == CORE_FFC);
1978 
1979 		curpic = rdmsr(cfg->core_pmc);
1980 
1981 		DTRACE_PROBE4(core__pcbe__sample,
1982 		    uint64_t, cfg->core_pmc,
1983 		    uint64_t, curpic,
1984 		    uint64_t, cfg->core_rawpic,
1985 		    uint64_t, *daddr);
1986 
1987 		if (cfg->core_pictype == CORE_GPC) {
1988 			counter_mask = mask_gpc;
1989 		} else {
1990 			counter_mask = mask_ffc;
1991 		}
1992 		curpic = curpic & counter_mask;
1993 		if (curpic >= cfg->core_rawpic) {
1994 			*daddr += curpic - cfg->core_rawpic;
1995 		} else {
1996 			/* Counter overflowed since our last sample */
1997 			*daddr += counter_mask - (cfg->core_rawpic - curpic) +
1998 			    1;
1999 		}
2000 		cfg->core_rawpic = *daddr & counter_mask;
2001 
2002 		cfg =
2003 		    (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
2004 	}
2005 }
2006 
2007 static void
2008 core_pcbe_free(void *config)
2009 {
2010 	kmem_free(config, sizeof (core_pcbe_config_t));
2011 }
2012 
2013 static struct modlpcbe core_modlpcbe = {
2014 	&mod_pcbeops,
2015 	"Core Performance Counters",
2016 	&core_pcbe_ops
2017 };
2018 
2019 static struct modlinkage core_modl = {
2020 	MODREV_1,
2021 	&core_modlpcbe,
2022 };
2023 
2024 int
2025 _init(void)
2026 {
2027 	if (core_pcbe_init() != 0) {
2028 		return (ENOTSUP);
2029 	}
2030 	return (mod_install(&core_modl));
2031 }
2032 
2033 int
2034 _fini(void)
2035 {
2036 	return (mod_remove(&core_modl));
2037 }
2038 
2039 int
2040 _info(struct modinfo *mi)
2041 {
2042 	return (mod_info(&core_modl, mi));
2043 }
2044