xref: /illumos-gate/usr/src/uts/intel/pcbe/opteron_pcbe.c (revision 77b329fe26621b3cfb925eff15c715cb80abd1f8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
/*
 * Performance Counter Back-End for AMD Opteron, AMD Athlon 64, and
 * AMD Family 10h processors.
 */
31 
32 #include <sys/cpuvar.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/cpc_pcbe.h>
36 #include <sys/kmem.h>
37 #include <sys/sdt.h>
38 #include <sys/modctl.h>
39 #include <sys/errno.h>
40 #include <sys/debug.h>
41 #include <sys/archsystm.h>
42 #include <sys/x86_archext.h>
43 #include <sys/privregs.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 
47 static int opt_pcbe_init(void);
48 static uint_t opt_pcbe_ncounters(void);
49 static const char *opt_pcbe_impl_name(void);
50 static const char *opt_pcbe_cpuref(void);
51 static char *opt_pcbe_list_events(uint_t picnum);
52 static char *opt_pcbe_list_attrs(void);
53 static uint64_t opt_pcbe_event_coverage(char *event);
54 static uint64_t opt_pcbe_overflow_bitmap(void);
55 static int opt_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
56     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
57     void *token);
58 static void opt_pcbe_program(void *token);
59 static void opt_pcbe_allstop(void);
60 static void opt_pcbe_sample(void *token);
61 static void opt_pcbe_free(void *config);
62 
63 static pcbe_ops_t opt_pcbe_ops = {
64 	PCBE_VER_1,
65 	CPC_CAP_OVERFLOW_INTERRUPT,
66 	opt_pcbe_ncounters,
67 	opt_pcbe_impl_name,
68 	opt_pcbe_cpuref,
69 	opt_pcbe_list_events,
70 	opt_pcbe_list_attrs,
71 	opt_pcbe_event_coverage,
72 	opt_pcbe_overflow_bitmap,
73 	opt_pcbe_configure,
74 	opt_pcbe_program,
75 	opt_pcbe_allstop,
76 	opt_pcbe_sample,
77 	opt_pcbe_free
78 };
79 
80 /*
81  * Define offsets and masks for the fields in the Performance
82  * Event-Select (PES) registers.
83  */
84 #define	OPT_PES_HOST_SHIFT	41
85 #define	OPT_PES_GUEST_SHIFT	40
86 #define	OPT_PES_CMASK_SHIFT	24
87 #define	OPT_PES_CMASK_MASK	0xFF
88 #define	OPT_PES_INV_SHIFT	23
89 #define	OPT_PES_ENABLE_SHIFT	22
90 #define	OPT_PES_INT_SHIFT	20
91 #define	OPT_PES_PC_SHIFT	19
92 #define	OPT_PES_EDGE_SHIFT	18
93 #define	OPT_PES_OS_SHIFT	17
94 #define	OPT_PES_USR_SHIFT	16
95 #define	OPT_PES_UMASK_SHIFT	8
96 #define	OPT_PES_UMASK_MASK	0xFF
97 
98 #define	OPT_PES_INV		(1ULL << OPT_PES_INV_SHIFT)
99 #define	OPT_PES_ENABLE		(1ULL << OPT_PES_ENABLE_SHIFT)
100 #define	OPT_PES_INT		(1ULL << OPT_PES_INT_SHIFT)
101 #define	OPT_PES_PC		(1ULL << OPT_PES_PC_SHIFT)
102 #define	OPT_PES_EDGE		(1ULL << OPT_PES_EDGE_SHIFT)
103 #define	OPT_PES_OS		(1ULL << OPT_PES_OS_SHIFT)
104 #define	OPT_PES_USR		(1ULL << OPT_PES_USR_SHIFT)
105 #define	OPT_PES_HOST		(1ULL << OPT_PES_HOST_SHIFT)
106 #define	OPT_PES_GUEST		(1ULL << OPT_PES_GUEST_SHIFT)
107 
/*
 * Per-counter configuration.  One of these is allocated by
 * opt_pcbe_configure() for every request and later written to the hardware
 * by opt_pcbe_program().
 */
typedef struct _opt_pcbe_config {
	uint8_t		opt_picno;	/* Counter number: 0, 1, 2, or 3 */
	uint64_t	opt_evsel;	/* Event Selection register */
	uint64_t	opt_rawpic;	/* Raw counter value */
} opt_pcbe_config_t;

/*
 * Placeholder configurations, one per counter, with a zero event select and
 * a zero counter value.  opt_pcbe_program() uses these for any counter that
 * has no real configuration.  The table is only referenced from this file,
 * so it is given internal linkage to keep the generic name out of the
 * global symbol namespace.
 */
static opt_pcbe_config_t nullcfgs[4] = {
	{ 0, 0, 0 },
	{ 1, 0, 0 },
	{ 2, 0, 0 },
	{ 3, 0, 0 }
};
120 
/*
 * One entry of an event table.  A table is terminated by an entry whose
 * name is NULL (see EV_END).
 */
typedef struct _amd_event {
	const char	*name;		/* Event name (string literal) */
	uint16_t	emask;		/* Event select code */
	uint8_t		umask_valid;	/* Mask of unreserved UNIT_MASK bits */
} amd_event_t;
126 
127 /*
128  * Base MSR addresses for the PerfEvtSel registers and the counters themselves.
129  * Add counter number to base address to get corresponding MSR address.
130  */
131 #define	PES_BASE_ADDR	0xC0010000
132 #define	PIC_BASE_ADDR	0xC0010004
133 
134 #define	MASK48		0xFFFFFFFFFFFF
135 
136 #define	EV_END {NULL, 0, 0}
137 
138 #define	AMD_cmn_events							\
139 	{ "FP_dispatched_fpu_ops",			0x0, 0x3F },	\
140 	{ "FP_cycles_no_fpu_ops_retired",		0x1, 0x0 },	\
141 	{ "FP_dispatched_fpu_ops_ff",			0x2, 0x0 },	\
142 	{ "LS_seg_reg_load",				0x20, 0x7F },	\
143 	{ "LS_uarch_resync_self_modify",		0x21, 0x0 },	\
144 	{ "LS_uarch_resync_snoop",			0x22, 0x0 },	\
145 	{ "LS_buffer_2_full",				0x23, 0x0 },	\
146 	{ "LS_retired_cflush",				0x26, 0x0 },	\
147 	{ "LS_retired_cpuid",				0x27, 0x0 },	\
148 	{ "DC_access",					0x40, 0x0 },	\
149 	{ "DC_miss",					0x41, 0x0 },	\
150 	{ "DC_refill_from_L2",				0x42, 0x1F },	\
151 	{ "DC_refill_from_system",			0x43, 0x1F },	\
152 	{ "DC_misaligned_data_ref",			0x47, 0x0 },	\
153 	{ "DC_uarch_late_cancel_access",		0x48, 0x0 },	\
154 	{ "DC_uarch_early_cancel_access",		0x49, 0x0 },	\
155 	{ "DC_dispatched_prefetch_instr",		0x4B, 0x7 },	\
156 	{ "DC_dcache_accesses_by_locks",		0x4C, 0x2 },	\
157 	{ "BU_memory_requests",				0x65, 0x83},	\
158 	{ "BU_data_prefetch",				0x67, 0x3 },	\
159 	{ "BU_cpu_clk_unhalted",			0x76, 0x0 },	\
160 	{ "IC_fetch",					0x80, 0x0 },	\
161 	{ "IC_miss",					0x81, 0x0 },	\
162 	{ "IC_refill_from_L2",				0x82, 0x0 },	\
163 	{ "IC_refill_from_system",			0x83, 0x0 },	\
164 	{ "IC_itlb_L1_miss_L2_hit",			0x84, 0x0 },	\
165 	{ "IC_uarch_resync_snoop",			0x86, 0x0 },	\
166 	{ "IC_instr_fetch_stall",			0x87, 0x0 },	\
167 	{ "IC_return_stack_hit",			0x88, 0x0 },	\
168 	{ "IC_return_stack_overflow",			0x89, 0x0 },	\
169 	{ "FR_retired_x86_instr_w_excp_intr",		0xC0, 0x0 },	\
170 	{ "FR_retired_uops",				0xC1, 0x0 },	\
171 	{ "FR_retired_branches_w_excp_intr",		0xC2, 0x0 },	\
172 	{ "FR_retired_branches_mispred",		0xC3, 0x0 },	\
173 	{ "FR_retired_taken_branches",			0xC4, 0x0 },	\
174 	{ "FR_retired_taken_branches_mispred",		0xC5, 0x0 },	\
175 	{ "FR_retired_far_ctl_transfer",		0xC6, 0x0 },	\
176 	{ "FR_retired_resyncs",				0xC7, 0x0 },	\
177 	{ "FR_retired_near_rets",			0xC8, 0x0 },	\
178 	{ "FR_retired_near_rets_mispred",		0xC9, 0x0 },	\
179 	{ "FR_retired_taken_branches_mispred_addr_miscomp",	0xCA, 0x0 },\
180 	{ "FR_retired_fastpath_double_op_instr",	0xCC, 0x7 },	\
181 	{ "FR_intr_masked_cycles",			0xCD, 0x0 },	\
182 	{ "FR_intr_masked_while_pending_cycles",	0xCE, 0x0 },	\
183 	{ "FR_taken_hardware_intrs",			0xCF, 0x0 },	\
184 	{ "FR_nothing_to_dispatch",			0xD0, 0x0 },	\
185 	{ "FR_dispatch_stalls",				0xD1, 0x0 },	\
186 	{ "FR_dispatch_stall_branch_abort_to_retire",	0xD2, 0x0 },	\
187 	{ "FR_dispatch_stall_serialization",		0xD3, 0x0 },	\
188 	{ "FR_dispatch_stall_segment_load",		0xD4, 0x0 },	\
189 	{ "FR_dispatch_stall_reorder_buffer_full",	0xD5, 0x0 },	\
190 	{ "FR_dispatch_stall_resv_stations_full",	0xD6, 0x0 },	\
191 	{ "FR_dispatch_stall_fpu_full",			0xD7, 0x0 },	\
192 	{ "FR_dispatch_stall_ls_full",			0xD8, 0x0 },	\
193 	{ "FR_dispatch_stall_waiting_all_quiet",	0xD9, 0x0 },	\
194 	{ "FR_dispatch_stall_far_ctl_trsfr_resync_branch_pend",	0xDA, 0x0 },\
195 	{ "FR_fpu_exception",				0xDB, 0xF },	\
196 	{ "FR_num_brkpts_dr0",				0xDC, 0x0 },	\
197 	{ "FR_num_brkpts_dr1",				0xDD, 0x0 },	\
198 	{ "FR_num_brkpts_dr2",				0xDE, 0x0 },	\
199 	{ "FR_num_brkpts_dr3",				0xDF, 0x0 },	\
200 	{ "NB_mem_ctrlr_bypass_counter_saturation",	0xE4, 0xF }
201 
202 #define	OPT_events							\
203 	{ "LS_locked_operation",			0x24, 0x7 },	\
204 	{ "DC_copyback",				0x44, 0x1F },	\
205 	{ "DC_dtlb_L1_miss_L2_hit",			0x45, 0x0 },	\
206 	{ "DC_dtlb_L1_miss_L2_miss",			0x46, 0x0 },	\
207 	{ "DC_1bit_ecc_error_found",			0x4A, 0x3 },	\
208 	{ "BU_system_read_responses",			0x6C, 0x7 },	\
209 	{ "BU_quadwords_written_to_system",		0x6D, 0x1 },	\
210 	{ "BU_internal_L2_req",				0x7D, 0x1F },	\
211 	{ "BU_fill_req_missed_L2",			0x7E, 0x7 },	\
212 	{ "BU_fill_into_L2",				0x7F, 0x1 },	\
213 	{ "IC_itlb_L1_miss_L2_miss",			0x85, 0x0 },	\
214 	{ "FR_retired_fpu_instr",			0xCB, 0xF },	\
215 	{ "NB_mem_ctrlr_page_access",			0xE0, 0x7 },	\
216 	{ "NB_mem_ctrlr_page_table_overflow",		0xE1, 0x0 },	\
217 	{ "NB_mem_ctrlr_turnaround",			0xE3, 0x7 },	\
218 	{ "NB_ECC_errors",				0xE8, 0x80},	\
219 	{ "NB_sized_commands",				0xEB, 0x7F },	\
220 	{ "NB_probe_result",				0xEC, 0x7F},	\
221 	{ "NB_gart_events",				0xEE, 0x7 },	\
222 	{ "NB_ht_bus0_bandwidth",			0xF6, 0xF },	\
223 	{ "NB_ht_bus1_bandwidth",			0xF7, 0xF },	\
224 	{ "NB_ht_bus2_bandwidth",			0xF8, 0xF }
225 
226 #define	OPT_RevD_events							\
227 	{ "NB_sized_blocks",				0xE5, 0x3C }
228 
229 #define	OPT_RevE_events							\
230 	{ "NB_cpu_io_to_mem_io",			0xE9, 0xFF},	\
231 	{ "NB_cache_block_commands",			0xEA, 0x3D}
232 
233 #define	AMD_FAMILY_10h_cmn_events					\
234 	{ "FP_retired_sse_ops",				0x3,   0x7F},	\
235 	{ "FP_retired_move_ops",			0x4,   0xF},	\
236 	{ "FP_retired_serialize_ops",			0x5,   0xF},	\
237 	{ "FP_serialize_ops_cycles",			0x6,   0x3},	\
238 	{ "DC_copyback",				0x44,  0x7F },	\
239 	{ "DC_dtlb_L1_miss_L2_hit",			0x45,  0x3 },	\
240 	{ "DC_dtlb_L1_miss_L2_miss",			0x46,  0x7 },	\
241 	{ "DC_1bit_ecc_error_found",			0x4A,  0xF },	\
242 	{ "DC_dtlb_L1_hit",				0x4D,  0x7 },	\
243 	{ "BU_system_read_responses",			0x6C,  0x17 },	\
244 	{ "BU_octwords_written_to_system",		0x6D,  0x1 },	\
245 	{ "BU_internal_L2_req",				0x7D,  0x3F },	\
246 	{ "BU_fill_req_missed_L2",			0x7E,  0xF },	\
247 	{ "BU_fill_into_L2",				0x7F,  0x3 },	\
248 	{ "IC_itlb_L1_miss_L2_miss",			0x85,  0x3 },	\
249 	{ "IC_eviction",				0x8B,  0x0 },	\
250 	{ "IC_cache_lines_invalidate",			0x8C,  0xF },	\
251 	{ "IC_itlb_reload",				0x99,  0x0 },	\
252 	{ "IC_itlb_reload_aborted",			0x9A,  0x0 },	\
253 	{ "FR_retired_mmx_sse_fp_instr",		0xCB,  0x7 },	\
254 	{ "NB_mem_ctrlr_page_access",			0xE0,  0xFF },	\
255 	{ "NB_mem_ctrlr_page_table_overflow",		0xE1,  0x3 },	\
256 	{ "NB_mem_ctrlr_turnaround",			0xE3,  0x3F },	\
257 	{ "NB_thermal_status",				0xE8,  0x7C},	\
258 	{ "NB_sized_commands",				0xEB,  0x3F },	\
259 	{ "NB_probe_results_upstream_req",		0xEC,  0xFF},	\
260 	{ "NB_gart_events",				0xEE,  0xFF },	\
261 	{ "NB_ht_bus0_bandwidth",			0xF6,  0xBF },	\
262 	{ "NB_ht_bus1_bandwidth",			0xF7,  0xBF },	\
263 	{ "NB_ht_bus2_bandwidth",			0xF8,  0xBF },	\
264 	{ "NB_ht_bus3_bandwidth",			0x1F9, 0xBF },	\
265 	{ "LS_locked_operation",			0x24,  0xF },	\
266 	{ "LS_cancelled_store_to_load_fwd_ops",		0x2A,  0x7 },	\
267 	{ "LS_smi_received",				0x2B,  0x0 },	\
268 	{ "LS_ineffective_prefetch",			0x52,  0x9 },	\
269 	{ "LS_global_tlb_flush",			0x54,  0x0 },	\
270 	{ "NB_mem_ctrlr_dram_cmd_slots_missed",		0xE2,  0x3 },	\
271 	{ "NB_mem_ctrlr_req",				0x1F0, 0xFF },	\
272 	{ "CB_cpu_to_dram_req_to_target",		0x1E0, 0xFF },	\
273 	{ "CB_io_to_dram_req_to_target",		0x1E1, 0xFF },	\
274 	{ "CB_cpu_read_cmd_latency_to_target_0_to_3",	0x1E2, 0xFF },	\
275 	{ "CB_cpu_read_cmd_req_to_target_0_to_3",	0x1E3, 0xFF },	\
276 	{ "CB_cpu_read_cmd_latency_to_target_4_to_7",	0x1E4, 0xFF },	\
277 	{ "CB_cpu_read_cmd_req_to_target_4_to_7",	0x1E5, 0xFF },	\
278 	{ "CB_cpu_cmd_latency_to_target_0_to_7",	0x1E6, 0xFF },	\
279 	{ "CB_cpu_req_to_target_0_to_7",		0x1E7, 0xFF },	\
280 	{ "L3_read_req",				0x4E0, 0xF7 },	\
281 	{ "L3_miss",					0x4E1, 0xF7 },	\
282 	{ "L3_l2_eviction_l3_fill",			0x4E2, 0xFF },	\
283 	{ "L3_eviction",				0x4E3, 0xF  }
284 
285 static amd_event_t opt_events[] = {
286 	AMD_cmn_events,
287 	OPT_events,
288 	EV_END
289 };
290 
291 static amd_event_t opt_events_rev_D[] = {
292 	AMD_cmn_events,
293 	OPT_events,
294 	OPT_RevD_events,
295 	EV_END
296 };
297 
298 static amd_event_t opt_events_rev_E[] = {
299 	AMD_cmn_events,
300 	OPT_events,
301 	OPT_RevD_events,
302 	OPT_RevE_events,
303 	EV_END
304 };
305 
306 static amd_event_t family_10h_events[] = {
307 	AMD_cmn_events,
308 	OPT_RevE_events,
309 	AMD_FAMILY_10h_cmn_events,
310 	EV_END
311 };
312 
313 static char	*evlist;
314 static size_t	evlist_sz;
315 static amd_event_t *amd_events = NULL;
316 static uint_t amd_family;
317 
/*
 * Extract the bit field [u:l] (both bounds inclusive) from v.  The mask is
 * built in unsigned 64-bit arithmetic so that fields of 31 bits or more do
 * not overflow a signed int shift.  The field must be narrower than 64 bits.
 */
#define	BITS(v, u, l)   \
	(((v) >> (l)) & ((1ULL << (1 + (u) - (l))) - 1))
320 
321 #define	OPTERON_FAMILY	0xf
322 #define	AMD_FAMILY_10H	0x10
323 
324 static int
325 opt_pcbe_init(void)
326 {
327 	amd_event_t		*evp;
328 	uint32_t		rev;
329 
330 	amd_family = cpuid_getfamily(CPU);
331 
332 	/*
333 	 * Make sure this really _is_ an Opteron or Athlon 64 system. The kernel
334 	 * loads this module based on its name in the module directory, but it
335 	 * could have been renamed.
336 	 */
337 	if (cpuid_getvendor(CPU) != X86_VENDOR_AMD ||
338 	    (amd_family != OPTERON_FAMILY && amd_family != AMD_FAMILY_10H))
339 		return (-1);
340 
341 	/*
342 	 * Figure out processor revision here and assign appropriate
343 	 * event configuration.
344 	 */
345 
346 	rev = cpuid_getchiprev(CPU);
347 
348 	if (amd_family == OPTERON_FAMILY) {
349 		if (!X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_D)) {
350 			amd_events = opt_events;
351 		} else if X86_CHIPREV_MATCH(rev, X86_CHIPREV_AMD_F_REV_D) {
352 			amd_events = opt_events_rev_D;
353 		} else if (X86_CHIPREV_MATCH(rev, X86_CHIPREV_AMD_F_REV_E) ||
354 		    X86_CHIPREV_MATCH(rev, X86_CHIPREV_AMD_F_REV_F) ||
355 		    X86_CHIPREV_MATCH(rev, X86_CHIPREV_AMD_F_REV_G)) {
356 			amd_events = opt_events_rev_E;
357 		} else {
358 			amd_events = opt_events;
359 		}
360 	} else {
361 		amd_events = family_10h_events;
362 	}
363 
364 	/*
365 	 * Construct event list.
366 	 *
367 	 * First pass:  Calculate size needed. We'll need an additional byte
368 	 *		for the NULL pointer during the last strcat.
369 	 *
370 	 * Second pass: Copy strings.
371 	 */
372 	for (evp = amd_events; evp->name != NULL; evp++)
373 		evlist_sz += strlen(evp->name) + 1;
374 
375 	evlist = kmem_alloc(evlist_sz + 1, KM_SLEEP);
376 	evlist[0] = '\0';
377 
378 	for (evp = amd_events; evp->name != NULL; evp++) {
379 		(void) strcat(evlist, evp->name);
380 		(void) strcat(evlist, ",");
381 	}
382 	/*
383 	 * Remove trailing comma.
384 	 */
385 	evlist[evlist_sz - 1] = '\0';
386 
387 	return (0);
388 }
389 
390 static uint_t
391 opt_pcbe_ncounters(void)
392 {
393 	return (4);
394 }
395 
396 static const char *
397 opt_pcbe_impl_name(void)
398 {
399 	if (amd_family == OPTERON_FAMILY) {
400 		return ("AMD Opteron & Athlon64");
401 	} else if (amd_family == AMD_FAMILY_10H) {
402 		return ("AMD Family 10h");
403 	} else {
404 		return ("Unknown AMD processor");
405 	}
406 }
407 
408 static const char *
409 opt_pcbe_cpuref(void)
410 {
411 	if (amd_family == OPTERON_FAMILY) {
412 		return ("See Chapter 10 of the \"BIOS and Kernel Developer's"
413 		" Guide for the AMD Athlon 64 and AMD Opteron Processors,\" "
414 		"AMD publication #26094");
415 	} else if (amd_family == AMD_FAMILY_10H) {
416 		return ("See section 3.15 of the \"BIOS and Kernel "
417 		"Developer's Guide (BKDG) For AMD Family 10h Processors,\" "
418 		"AMD publication #31116");
419 	} else {
420 		return ("Unknown AMD processor");
421 	}
422 }
423 
424 /*ARGSUSED*/
425 static char *
426 opt_pcbe_list_events(uint_t picnum)
427 {
428 	return (evlist);
429 }
430 
/*
 * Return the comma-separated names of the attributes that
 * opt_pcbe_configure() accepts.
 */
static char *
opt_pcbe_list_attrs(void)
{
	char	*attr_names = "edge,pc,inv,cmask,umask";

	return (attr_names);
}
436 
/*
 * Return a bitmap of the counters able to count the named event.  Every
 * counter can count every event, so the answer is always the low four bits
 * and the event name is not examined.
 */
/*ARGSUSED*/
static uint64_t
opt_pcbe_event_coverage(char *event)
{
	const uint64_t	all_pics = 0xF;

	return (all_pics);
}
446 
/*
 * Return a bitmap of the counters that have overflowed.  The chip cannot
 * tell us which counter overflowed, so all four are always reported.
 */
static uint64_t
opt_pcbe_overflow_bitmap(void)
{
	const uint64_t	all_pics = 0xF;

	return (all_pics);
}
456 
457 static amd_event_t *
458 find_event(char *name)
459 {
460 	amd_event_t	*evp;
461 
462 	for (evp = amd_events; evp->name != NULL; evp++)
463 		if (strcmp(name, evp->name) == 0)
464 			return (evp);
465 
466 	return (NULL);
467 }
468 
469 /*ARGSUSED*/
470 static int
471 opt_pcbe_configure(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
472     uint_t nattrs, kcpc_attr_t *attrs, void **data, void *token)
473 {
474 	opt_pcbe_config_t	*cfg;
475 	amd_event_t		*evp;
476 	amd_event_t		ev_raw = { "raw", 0, 0xFF };
477 	int			i;
478 	uint64_t		evsel = 0, evsel_tmp = 0;
479 
480 	/*
481 	 * If we've been handed an existing configuration, we need only preset
482 	 * the counter value.
483 	 */
484 	if (*data != NULL) {
485 		cfg = *data;
486 		cfg->opt_rawpic = preset & MASK48;
487 		return (0);
488 	}
489 
490 	if (picnum >= 4)
491 		return (CPC_INVALID_PICNUM);
492 
493 	if ((evp = find_event(event)) == NULL) {
494 		long tmp;
495 
496 		/*
497 		 * If ddi_strtol() likes this event, use it as a raw event code.
498 		 */
499 		if (ddi_strtol(event, NULL, 0, &tmp) != 0)
500 			return (CPC_INVALID_EVENT);
501 
502 		ev_raw.emask = tmp;
503 		evp = &ev_raw;
504 	}
505 
506 	/*
507 	 * Configuration of EventSelect register for family 10h processors.
508 	 */
509 	if (amd_family == AMD_FAMILY_10H) {
510 
511 		/* Set GuestOnly bit to 0 and HostOnly bit to 1 */
512 		evsel &= ~OPT_PES_HOST;
513 		evsel &= ~OPT_PES_GUEST;
514 
515 		/* Set bits [35:32] for extended part of Event Select field */
516 		evsel_tmp = evp->emask & 0x0f00;
517 		evsel |= evsel_tmp << 24;
518 	}
519 
520 	evsel |= evp->emask & 0x00ff;
521 
522 	if (flags & CPC_COUNT_USER)
523 		evsel |= OPT_PES_USR;
524 	if (flags & CPC_COUNT_SYSTEM)
525 		evsel |= OPT_PES_OS;
526 	if (flags & CPC_OVF_NOTIFY_EMT)
527 		evsel |= OPT_PES_INT;
528 
529 	for (i = 0; i < nattrs; i++) {
530 		if (strcmp(attrs[i].ka_name, "edge") == 0) {
531 			if (attrs[i].ka_val != 0)
532 				evsel |= OPT_PES_EDGE;
533 		} else if (strcmp(attrs[i].ka_name, "pc") == 0) {
534 			if (attrs[i].ka_val != 0)
535 				evsel |= OPT_PES_PC;
536 		} else if (strcmp(attrs[i].ka_name, "inv") == 0) {
537 			if (attrs[i].ka_val != 0)
538 				evsel |= OPT_PES_INV;
539 		} else if (strcmp(attrs[i].ka_name, "cmask") == 0) {
540 			if ((attrs[i].ka_val | OPT_PES_CMASK_MASK) !=
541 			    OPT_PES_CMASK_MASK)
542 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
543 			evsel |= attrs[i].ka_val << OPT_PES_CMASK_SHIFT;
544 		} else if (strcmp(attrs[i].ka_name, "umask") == 0) {
545 			if ((attrs[i].ka_val | evp->umask_valid) !=
546 			    evp->umask_valid)
547 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
548 			evsel |= attrs[i].ka_val << OPT_PES_UMASK_SHIFT;
549 		} else
550 			return (CPC_INVALID_ATTRIBUTE);
551 	}
552 
553 	cfg = kmem_alloc(sizeof (*cfg), KM_SLEEP);
554 
555 	cfg->opt_picno = picnum;
556 	cfg->opt_evsel = evsel;
557 	cfg->opt_rawpic = preset & MASK48;
558 
559 	*data = cfg;
560 	return (0);
561 }
562 
563 static void
564 opt_pcbe_program(void *token)
565 {
566 	opt_pcbe_config_t	*cfgs[4] = { &nullcfgs[0], &nullcfgs[1],
567 						&nullcfgs[2], &nullcfgs[3] };
568 	opt_pcbe_config_t	*pcfg = NULL;
569 	int			i;
570 	ulong_t			curcr4 = getcr4();
571 
572 	/*
573 	 * Allow nonprivileged code to read the performance counters if desired.
574 	 */
575 	if (kcpc_allow_nonpriv(token))
576 		setcr4(curcr4 | CR4_PCE);
577 	else
578 		setcr4(curcr4 & ~CR4_PCE);
579 
580 	/*
581 	 * Query kernel for all configs which will be co-programmed.
582 	 */
583 	do {
584 		pcfg = (opt_pcbe_config_t *)kcpc_next_config(token, pcfg, NULL);
585 
586 		if (pcfg != NULL) {
587 			ASSERT(pcfg->opt_picno < 4);
588 			cfgs[pcfg->opt_picno] = pcfg;
589 		}
590 	} while (pcfg != NULL);
591 
592 	/*
593 	 * Program in two loops. The first configures and presets the counter,
594 	 * and the second loop enables the counters. This ensures that the
595 	 * counters are all enabled as closely together in time as possible.
596 	 */
597 
598 	for (i = 0; i < 4; i++) {
599 		wrmsr(PES_BASE_ADDR + i, cfgs[i]->opt_evsel);
600 		wrmsr(PIC_BASE_ADDR + i, cfgs[i]->opt_rawpic);
601 	}
602 
603 	for (i = 0; i < 4; i++) {
604 		wrmsr(PES_BASE_ADDR + i, cfgs[i]->opt_evsel |
605 		    (uint64_t)(uintptr_t)OPT_PES_ENABLE);
606 	}
607 }
608 
609 static void
610 opt_pcbe_allstop(void)
611 {
612 	int		i;
613 
614 	for (i = 0; i < 4; i++)
615 		wrmsr(PES_BASE_ADDR + i, 0ULL);
616 
617 	/*
618 	 * Disable non-privileged access to the counter registers.
619 	 */
620 	setcr4(getcr4() & ~CR4_PCE);
621 }
622 
623 static void
624 opt_pcbe_sample(void *token)
625 {
626 	opt_pcbe_config_t	*cfgs[4] = { NULL, NULL, NULL, NULL };
627 	opt_pcbe_config_t	*pcfg = NULL;
628 	int			i;
629 	uint64_t		curpic[4];
630 	uint64_t		*addrs[4];
631 	uint64_t		*tmp;
632 	int64_t			diff;
633 
634 	for (i = 0; i < 4; i++)
635 		curpic[i] = rdmsr(PIC_BASE_ADDR + i);
636 
637 	/*
638 	 * Query kernel for all configs which are co-programmed.
639 	 */
640 	do {
641 		pcfg = (opt_pcbe_config_t *)kcpc_next_config(token, pcfg, &tmp);
642 
643 		if (pcfg != NULL) {
644 			ASSERT(pcfg->opt_picno < 4);
645 			cfgs[pcfg->opt_picno] = pcfg;
646 			addrs[pcfg->opt_picno] = tmp;
647 		}
648 	} while (pcfg != NULL);
649 
650 	for (i = 0; i < 4; i++) {
651 		if (cfgs[i] == NULL)
652 			continue;
653 
654 		diff = (curpic[i] - cfgs[i]->opt_rawpic) & MASK48;
655 		*addrs[i] += diff;
656 		DTRACE_PROBE4(opt__pcbe__sample, int, i, uint64_t, *addrs[i],
657 		    uint64_t, curpic[i], uint64_t, cfgs[i]->opt_rawpic);
658 		cfgs[i]->opt_rawpic = *addrs[i] & MASK48;
659 	}
660 }
661 
662 static void
663 opt_pcbe_free(void *config)
664 {
665 	kmem_free(config, sizeof (opt_pcbe_config_t));
666 }
667 
668 
669 static struct modlpcbe modlpcbe = {
670 	&mod_pcbeops,
671 	"AMD Performance Counters v%I%",
672 	&opt_pcbe_ops
673 };
674 
675 static struct modlinkage modl = {
676 	MODREV_1,
677 	&modlpcbe,
678 };
679 
680 int
681 _init(void)
682 {
683 	int ret;
684 
685 	if (opt_pcbe_init() != 0)
686 		return (ENOTSUP);
687 
688 	if ((ret = mod_install(&modl)) != 0)
689 		kmem_free(evlist, evlist_sz + 1);
690 
691 	return (ret);
692 }
693 
694 int
695 _fini(void)
696 {
697 	int ret;
698 
699 	if ((ret = mod_remove(&modl)) == 0)
700 		kmem_free(evlist, evlist_sz + 1);
701 	return (ret);
702 }
703 
704 int
705 _info(struct modinfo *mi)
706 {
707 	return (mod_info(&modl, mi));
708 }
709