1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <string.h>
29 #include <alloca.h>
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <libintl.h>
33 
34 #include "libcpc.h"
35 #include "libcpc_impl.h"
36 
/*
 * Configuration data for Pentium and Pentium Pro family performance
 * counters.
 *
 * Definitions taken from [3].  See the reference to
 * understand what each of these settings actually means.
 *
 * [3] "Pentium Pro Family Developer's Manual, Volume 3:
 *     Operating Systems Writer's Manual," January 1996
 */
46 
/*
 * Capability flag bits.  Every event table entry carries one of these in
 * its "ver" field, and cpuvermap[] holds the set of flags that each cpu
 * version satisfies.  V_END (zero) marks the last entry of an event table.
 */
#define	V_P5		0x1u	/* events counted by Pentium cpus */
#define	V_P5mmx		0x2u	/* additional events on Pentium with MMX */
#define	V_P6		0x4u	/* events counted by Pentium Pro family cpus */
#define	V_P6mmx		0x8u	/* additional events on P6 cpus with MMX */
#define	V_END		0	/* table terminator; matches no cpu */
52 
53 /*
54  * map from "cpu version" to flag bits
55  */
56 static const uint_t cpuvermap[] = {
57 	V_P5,		/* CPC_PENTIUM */
58 	V_P5 | V_P5mmx,	/* CPC_PENTIUM_MMX */
59 	V_P6,		/* CPC_PENTIUM_PRO */
60 	V_P6 | V_P6mmx,	/* CPC_PENTIUM_PRO_MMX */
61 };
62 
63 struct nametable {
64 	const uint_t	ver;
65 	const uint8_t	bits;
66 	const char	*name;
67 };
68 
/*
 * Basic Pentium events.
 *
 * Expands to a comma-separated run of nametable initializers, each tagged
 * with the capability flag "v".  The expansion has no trailing comma, so
 * the user supplies one when further entries follow.
 */
#define	P5_EVENTS(v)						\
	{ (v), 0x00, "data_read" },				\
	{ (v), 0x01, "data_write" },				\
	{ (v), 0x02, "data_tlb_miss" },				\
	{ (v), 0x03, "data_read_miss" },			\
	{ (v), 0x04, "data_write_miss" },			\
	{ (v), 0x05, "write_hit_to_M_or_E" },			\
	{ (v), 0x06, "dcache_lines_wrback" },			\
	{ (v), 0x07, "external_snoops" },			\
	{ (v), 0x08, "external_dcache_snoop_hits" },		\
	{ (v), 0x09, "memory_access_in_both_pipes" },		\
	{ (v), 0x0a, "bank_conflicts" },			\
	{ (v), 0x0b, "misaligned_ref" },			\
	{ (v), 0x0c, "code_read" },				\
	{ (v), 0x0d, "code_tlb_miss" },				\
	{ (v), 0x0e, "code_cache_miss" },			\
	{ (v), 0x0f, "any_segreg_loaded" },			\
	{ (v), 0x12, "branches" },				\
	{ (v), 0x13, "btb_hits" },				\
	{ (v), 0x14, "taken_or_btb_hit" },			\
	{ (v), 0x15, "pipeline_flushes" },			\
	{ (v), 0x16, "instr_exec" },				\
	{ (v), 0x17, "instr_exec_V_pipe" },			\
	{ (v), 0x18, "clks_bus_cycle" },			\
	{ (v), 0x19, "clks_full_wbufs" },			\
	{ (v), 0x1a, "pipe_stall_read" },			\
	{ (v), 0x1b, "stall_on_write_ME" },			\
	{ (v), 0x1c, "locked_bus_cycle" },			\
	{ (v), 0x1d, "io_rw_cycles" },				\
	{ (v), 0x1e, "reads_noncache_mem" },			\
	{ (v), 0x1f, "pipeline_agi_stalls" },			\
	{ (v), 0x22, "flops" },					\
	{ (v), 0x23, "bp_match_dr0" },				\
	{ (v), 0x24, "bp_match_dr1" },				\
	{ (v), 0x25, "bp_match_dr2" },				\
	{ (v), 0x26, "bp_match_dr3" },				\
	{ (v), 0x27, "hw_intrs" },				\
	{ (v), 0x28, "data_rw" },				\
	{ (v), 0x29, "data_rw_miss" }
111 
112 static const struct nametable P5mmx_names0[] = {
113 	P5_EVENTS(V_P5),
114 	{V_P5mmx,	0x2a,	"bus_ownership_latency"},
115 	{V_P5mmx,	0x2b,	"mmx_instr_upipe"},
116 	{V_P5mmx,	0x2c,	"cache_M_line_sharing"},
117 	{V_P5mmx,	0x2d,	"emms_instr"},
118 	{V_P5mmx,	0x2e,	"bus_util_processor"},
119 	{V_P5mmx,	0x2f,	"sat_mmx_instr"},
120 	{V_P5mmx,	0x30,	"clks_not_HLT"},
121 	{V_P5mmx,	0x31,	"mmx_data_read"},
122 	{V_P5mmx,	0x32,	"clks_fp_stall"},
123 	{V_P5mmx,	0x33,	"d1_starv_fifo_0"},
124 	{V_P5mmx,	0x34,	"mmx_data_write"},
125 	{V_P5mmx,	0x35,	"pipe_flush_wbp"},
126 	{V_P5mmx,	0x36,	"mmx_misalign_data_refs"},
127 	{V_P5mmx,	0x37,	"rets_pred_incorrect"},
128 	{V_P5mmx,	0x38,	"mmx_multiply_unit_interlock"},
129 	{V_P5mmx,	0x39,	"rets"},
130 	{V_P5mmx,	0x3a,	"btb_false_entries"},
131 	{V_P5mmx,	0x3b,	"clocks_stall_full_wb"},
132 	{V_END}
133 };
134 
135 static const struct nametable P5mmx_names1[] = {
136 	P5_EVENTS(V_P5),
137 	{V_P5mmx,	0x2a,	"bus_ownership_transfers"},
138 	{V_P5mmx,	0x2b,	"mmx_instr_vpipe"},
139 	{V_P5mmx,	0x2c,	"cache_lint_sharing"},
140 	{V_P5mmx,	0x2d,	"mmx_fp_transitions"},
141 	{V_P5mmx,	0x2e,	"writes_noncache_mem"},
142 	{V_P5mmx,	0x2f,	"sats_performed"},
143 	{V_P5mmx,	0x30,	"clks_dcache_tlb_miss"},
144 	{V_P5mmx,	0x31,	"mmx_data_read_miss"},
145 	{V_P5mmx,	0x32,	"taken_br"},
146 	{V_P5mmx,	0x33,	"d1_starv_fifo_1"},
147 	{V_P5mmx,	0x34,	"mmx_data_write_miss"},
148 	{V_P5mmx,	0x35,	"pipe_flush_wbp_wb"},
149 	{V_P5mmx,	0x36,	"mmx_pipe_stall_data_read"},
150 	{V_P5mmx,	0x37,	"rets_pred"},
151 	{V_P5mmx,	0x38,	"movd_movq_stall"},
152 	{V_P5mmx,	0x39,	"rsb_overflow"},
153 	{V_P5mmx,	0x3a,	"btb_mispred_nt"},
154 	{V_P5mmx,	0x3b,	"mmx_stall_write_ME"},
155 	{V_END}
156 };
157 
158 static const struct nametable *P5mmx_names[2] = {
159 	P5mmx_names0,
160 	P5mmx_names1
161 };
162 
163 /*
164  * Pentium Pro and Pentium II events
165  */
166 static const struct nametable P6_names[] = {
167 	/*
168 	 * Data cache unit
169 	 */
170 	{V_P6,		0x43,	"data_mem_refs"},
171 	{V_P6,		0x45,	"dcu_lines_in"},
172 	{V_P6,		0x46,	"dcu_m_lines_in"},
173 	{V_P6,		0x47,	"dcu_m_lines_out"},
174 	{V_P6,		0x48,	"dcu_miss_outstanding"},
175 
176 	/*
177 	 * Instruction fetch unit
178 	 */
179 	{V_P6,		0x80,	"ifu_ifetch"},
180 	{V_P6,		0x81,	"ifu_ifetch_miss"},
181 	{V_P6,		0x85,	"itlb_miss"},
182 	{V_P6,		0x86,	"ifu_mem_stall"},
183 	{V_P6,		0x87,	"ild_stall"},
184 
185 	/*
186 	 * L2 cache
187 	 */
188 	{V_P6,		0x28,	"l2_ifetch"},
189 	{V_P6,		0x29,	"l2_ld"},
190 	{V_P6,		0x2a,	"l2_st"},
191 	{V_P6,		0x24,	"l2_lines_in"},
192 	{V_P6,		0x26,	"l2_lines_out"},
193 	{V_P6,		0x25,	"l2_m_lines_inm"},
194 	{V_P6,		0x27,	"l2_m_lines_outm"},
195 	{V_P6,		0x2e,	"l2_rqsts"},
196 	{V_P6,		0x21,	"l2_ads"},
197 	{V_P6,		0x22,	"l2_dbus_busy"},
198 	{V_P6,		0x23,	"l2_dbus_busy_rd"},
199 
200 	/*
201 	 * External bus logic
202 	 */
203 	{V_P6,		0x62,	"bus_drdy_clocks"},
204 	{V_P6,		0x63,	"bus_lock_clocks"},
205 	{V_P6,		0x60,	"bus_req_outstanding"},
206 	{V_P6,		0x65,	"bus_tran_brd"},
207 	{V_P6,		0x66,	"bus_tran_rfo"},
208 	{V_P6,		0x67,	"bus_trans_wb"},
209 	{V_P6,		0x68,	"bus_tran_ifetch"},
210 	{V_P6,		0x69,	"bus_tran_inval"},
211 	{V_P6,		0x6a,	"bus_tran_pwr"},
212 	{V_P6,		0x6b,	"bus_trans_p"},
213 	{V_P6,		0x6c,	"bus_trans_io"},
214 	{V_P6,		0x6d,	"bus_tran_def"},
215 	{V_P6,		0x6e,	"bus_tran_burst"},
216 	{V_P6,		0x70,	"bus_tran_any"},
217 	{V_P6,		0x6f,	"bus_tran_mem"},
218 	{V_P6,		0x64,	"bus_data_rcv"},
219 	{V_P6,		0x61,	"bus_bnr_drv"},
220 	{V_P6,		0x7a,	"bus_hit_drv"},
221 	{V_P6,		0x7b,	"bus_hitm_drv"},
222 	{V_P6,		0x7e,	"bus_snoop_stall"},
223 
224 	/*
225 	 * Floating point unit
226 	 */
227 	{V_P6,		0xc1,	"flops"},		/* 0 only */
228 	{V_P6,		0x10,	"fp_comp_ops_exe"},	/* 0 only */
229 	{V_P6,		0x11,	"fp_assist"},		/* 1 only */
230 	{V_P6,		0x12,	"mul"},			/* 1 only */
231 	{V_P6,		0x13,	"div"},			/* 1 only */
232 	{V_P6,		0x14,	"cycles_div_busy"},	/* 0 only */
233 
234 	/*
235 	 * Memory ordering
236 	 */
237 	{V_P6,		0x3,	"ld_blocks"},
238 	{V_P6,		0x4,	"sb_drains"},
239 	{V_P6,		0x5,	"misalign_mem_ref"},
240 
241 	/*
242 	 * Instruction decoding and retirement
243 	 */
244 	{V_P6,		0xc0,	"inst_retired"},
245 	{V_P6,		0xc2,	"uops_retired"},
246 	{V_P6,		0xd0,	"inst_decoder"},
247 
248 	/*
249 	 * Interrupts
250 	 */
251 	{V_P6,		0xc8,	"hw_int_rx"},
252 	{V_P6,		0xc6,	"cycles_int_masked"},
253 	{V_P6,		0xc7,	"cycles_int_pending_and_masked"},
254 
255 	/*
256 	 * Branches
257 	 */
258 	{V_P6,		0xc4,	"br_inst_retired"},
259 	{V_P6,		0xc5,	"br_miss_pred_retired"},
260 	{V_P6,		0xc9,	"br_taken_retired"},
261 	{V_P6,		0xca,	"br_miss_pred_taken_ret"},
262 	{V_P6,		0xe0,	"br_inst_decoded"},
263 	{V_P6,		0xe2,	"btb_misses"},
264 	{V_P6,		0xe4,	"br_bogus"},
265 	{V_P6,		0xe6,	"baclears"},
266 
267 	/*
268 	 * Stalls
269 	 */
270 	{V_P6,		0xa2,	"resource_stalls"},
271 	{V_P6,		0xd2,	"partial_rat_stalls"},
272 
273 	/*
274 	 * Segment register loads
275 	 */
276 	{V_P6,		0x6,	"segment_reg_loads"},
277 
278 	/*
279 	 * Clocks
280 	 */
281 	{V_P6,		0x79,	"cpu_clk_unhalted"},
282 
283 	/*
284 	 * MMX
285 	 */
286 	{V_P6mmx,	0xb0,	"mmx_instr_exec"},
287 	{V_P6mmx,	0xb1,	"mmx_sat_instr_exec"},
288 	{V_P6mmx,	0xb2,	"mmx_uops_exec"},
289 	{V_P6mmx,	0xb3,	"mmx_instr_type_exec"},
290 	{V_P6mmx,	0xcc,	"fp_mmx_trans"},
291 	{V_P6mmx,	0xcd,	"mmx_assists"},
292 	{V_P6mmx,	0xce,	"mmx_instr_ret"},
293 	{V_P6mmx,	0xd4,	"seg_rename_stalls"},
294 	{V_P6mmx,	0xd5,	"seg_reg_renames"},
295 	{V_P6mmx,	0xd6,	"ret_seg_renames"},
296 
297 	{V_END}
298 };
299 
/*
 * Capability flags for a cpu version.  No range check is made here; every
 * use in this file is preceded by a successful validargs() call.
 */
#define	MAPCPUVER(cpuver)	\
	(cpuvermap[(cpuver) - CPC_PENTIUM])
301 
302 static int
validargs(int cpuver,int regno)303 validargs(int cpuver, int regno)
304 {
305 	if (regno < 0 || regno > 1)
306 		return (0);
307 	cpuver -= CPC_PENTIUM;
308 	if (cpuver < 0 ||
309 	    cpuver >= sizeof (cpuvermap) / sizeof (cpuvermap[0]))
310 		return (0);
311 	return (1);
312 }
313 
314 /*ARGSUSED*/
315 static int
versionmatch(int cpuver,int regno,const struct nametable * n)316 versionmatch(int cpuver, int regno, const struct nametable *n)
317 {
318 	if (!validargs(cpuver, regno) || (n->ver & MAPCPUVER(cpuver)) == 0)
319 		return (0);
320 
321 	switch (MAPCPUVER(cpuver)) {
322 	case V_P5:
323 	case V_P5 | V_P5mmx:
324 		break;
325 	case V_P6:
326 	case V_P6 | V_P6mmx:
327 		switch (n->bits) {
328 		case 0xc1:	/* flops */
329 		case 0x10:	/* fp_comp_ops_exe */
330 		case 0x14:	/* cycles_div_busy */
331 			/* only reg0 counts these */
332 			if (regno == 1)
333 				return (0);
334 			break;
335 		case 0x11:	/* fp_assist */
336 		case 0x12:	/* mul */
337 		case 0x13:	/* div */
338 			/* only 1 can count these */
339 			if (regno == 0)
340 				return (0);
341 			break;
342 		default:
343 			break;
344 		}
345 		break;
346 	default:
347 		return (0);
348 	}
349 
350 	return (1);
351 }
352 
353 static const struct nametable *
getnametable(int cpuver,int regno)354 getnametable(int cpuver, int regno)
355 {
356 	const struct nametable *n;
357 
358 	if (!validargs(cpuver, regno))
359 		return (NULL);
360 
361 	switch (MAPCPUVER(cpuver)) {
362 	case V_P5:
363 	case V_P5 | V_P5mmx:
364 		n = P5mmx_names[regno];
365 		break;
366 	case V_P6:
367 	case V_P6 | V_P6mmx:
368 		n = P6_names;
369 		break;
370 	default:
371 		n = NULL;
372 		break;
373 	}
374 
375 	return (n);
376 }
377 
378 void
cpc_walk_names(int cpuver,int regno,void * arg,void (* action)(void *,int,const char *,uint8_t))379 cpc_walk_names(int cpuver, int regno, void *arg,
380     void (*action)(void *, int, const char *, uint8_t))
381 {
382 	const struct nametable *n;
383 
384 	if ((n = getnametable(cpuver, regno)) == NULL)
385 		return;
386 	for (; n->ver != V_END; n++)
387 		if (versionmatch(cpuver, regno, n))
388 			action(arg, regno, n->name, n->bits);
389 }
390 
391 const char *
__cpc_reg_to_name(int cpuver,int regno,uint8_t bits)392 __cpc_reg_to_name(int cpuver, int regno, uint8_t bits)
393 {
394 	const struct nametable *n;
395 
396 	if ((n = getnametable(cpuver, regno)) == NULL)
397 		return (NULL);
398 	for (; n->ver != V_END; n++)
399 		if (bits == n->bits && versionmatch(cpuver, regno, n))
400 			return (n->name);
401 	return (NULL);
402 }
403 
404 /*
405  * Register names can be specified as strings or even as numbers
406  */
407 int
__cpc_name_to_reg(int cpuver,int regno,const char * name,uint8_t * bits)408 __cpc_name_to_reg(int cpuver, int regno, const char *name, uint8_t *bits)
409 {
410 	const struct nametable *n;
411 	char *eptr = NULL;
412 	long value;
413 
414 	if ((n = getnametable(cpuver, regno)) == NULL || name == NULL)
415 		return (-1);
416 	for (; n->ver != V_END; n++)
417 		if (strcmp(name, n->name) == 0 &&
418 		    versionmatch(cpuver, regno, n)) {
419 			*bits = n->bits;
420 			return (0);
421 		}
422 
423 	value = strtol(name, &eptr, 0);
424 	if (name != eptr && value >= 0 && value <= UINT8_MAX) {
425 		*bits = (uint8_t)value;
426 		return (0);
427 	}
428 
429 	return (-1);
430 }
431 
432 const char *
cpc_getcciname(int cpuver)433 cpc_getcciname(int cpuver)
434 {
435 	if (validargs(cpuver, 0))
436 		switch (MAPCPUVER(cpuver)) {
437 		case V_P5:
438 			return ("Pentium");
439 		case V_P5 | V_P5mmx:
440 			return ("Pentium with MMX");
441 		case V_P6:
442 			return ("Pentium Pro, Pentium II");
443 		case V_P6 | V_P6mmx:
444 			return ("Pentium Pro with MMX, Pentium II");
445 		default:
446 			break;
447 		}
448 	return (NULL);
449 }
450 
451 const char *
cpc_getcpuref(int cpuver)452 cpc_getcpuref(int cpuver)
453 {
454 	if (validargs(cpuver, 0))
455 		switch (MAPCPUVER(cpuver)) {
456 		case V_P5:
457 		case V_P5 | V_P5mmx:
458 			return (gettext(
459 			    "See Appendix A.2 of the \"Intel Architecture "
460 			    "Software Developer's Manual,\" 243192, 1997"));
461 		case V_P6:
462 		case V_P6 | V_P6mmx:
463 			return (gettext(
464 			    "See Appendix A.1 of the \"Intel Architecture "
465 			    "Software Developer's Manual,\" 243192, 1997"));
466 		default:
467 			break;
468 		}
469 	return (NULL);
470 }
471 
472 /*
473  * This is a functional interface to allow CPUs with fewer %pic registers
474  * to share the same data structure as those with more %pic registers
475  * within the same instruction set family.
476  */
477 uint_t
cpc_getnpic(int cpuver)478 cpc_getnpic(int cpuver)
479 {
480 	switch (cpuver) {
481 	case CPC_PENTIUM:
482 	case CPC_PENTIUM_MMX:
483 	case CPC_PENTIUM_PRO:
484 	case CPC_PENTIUM_PRO_MMX:
485 #define	EVENT	((cpc_event_t *)0)
486 		return (sizeof (EVENT->ce_pic) / sizeof	(EVENT->ce_pic[0]));
487 #undef	EVENT
488 	default:
489 		return (0);
490 	}
491 }
492 
/*
 * Extract bits u..l (inclusive) of the 32-bit value v, right-justified.
 * Requires 0 <= l <= u <= 31; u and l are each evaluated more than once.
 *
 * The mask is built in uint32_t with two shifts so that fields 31 and 32
 * bits wide are well defined; a single "1 << width" on a signed int is
 * not.
 */
#define	BITS(v, u, l)	\
	(((v) >> (l)) & ((((uint32_t)1 << ((u) - (l))) << 1) - 1))
495 
496 #include "getcpuid.h"
497 
498 /*
499  * Return the version of the current processor.
500  *
501  * Version -1 is defined as 'not performance counter capable'
502  */
503 int
cpc_getcpuver(void)504 cpc_getcpuver(void)
505 {
506 	static int ver = -1;
507 	uint32_t maxeax;
508 	uint32_t vbuf[4];
509 
510 	if (ver != -1)
511 		return (ver);
512 
513 	maxeax = cpc_getcpuid(0, &vbuf[0], &vbuf[2], &vbuf[1]);
514 	{
515 		char *vendor = (char *)vbuf;
516 		vendor[12] = '\0';
517 
518 		if (strcmp(vendor, "GenuineIntel") != 0)
519 			return (ver);
520 	}
521 
522 	if (maxeax >= 1) {
523 		int family, model;
524 		uint32_t eax, ebx, ecx, edx;
525 
526 		eax = cpc_getcpuid(1, &ebx, &ecx, &edx);
527 
528 		if ((family = BITS(eax, 11, 8)) == 0xf)
529 			family = BITS(eax, 27, 20);
530 		if ((model = BITS(eax, 7, 4)) == 0xf)
531 			model = BITS(eax, 19, 16);
532 
533 		/*
534 		 * map family and model into the performance
535 		 * counter architectures we currently understand.
536 		 *
537 		 * See application note AP485 (from developer.intel.com)
538 		 * for further explanation.
539 		 */
540 		switch (family) {
541 		case 5:		/* Pentium and Pentium with MMX */
542 			ver = model < 4 ?
543 				CPC_PENTIUM : CPC_PENTIUM_MMX;
544 			break;
545 		case 6:		/* Pentium Pro and Pentium II and III */
546 			ver = BITS(edx, 23, 23) ?	   /* mmx check */
547 				CPC_PENTIUM_PRO_MMX : CPC_PENTIUM_PRO;
548 			break;
549 		default:
550 		case 0xf:	/* Pentium IV */
551 			break;
552 		}
553 	}
554 
555 	return (ver);
556 }
557