1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#include <sys/types.h>
30#include <string.h>
31#include <alloca.h>
32#include <stdlib.h>
33#include <stdio.h>
34#include <libintl.h>
35
36#include "libcpc.h"
37#include "libcpc_impl.h"
38
/*
 * Configuration data for Pentium and Pentium Pro family performance
 * counters.
 *
 * Definitions taken from [3].  See the reference to
 * understand what each of these settings actually means.
 *
 * [3] "Pentium Pro Family Developer's Manual, Volume 3:
 *     Operating Systems Writer's Manual," January 1996
 */
48
49#define	V_P5	(1u << 0)		/* specific to Pentium cpus */
50#define	V_P5mmx	(1u << 1)		/* " MMX instructions */
51#define	V_P6	(1u << 2)		/* specific to Pentium II cpus */
52#define	V_P6mmx	(1u << 3)		/* " MMX instructions */
53#define	V_END	0
54
55/*
56 * map from "cpu version" to flag bits
57 */
58static const uint_t cpuvermap[] = {
59	V_P5,		/* CPC_PENTIUM */
60	V_P5 | V_P5mmx,	/* CPC_PENTIUM_MMX */
61	V_P6,		/* CPC_PENTIUM_PRO */
62	V_P6 | V_P6mmx,	/* CPC_PENTIUM_PRO_MMX */
63};
64
/*
 * One entry of an event-name table.  Tables are arrays of these,
 * terminated by an entry whose ver field is V_END.
 */
struct nametable {
	const uint_t	ver;	/* V_* flags: cpu variants with this event */
	const uint8_t	bits;	/* event code associated with the name */
	const char	*name;	/* event name as a string */
};
70
/*
 * Basic Pentium events.
 *
 * Expands to a comma-separated list of nametable initializers, each
 * tagged with the version flags passed as the macro argument.  There is
 * no trailing comma, so the macro can be followed by further entries.
 */
#define	P5_EVENTS(vflags)						\
	{vflags,	0x00,	"data_read"},				\
	{vflags,	0x01,	"data_write"},				\
	{vflags,	0x02,	"data_tlb_miss"},			\
	{vflags,	0x03,	"data_read_miss"},			\
	{vflags,	0x04,	"data_write_miss"},			\
	{vflags,	0x05,	"write_hit_to_M_or_E"},			\
	{vflags,	0x06,	"dcache_lines_wrback"},			\
	{vflags,	0x07,	"external_snoops"},			\
	{vflags,	0x08,	"external_dcache_snoop_hits"},		\
	{vflags,	0x09,	"memory_access_in_both_pipes"},		\
	{vflags,	0x0a,	"bank_conflicts"},			\
	{vflags,	0x0b,	"misaligned_ref"},			\
	{vflags,	0x0c,	"code_read"},				\
	{vflags,	0x0d,	"code_tlb_miss"},			\
	{vflags,	0x0e,	"code_cache_miss"},			\
	{vflags,	0x0f,	"any_segreg_loaded"},			\
	{vflags,	0x12,	"branches"},				\
	{vflags,	0x13,	"btb_hits"},				\
	{vflags,	0x14,	"taken_or_btb_hit"},			\
	{vflags,	0x15,	"pipeline_flushes"},			\
	{vflags,	0x16,	"instr_exec"},				\
	{vflags,	0x17,	"instr_exec_V_pipe"},			\
	{vflags,	0x18,	"clks_bus_cycle"},			\
	{vflags,	0x19,	"clks_full_wbufs"},			\
	{vflags,	0x1a,	"pipe_stall_read"},			\
	{vflags,	0x1b,	"stall_on_write_ME"},			\
	{vflags,	0x1c,	"locked_bus_cycle"},			\
	{vflags,	0x1d,	"io_rw_cycles"},			\
	{vflags,	0x1e,	"reads_noncache_mem"},			\
	{vflags,	0x1f,	"pipeline_agi_stalls"},			\
	{vflags,	0x22,	"flops"},				\
	{vflags,	0x23,	"bp_match_dr0"},			\
	{vflags,	0x24,	"bp_match_dr1"},			\
	{vflags,	0x25,	"bp_match_dr2"},			\
	{vflags,	0x26,	"bp_match_dr3"},			\
	{vflags,	0x27,	"hw_intrs"},				\
	{vflags,	0x28,	"data_rw"},				\
	{vflags,	0x29,	"data_rw_miss"}
113
114static const struct nametable P5mmx_names0[] = {
115	P5_EVENTS(V_P5),
116	{V_P5mmx,	0x2a,	"bus_ownership_latency"},
117	{V_P5mmx,	0x2b,	"mmx_instr_upipe"},
118	{V_P5mmx,	0x2c,	"cache_M_line_sharing"},
119	{V_P5mmx,	0x2d,	"emms_instr"},
120	{V_P5mmx,	0x2e,	"bus_util_processor"},
121	{V_P5mmx,	0x2f,	"sat_mmx_instr"},
122	{V_P5mmx,	0x30,	"clks_not_HLT"},
123	{V_P5mmx,	0x31,	"mmx_data_read"},
124	{V_P5mmx,	0x32,	"clks_fp_stall"},
125	{V_P5mmx,	0x33,	"d1_starv_fifo_0"},
126	{V_P5mmx,	0x34,	"mmx_data_write"},
127	{V_P5mmx,	0x35,	"pipe_flush_wbp"},
128	{V_P5mmx,	0x36,	"mmx_misalign_data_refs"},
129	{V_P5mmx,	0x37,	"rets_pred_incorrect"},
130	{V_P5mmx,	0x38,	"mmx_multiply_unit_interlock"},
131	{V_P5mmx,	0x39,	"rets"},
132	{V_P5mmx,	0x3a,	"btb_false_entries"},
133	{V_P5mmx,	0x3b,	"clocks_stall_full_wb"},
134	{V_END}
135};
136
137static const struct nametable P5mmx_names1[] = {
138	P5_EVENTS(V_P5),
139	{V_P5mmx,	0x2a,	"bus_ownership_transfers"},
140	{V_P5mmx,	0x2b,	"mmx_instr_vpipe"},
141	{V_P5mmx,	0x2c,	"cache_lint_sharing"},
142	{V_P5mmx,	0x2d,	"mmx_fp_transitions"},
143	{V_P5mmx,	0x2e,	"writes_noncache_mem"},
144	{V_P5mmx,	0x2f,	"sats_performed"},
145	{V_P5mmx,	0x30,	"clks_dcache_tlb_miss"},
146	{V_P5mmx,	0x31,	"mmx_data_read_miss"},
147	{V_P5mmx,	0x32,	"taken_br"},
148	{V_P5mmx,	0x33,	"d1_starv_fifo_1"},
149	{V_P5mmx,	0x34,	"mmx_data_write_miss"},
150	{V_P5mmx,	0x35,	"pipe_flush_wbp_wb"},
151	{V_P5mmx,	0x36,	"mmx_pipe_stall_data_read"},
152	{V_P5mmx,	0x37,	"rets_pred"},
153	{V_P5mmx,	0x38,	"movd_movq_stall"},
154	{V_P5mmx,	0x39,	"rsb_overflow"},
155	{V_P5mmx,	0x3a,	"btb_mispred_nt"},
156	{V_P5mmx,	0x3b,	"mmx_stall_write_ME"},
157	{V_END}
158};
159
160static const struct nametable *P5mmx_names[2] = {
161	P5mmx_names0,
162	P5mmx_names1
163};
164
165/*
166 * Pentium Pro and Pentium II events
167 */
168static const struct nametable P6_names[] = {
169	/*
170	 * Data cache unit
171	 */
172	{V_P6,		0x43,	"data_mem_refs"},
173	{V_P6,		0x45,	"dcu_lines_in"},
174	{V_P6,		0x46,	"dcu_m_lines_in"},
175	{V_P6,		0x47,	"dcu_m_lines_out"},
176	{V_P6,		0x48,	"dcu_miss_outstanding"},
177
178	/*
179	 * Instruction fetch unit
180	 */
181	{V_P6,		0x80,	"ifu_ifetch"},
182	{V_P6,		0x81,	"ifu_ifetch_miss"},
183	{V_P6,		0x85,	"itlb_miss"},
184	{V_P6,		0x86,	"ifu_mem_stall"},
185	{V_P6,		0x87,	"ild_stall"},
186
187	/*
188	 * L2 cache
189	 */
190	{V_P6,		0x28,	"l2_ifetch"},
191	{V_P6,		0x29,	"l2_ld"},
192	{V_P6,		0x2a,	"l2_st"},
193	{V_P6,		0x24,	"l2_lines_in"},
194	{V_P6,		0x26,	"l2_lines_out"},
195	{V_P6,		0x25,	"l2_m_lines_inm"},
196	{V_P6,		0x27,	"l2_m_lines_outm"},
197	{V_P6,		0x2e,	"l2_rqsts"},
198	{V_P6,		0x21,	"l2_ads"},
199	{V_P6,		0x22,	"l2_dbus_busy"},
200	{V_P6,		0x23,	"l2_dbus_busy_rd"},
201
202	/*
203	 * External bus logic
204	 */
205	{V_P6,		0x62,	"bus_drdy_clocks"},
206	{V_P6,		0x63,	"bus_lock_clocks"},
207	{V_P6,		0x60,	"bus_req_outstanding"},
208	{V_P6,		0x65,	"bus_tran_brd"},
209	{V_P6,		0x66,	"bus_tran_rfo"},
210	{V_P6,		0x67,	"bus_trans_wb"},
211	{V_P6,		0x68,	"bus_tran_ifetch"},
212	{V_P6,		0x69,	"bus_tran_inval"},
213	{V_P6,		0x6a,	"bus_tran_pwr"},
214	{V_P6,		0x6b,	"bus_trans_p"},
215	{V_P6,		0x6c,	"bus_trans_io"},
216	{V_P6,		0x6d,	"bus_tran_def"},
217	{V_P6,		0x6e,	"bus_tran_burst"},
218	{V_P6,		0x70,	"bus_tran_any"},
219	{V_P6,		0x6f,	"bus_tran_mem"},
220	{V_P6,		0x64,	"bus_data_rcv"},
221	{V_P6,		0x61,	"bus_bnr_drv"},
222	{V_P6,		0x7a,	"bus_hit_drv"},
223	{V_P6,		0x7b,	"bus_hitm_drv"},
224	{V_P6,		0x7e,	"bus_snoop_stall"},
225
226	/*
227	 * Floating point unit
228	 */
229	{V_P6,		0xc1,	"flops"},		/* 0 only */
230	{V_P6,		0x10,	"fp_comp_ops_exe"},	/* 0 only */
231	{V_P6,		0x11,	"fp_assist"},		/* 1 only */
232	{V_P6,		0x12,	"mul"},			/* 1 only */
233	{V_P6,		0x13,	"div"},			/* 1 only */
234	{V_P6,		0x14,	"cycles_div_busy"},	/* 0 only */
235
236	/*
237	 * Memory ordering
238	 */
239	{V_P6,		0x3,	"ld_blocks"},
240	{V_P6,		0x4,	"sb_drains"},
241	{V_P6,		0x5,	"misalign_mem_ref"},
242
243	/*
244	 * Instruction decoding and retirement
245	 */
246	{V_P6,		0xc0,	"inst_retired"},
247	{V_P6,		0xc2,	"uops_retired"},
248	{V_P6,		0xd0,	"inst_decoder"},
249
250	/*
251	 * Interrupts
252	 */
253	{V_P6,		0xc8,	"hw_int_rx"},
254	{V_P6,		0xc6,	"cycles_int_masked"},
255	{V_P6,		0xc7,	"cycles_int_pending_and_masked"},
256
257	/*
258	 * Branches
259	 */
260	{V_P6,		0xc4,	"br_inst_retired"},
261	{V_P6,		0xc5,	"br_miss_pred_retired"},
262	{V_P6,		0xc9,	"br_taken_retired"},
263	{V_P6,		0xca,	"br_miss_pred_taken_ret"},
264	{V_P6,		0xe0,	"br_inst_decoded"},
265	{V_P6,		0xe2,	"btb_misses"},
266	{V_P6,		0xe4,	"br_bogus"},
267	{V_P6,		0xe6,	"baclears"},
268
269	/*
270	 * Stalls
271	 */
272	{V_P6,		0xa2,	"resource_stalls"},
273	{V_P6,		0xd2,	"partial_rat_stalls"},
274
275	/*
276	 * Segment register loads
277	 */
278	{V_P6,		0x6,	"segment_reg_loads"},
279
280	/*
281	 * Clocks
282	 */
283	{V_P6,		0x79,	"cpu_clk_unhalted"},
284
285	/*
286	 * MMX
287	 */
288	{V_P6mmx,	0xb0,	"mmx_instr_exec"},
289	{V_P6mmx,	0xb1,	"mmx_sat_instr_exec"},
290	{V_P6mmx,	0xb2,	"mmx_uops_exec"},
291	{V_P6mmx,	0xb3,	"mmx_instr_type_exec"},
292	{V_P6mmx,	0xcc,	"fp_mmx_trans"},
293	{V_P6mmx,	0xcd,	"mmx_assists"},
294	{V_P6mmx,	0xce,	"mmx_instr_ret"},
295	{V_P6mmx,	0xd4,	"seg_rename_stalls"},
296	{V_P6mmx,	0xd5,	"seg_reg_renames"},
297	{V_P6mmx,	0xd6,	"ret_seg_renames"},
298
299	{V_END}
300};
301
302#define	MAPCPUVER(cpuver)	(cpuvermap[(cpuver) - CPC_PENTIUM])
303
304static int
305validargs(int cpuver, int regno)
306{
307	if (regno < 0 || regno > 1)
308		return (0);
309	cpuver -= CPC_PENTIUM;
310	if (cpuver < 0 ||
311	    cpuver >= sizeof (cpuvermap) / sizeof (cpuvermap[0]))
312		return (0);
313	return (1);
314}
315
316/*ARGSUSED*/
317static int
318versionmatch(int cpuver, int regno, const struct nametable *n)
319{
320	if (!validargs(cpuver, regno) || (n->ver & MAPCPUVER(cpuver)) == 0)
321		return (0);
322
323	switch (MAPCPUVER(cpuver)) {
324	case V_P5:
325	case V_P5 | V_P5mmx:
326		break;
327	case V_P6:
328	case V_P6 | V_P6mmx:
329		switch (n->bits) {
330		case 0xc1:	/* flops */
331		case 0x10:	/* fp_comp_ops_exe */
332		case 0x14:	/* cycles_div_busy */
333			/* only reg0 counts these */
334			if (regno == 1)
335				return (0);
336			break;
337		case 0x11:	/* fp_assist */
338		case 0x12:	/* mul */
339		case 0x13:	/* div */
340			/* only 1 can count these */
341			if (regno == 0)
342				return (0);
343			break;
344		default:
345			break;
346		}
347		break;
348	default:
349		return (0);
350	}
351
352	return (1);
353}
354
355static const struct nametable *
356getnametable(int cpuver, int regno)
357{
358	const struct nametable *n;
359
360	if (!validargs(cpuver, regno))
361		return (NULL);
362
363	switch (MAPCPUVER(cpuver)) {
364	case V_P5:
365	case V_P5 | V_P5mmx:
366		n = P5mmx_names[regno];
367		break;
368	case V_P6:
369	case V_P6 | V_P6mmx:
370		n = P6_names;
371		break;
372	default:
373		n = NULL;
374		break;
375	}
376
377	return (n);
378}
379
380void
381cpc_walk_names(int cpuver, int regno, void *arg,
382    void (*action)(void *, int, const char *, uint8_t))
383{
384	const struct nametable *n;
385
386	if ((n = getnametable(cpuver, regno)) == NULL)
387		return;
388	for (; n->ver != V_END; n++)
389		if (versionmatch(cpuver, regno, n))
390			action(arg, regno, n->name, n->bits);
391}
392
393const char *
394__cpc_reg_to_name(int cpuver, int regno, uint8_t bits)
395{
396	const struct nametable *n;
397
398	if ((n = getnametable(cpuver, regno)) == NULL)
399		return (NULL);
400	for (; n->ver != V_END; n++)
401		if (bits == n->bits && versionmatch(cpuver, regno, n))
402			return (n->name);
403	return (NULL);
404}
405
406/*
407 * Register names can be specified as strings or even as numbers
408 */
409int
410__cpc_name_to_reg(int cpuver, int regno, const char *name, uint8_t *bits)
411{
412	const struct nametable *n;
413	char *eptr = NULL;
414	long value;
415
416	if ((n = getnametable(cpuver, regno)) == NULL || name == NULL)
417		return (-1);
418	for (; n->ver != V_END; n++)
419		if (strcmp(name, n->name) == 0 &&
420		    versionmatch(cpuver, regno, n)) {
421			*bits = n->bits;
422			return (0);
423		}
424
425	value = strtol(name, &eptr, 0);
426	if (name != eptr && value >= 0 && value <= UINT8_MAX) {
427		*bits = (uint8_t)value;
428		return (0);
429	}
430
431	return (-1);
432}
433
434const char *
435cpc_getcciname(int cpuver)
436{
437	if (validargs(cpuver, 0))
438		switch (MAPCPUVER(cpuver)) {
439		case V_P5:
440			return ("Pentium");
441		case V_P5 | V_P5mmx:
442			return ("Pentium with MMX");
443		case V_P6:
444			return ("Pentium Pro, Pentium II");
445		case V_P6 | V_P6mmx:
446			return ("Pentium Pro with MMX, Pentium II");
447		default:
448			break;
449		}
450	return (NULL);
451}
452
453const char *
454cpc_getcpuref(int cpuver)
455{
456	if (validargs(cpuver, 0))
457		switch (MAPCPUVER(cpuver)) {
458		case V_P5:
459		case V_P5 | V_P5mmx:
460			return (gettext(
461			    "See Appendix A.2 of the \"Intel Architecture "
462			    "Software Developer's Manual,\" 243192, 1997"));
463		case V_P6:
464		case V_P6 | V_P6mmx:
465			return (gettext(
466			    "See Appendix A.1 of the \"Intel Architecture "
467			    "Software Developer's Manual,\" 243192, 1997"));
468		default:
469			break;
470		}
471	return (NULL);
472}
473
474/*
475 * This is a functional interface to allow CPUs with fewer %pic registers
476 * to share the same data structure as those with more %pic registers
477 * within the same instruction set family.
478 */
479uint_t
480cpc_getnpic(int cpuver)
481{
482	switch (cpuver) {
483	case CPC_PENTIUM:
484	case CPC_PENTIUM_MMX:
485	case CPC_PENTIUM_PRO:
486	case CPC_PENTIUM_PRO_MMX:
487#define	EVENT	((cpc_event_t *)0)
488		return (sizeof (EVENT->ce_pic) / sizeof	(EVENT->ce_pic[0]));
489#undef	EVENT
490	default:
491		return (0);
492	}
493}
494
/*
 * Extract bits u..l (inclusive, u >= l, bit 0 least significant) of v,
 * shifted down so that bit l ends up in bit 0.
 *
 * The mask is built from an unsigned constant using two shifts, so it
 * stays well defined for fields up to the full width of unsigned int;
 * a signed "1 << width" overflows for the widest fields.  As a result
 * the value of the macro always has an unsigned type.
 */
#define	BITS(v, u, l)	\
	(((v) >> (l)) & (((1u << ((u) - (l))) << 1) - 1u))
497
498#include "getcpuid.h"
499
500/*
501 * Return the version of the current processor.
502 *
503 * Version -1 is defined as 'not performance counter capable'
504 */
505int
506cpc_getcpuver(void)
507{
508	static int ver = -1;
509	uint32_t maxeax;
510	uint32_t vbuf[4];
511
512	if (ver != -1)
513		return (ver);
514
515	maxeax = cpc_getcpuid(0, &vbuf[0], &vbuf[2], &vbuf[1]);
516	{
517		char *vendor = (char *)vbuf;
518		vendor[12] = '\0';
519
520		if (strcmp(vendor, "GenuineIntel") != 0)
521			return (ver);
522	}
523
524	if (maxeax >= 1) {
525		int family, model;
526		uint32_t eax, ebx, ecx, edx;
527
528		eax = cpc_getcpuid(1, &ebx, &ecx, &edx);
529
530		if ((family = BITS(eax, 11, 8)) == 0xf)
531			family = BITS(eax, 27, 20);
532		if ((model = BITS(eax, 7, 4)) == 0xf)
533			model = BITS(eax, 19, 16);
534
535		/*
536		 * map family and model into the performance
537		 * counter architectures we currently understand.
538		 *
539		 * See application note AP485 (from developer.intel.com)
540		 * for further explanation.
541		 */
542		switch (family) {
543		case 5:		/* Pentium and Pentium with MMX */
544			ver = model < 4 ?
545				CPC_PENTIUM : CPC_PENTIUM_MMX;
546			break;
547		case 6:		/* Pentium Pro and Pentium II and III */
548			ver = BITS(edx, 23, 23) ?	   /* mmx check */
549				CPC_PENTIUM_PRO_MMX : CPC_PENTIUM_PRO;
550			break;
551		default:
552		case 0xf:	/* Pentium IV */
553			break;
554		}
555	}
556
557	return (ver);
558}
559