/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include "libcpc.h" #include "libcpc_impl.h" /* * Configuration data for Pentium Pro performance counters. * * Definitions taken from [3]. See the reference to * understand what any of these settings actually means. * * [3] "Pentium Pro Family Developer's Manual, Volume 3: * Operating Systems Writer's Manual," January 1996 */ #define V_P5 (1u << 0) /* specific to Pentium cpus */ #define V_P5mmx (1u << 1) /* " MMX instructions */ #define V_P6 (1u << 2) /* specific to Pentium II cpus */ #define V_P6mmx (1u << 3) /* " MMX instructions */ #define V_END 0 /* * map from "cpu version" to flag bits */ static const uint_t cpuvermap[] = { V_P5, /* CPC_PENTIUM */ V_P5 | V_P5mmx, /* CPC_PENTIUM_MMX */ V_P6, /* CPC_PENTIUM_PRO */ V_P6 | V_P6mmx, /* CPC_PENTIUM_PRO_MMX */ }; struct nametable { const uint_t ver; const uint8_t bits; const char *name; }; /* * Basic Pentium events */ #define P5_EVENTS(v) \ {v, 0x0, "data_read"}, \ {v, 0x1, "data_write"}, \ {v, 0x2, "data_tlb_miss"}, \ {v, 0x3, "data_read_miss"}, \ {v, 0x4, "data_write_miss"}, \ {v, 0x5, "write_hit_to_M_or_E"}, \ {v, 0x6, "dcache_lines_wrback"}, \ {v, 0x7, "external_snoops"}, \ {v, 0x8, "external_dcache_snoop_hits"}, \ {v, 0x9, "memory_access_in_both_pipes"}, \ {v, 0xa, "bank_conflicts"}, \ {v, 0xb, "misaligned_ref"}, \ {v, 0xc, "code_read"}, \ {v, 0xd, "code_tlb_miss"}, \ {v, 0xe, "code_cache_miss"}, \ {v, 0xf, "any_segreg_loaded"}, \ {v, 0x12, "branches"}, \ {v, 0x13, "btb_hits"}, \ {v, 0x14, "taken_or_btb_hit"}, \ {v, 0x15, "pipeline_flushes"}, \ {v, 0x16, "instr_exec"}, \ {v, 0x17, "instr_exec_V_pipe"}, \ {v, 0x18, "clks_bus_cycle"}, \ {v, 0x19, "clks_full_wbufs"}, \ {v, 0x1a, "pipe_stall_read"}, \ {v, 0x1b, "stall_on_write_ME"}, \ {v, 0x1c, "locked_bus_cycle"}, \ {v, 0x1d, "io_rw_cycles"}, \ {v, 0x1e, "reads_noncache_mem"}, \ {v, 0x1f, "pipeline_agi_stalls"}, \ {v, 0x22, "flops"}, \ {v, 0x23, "bp_match_dr0"}, \ {v, 0x24, "bp_match_dr1"}, \ {v, 0x25, "bp_match_dr2"}, \ {v, 0x26, "bp_match_dr3"}, \ {v, 0x27, "hw_intrs"}, \ {v, 0x28, "data_rw"}, \ {v, 0x29, "data_rw_miss"} static const struct nametable P5mmx_names0[] = { P5_EVENTS(V_P5), {V_P5mmx, 0x2a, "bus_ownership_latency"}, {V_P5mmx, 0x2b, "mmx_instr_upipe"}, {V_P5mmx, 0x2c, "cache_M_line_sharing"}, {V_P5mmx, 0x2d, "emms_instr"}, {V_P5mmx, 0x2e, "bus_util_processor"}, {V_P5mmx, 0x2f, "sat_mmx_instr"}, {V_P5mmx, 0x30, "clks_not_HLT"}, {V_P5mmx, 0x31, "mmx_data_read"}, {V_P5mmx, 0x32, "clks_fp_stall"}, {V_P5mmx, 0x33, "d1_starv_fifo_0"}, {V_P5mmx, 0x34, "mmx_data_write"}, {V_P5mmx, 0x35, "pipe_flush_wbp"}, {V_P5mmx, 0x36, "mmx_misalign_data_refs"}, {V_P5mmx, 0x37, "rets_pred_incorrect"}, {V_P5mmx, 0x38, "mmx_multiply_unit_interlock"}, {V_P5mmx, 0x39, "rets"}, {V_P5mmx, 0x3a, "btb_false_entries"}, {V_P5mmx, 0x3b, "clocks_stall_full_wb"}, {V_END} }; static const struct nametable P5mmx_names1[] = { P5_EVENTS(V_P5), {V_P5mmx, 0x2a, "bus_ownership_transfers"}, {V_P5mmx, 0x2b, "mmx_instr_vpipe"}, {V_P5mmx, 0x2c, "cache_lint_sharing"}, {V_P5mmx, 0x2d, "mmx_fp_transitions"}, {V_P5mmx, 0x2e, "writes_noncache_mem"}, {V_P5mmx, 0x2f, "sats_performed"}, {V_P5mmx, 0x30, "clks_dcache_tlb_miss"}, {V_P5mmx, 0x31, "mmx_data_read_miss"}, {V_P5mmx, 0x32, "taken_br"}, {V_P5mmx, 0x33, "d1_starv_fifo_1"}, {V_P5mmx, 0x34, "mmx_data_write_miss"}, {V_P5mmx, 0x35, "pipe_flush_wbp_wb"}, {V_P5mmx, 0x36, "mmx_pipe_stall_data_read"}, {V_P5mmx, 0x37, "rets_pred"}, {V_P5mmx, 0x38, "movd_movq_stall"}, {V_P5mmx, 0x39, "rsb_overflow"}, {V_P5mmx, 0x3a, "btb_mispred_nt"}, {V_P5mmx, 0x3b, "mmx_stall_write_ME"}, {V_END} }; static const struct nametable *P5mmx_names[2] = { P5mmx_names0, P5mmx_names1 }; /* * Pentium Pro and Pentium II events */ static const struct nametable P6_names[] = { /* * Data cache unit */ {V_P6, 0x43, "data_mem_refs"}, {V_P6, 0x45, "dcu_lines_in"}, {V_P6, 0x46, "dcu_m_lines_in"}, {V_P6, 0x47, "dcu_m_lines_out"}, {V_P6, 0x48, "dcu_miss_outstanding"}, /* * Instruction fetch unit */ {V_P6, 0x80, "ifu_ifetch"}, {V_P6, 0x81, "ifu_ifetch_miss"}, {V_P6, 0x85, "itlb_miss"}, {V_P6, 0x86, "ifu_mem_stall"}, {V_P6, 0x87, "ild_stall"}, /* * L2 cache */ {V_P6, 0x28, "l2_ifetch"}, {V_P6, 0x29, "l2_ld"}, {V_P6, 0x2a, "l2_st"}, {V_P6, 0x24, "l2_lines_in"}, {V_P6, 0x26, "l2_lines_out"}, {V_P6, 0x25, "l2_m_lines_inm"}, {V_P6, 0x27, "l2_m_lines_outm"}, {V_P6, 0x2e, "l2_rqsts"}, {V_P6, 0x21, "l2_ads"}, {V_P6, 0x22, "l2_dbus_busy"}, {V_P6, 0x23, "l2_dbus_busy_rd"}, /* * External bus logic */ {V_P6, 0x62, "bus_drdy_clocks"}, {V_P6, 0x63, "bus_lock_clocks"}, {V_P6, 0x60, "bus_req_outstanding"}, {V_P6, 0x65, "bus_tran_brd"}, {V_P6, 0x66, "bus_tran_rfo"}, {V_P6, 0x67, "bus_trans_wb"}, {V_P6, 0x68, "bus_tran_ifetch"}, {V_P6, 0x69, "bus_tran_inval"}, {V_P6, 0x6a, "bus_tran_pwr"}, {V_P6, 0x6b, "bus_trans_p"}, {V_P6, 0x6c, "bus_trans_io"}, {V_P6, 0x6d, "bus_tran_def"}, {V_P6, 0x6e, "bus_tran_burst"}, {V_P6, 0x70, "bus_tran_any"}, {V_P6, 0x6f, "bus_tran_mem"}, {V_P6, 0x64, "bus_data_rcv"}, {V_P6, 0x61, "bus_bnr_drv"}, {V_P6, 0x7a, "bus_hit_drv"}, {V_P6, 0x7b, "bus_hitm_drv"}, {V_P6, 0x7e, "bus_snoop_stall"}, /* * Floating point unit */ {V_P6, 0xc1, "flops"}, /* 0 only */ {V_P6, 0x10, "fp_comp_ops_exe"}, /* 0 only */ {V_P6, 0x11, "fp_assist"}, /* 1 only */ {V_P6, 0x12, "mul"}, /* 1 only */ {V_P6, 0x13, "div"}, /* 1 only */ {V_P6, 0x14, "cycles_div_busy"}, /* 0 only */ /* * Memory ordering */ {V_P6, 0x3, "ld_blocks"}, {V_P6, 0x4, "sb_drains"}, {V_P6, 0x5, "misalign_mem_ref"}, /* * Instruction decoding and retirement */ {V_P6, 0xc0, "inst_retired"}, {V_P6, 0xc2, "uops_retired"}, {V_P6, 0xd0, "inst_decoder"}, /* * Interrupts */ {V_P6, 0xc8, "hw_int_rx"}, {V_P6, 0xc6, "cycles_int_masked"}, {V_P6, 0xc7, "cycles_int_pending_and_masked"}, /* * Branches */ {V_P6, 0xc4, "br_inst_retired"}, {V_P6, 0xc5, "br_miss_pred_retired"}, {V_P6, 0xc9, "br_taken_retired"}, {V_P6, 0xca, "br_miss_pred_taken_ret"}, {V_P6, 0xe0, "br_inst_decoded"}, {V_P6, 0xe2, "btb_misses"}, {V_P6, 0xe4, "br_bogus"}, {V_P6, 0xe6, "baclears"}, /* * Stalls */ {V_P6, 0xa2, "resource_stalls"}, {V_P6, 0xd2, "partial_rat_stalls"}, /* * Segment register loads */ {V_P6, 0x6, "segment_reg_loads"}, /* * Clocks */ {V_P6, 0x79, "cpu_clk_unhalted"}, /* * MMX */ {V_P6mmx, 0xb0, "mmx_instr_exec"}, {V_P6mmx, 0xb1, "mmx_sat_instr_exec"}, {V_P6mmx, 0xb2, "mmx_uops_exec"}, {V_P6mmx, 0xb3, "mmx_instr_type_exec"}, {V_P6mmx, 0xcc, "fp_mmx_trans"}, {V_P6mmx, 0xcd, "mmx_assists"}, {V_P6mmx, 0xce, "mmx_instr_ret"}, {V_P6mmx, 0xd4, "seg_rename_stalls"}, {V_P6mmx, 0xd5, "seg_reg_renames"}, {V_P6mmx, 0xd6, "ret_seg_renames"}, {V_END} }; #define MAPCPUVER(cpuver) (cpuvermap[(cpuver) - CPC_PENTIUM]) static int validargs(int cpuver, int regno) { if (regno < 0 || regno > 1) return (0); cpuver -= CPC_PENTIUM; if (cpuver < 0 || cpuver >= sizeof (cpuvermap) / sizeof (cpuvermap[0])) return (0); return (1); } /*ARGSUSED*/ static int versionmatch(int cpuver, int regno, const struct nametable *n) { if (!validargs(cpuver, regno) || (n->ver & MAPCPUVER(cpuver)) == 0) return (0); switch (MAPCPUVER(cpuver)) { case V_P5: case V_P5 | V_P5mmx: break; case V_P6: case V_P6 | V_P6mmx: switch (n->bits) { case 0xc1: /* flops */ case 0x10: /* fp_comp_ops_exe */ case 0x14: /* cycles_div_busy */ /* only reg0 counts these */ if (regno == 1) return (0); break; case 0x11: /* fp_assist */ case 0x12: /* mul */ case 0x13: /* div */ /* only 1 can count these */ if (regno == 0) return (0); break; default: break; } break; default: return (0); } return (1); } static const struct nametable * getnametable(int cpuver, int regno) { const struct nametable *n; if (!validargs(cpuver, regno)) return (NULL); switch (MAPCPUVER(cpuver)) { case V_P5: case V_P5 | V_P5mmx: n = P5mmx_names[regno]; break; case V_P6: case V_P6 | V_P6mmx: n = P6_names; break; default: n = NULL; break; } return (n); } void cpc_walk_names(int cpuver, int regno, void *arg, void (*action)(void *, int, const char *, uint8_t)) { const struct nametable *n; if ((n = getnametable(cpuver, regno)) == NULL) return; for (; n->ver != V_END; n++) if (versionmatch(cpuver, regno, n)) action(arg, regno, n->name, n->bits); } const char * __cpc_reg_to_name(int cpuver, int regno, uint8_t bits) { const struct nametable *n; if ((n = getnametable(cpuver, regno)) == NULL) return (NULL); for (; n->ver != V_END; n++) if (bits == n->bits && versionmatch(cpuver, regno, n)) return (n->name); return (NULL); } /* * Register names can be specified as strings or even as numbers */ int __cpc_name_to_reg(int cpuver, int regno, const char *name, uint8_t *bits) { const struct nametable *n; char *eptr = NULL; long value; if ((n = getnametable(cpuver, regno)) == NULL || name == NULL) return (-1); for (; n->ver != V_END; n++) if (strcmp(name, n->name) == 0 && versionmatch(cpuver, regno, n)) { *bits = n->bits; return (0); } value = strtol(name, &eptr, 0); if (name != eptr && value >= 0 && value <= UINT8_MAX) { *bits = (uint8_t)value; return (0); } return (-1); } const char * cpc_getcciname(int cpuver) { if (validargs(cpuver, 0)) switch (MAPCPUVER(cpuver)) { case V_P5: return ("Pentium"); case V_P5 | V_P5mmx: return ("Pentium with MMX"); case V_P6: return ("Pentium Pro, Pentium II"); case V_P6 | V_P6mmx: return ("Pentium Pro with MMX, Pentium II"); default: break; } return (NULL); } const char * cpc_getcpuref(int cpuver) { if (validargs(cpuver, 0)) switch (MAPCPUVER(cpuver)) { case V_P5: case V_P5 | V_P5mmx: return (gettext( "See Appendix A.2 of the \"Intel Architecture " "Software Developer's Manual,\" 243192, 1997")); case V_P6: case V_P6 | V_P6mmx: return (gettext( "See Appendix A.1 of the \"Intel Architecture " "Software Developer's Manual,\" 243192, 1997")); default: break; } return (NULL); } /* * This is a functional interface to allow CPUs with fewer %pic registers * to share the same data structure as those with more %pic registers * within the same instruction set family. */ uint_t cpc_getnpic(int cpuver) { switch (cpuver) { case CPC_PENTIUM: case CPC_PENTIUM_MMX: case CPC_PENTIUM_PRO: case CPC_PENTIUM_PRO_MMX: #define EVENT ((cpc_event_t *)0) return (sizeof (EVENT->ce_pic) / sizeof (EVENT->ce_pic[0])); #undef EVENT default: return (0); } } #define BITS(v, u, l) \ (((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1)) #include "getcpuid.h" /* * Return the version of the current processor. * * Version -1 is defined as 'not performance counter capable' */ int cpc_getcpuver(void) { static int ver = -1; uint32_t maxeax; uint32_t vbuf[4]; if (ver != -1) return (ver); maxeax = cpc_getcpuid(0, &vbuf[0], &vbuf[2], &vbuf[1]); { char *vendor = (char *)vbuf; vendor[12] = '\0'; if (strcmp(vendor, "GenuineIntel") != 0) return (ver); } if (maxeax >= 1) { int family, model; uint32_t eax, ebx, ecx, edx; eax = cpc_getcpuid(1, &ebx, &ecx, &edx); if ((family = BITS(eax, 11, 8)) == 0xf) family = BITS(eax, 27, 20); if ((model = BITS(eax, 7, 4)) == 0xf) model = BITS(eax, 19, 16); /* * map family and model into the performance * counter architectures we currently understand. * * See application note AP485 (from developer.intel.com) * for further explanation. */ switch (family) { case 5: /* Pentium and Pentium with MMX */ ver = model < 4 ? CPC_PENTIUM : CPC_PENTIUM_MMX; break; case 6: /* Pentium Pro and Pentium II and III */ ver = BITS(edx, 23, 23) ? /* mmx check */ CPC_PENTIUM_PRO_MMX : CPC_PENTIUM_PRO; break; default: case 0xf: /* Pentium IV */ break; } } return (ver); }