/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Performance Counter Back-End for Intel Family 6 Models 15, 23, 26 & 28 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int core_pcbe_init(void); static uint_t core_pcbe_ncounters(void); static const char *core_pcbe_impl_name(void); static const char *core_pcbe_cpuref(void); static char *core_pcbe_list_events(uint_t picnum); static char *core_pcbe_list_attrs(void); static uint64_t core_pcbe_event_coverage(char *event); static uint64_t core_pcbe_overflow_bitmap(void); static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset, uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data, void *token); static void core_pcbe_program(void *token); static void core_pcbe_allstop(void); static void core_pcbe_sample(void *token); static void core_pcbe_free(void *config); #define FALSE 0 #define TRUE 1 /* Counter Type */ #define CORE_GPC 0 /* General-Purpose Counter (GPC) */ #define CORE_FFC 1 /* Fixed-Function Counter (FFC) */ /* MSR Addresses */ #define GPC_BASE_PMC 0x00c1 /* First GPC */ #define GPC_BASE_PES 0x0186 /* First GPC Event Select register */ #define FFC_BASE_PMC 0x0309 /* First FFC */ #define PERF_FIXED_CTR_CTRL 0x038d /* Used to enable/disable FFCs */ #define PERF_GLOBAL_STATUS 0x038e /* Overflow status register */ #define PERF_GLOBAL_CTRL 0x038f /* Used to enable/disable counting */ #define PERF_GLOBAL_OVF_CTRL 0x0390 /* Used to clear overflow status */ /* * Processor Event Select register fields */ #define CORE_USR (1ULL << 16) /* Count while not in ring 0 */ #define CORE_OS (1ULL << 17) /* Count while in ring 0 */ #define CORE_EDGE (1ULL << 18) /* Enable edge detection */ #define CORE_PC (1ULL << 19) /* Enable pin control */ #define CORE_INT (1ULL << 20) /* Enable interrupt on overflow */ #define CORE_EN (1ULL << 22) /* Enable counting */ #define CORE_INV (1ULL << 23) /* Invert the CMASK */ #define CORE_ANYTHR (1ULL << 21) /* Count event for any thread on core */ #define CORE_UMASK_SHIFT 8 #define CORE_UMASK_MASK 0xffu #define CORE_CMASK_SHIFT 24 #define CORE_CMASK_MASK 0xffu /* * Fixed-function counter attributes */ #define CORE_FFC_OS_EN (1ULL << 0) /* Count while not in ring 0 */ #define CORE_FFC_USR_EN (1ULL << 1) /* Count while in ring 1 */ #define CORE_FFC_ANYTHR (1ULL << 2) /* Count event for any thread on core */ #define CORE_FFC_PMI (1ULL << 3) /* Enable interrupt on overflow */ /* * Number of bits for specifying each FFC's attributes in the control register */ #define CORE_FFC_ATTR_SIZE 4 /* * CondChgd and OvfBuffer fields of global status and overflow control registers */ #define CONDCHGD (1ULL << 63) #define OVFBUFFER (1ULL << 62) #define MASK_CONDCHGD_OVFBUFFER (CONDCHGD | OVFBUFFER) #define ALL_STOPPED 0ULL #define BITMASK_XBITS(x) ((1ull << (x)) - 1ull) /* * Only the lower 32-bits can be written to in the general-purpose * counters. The higher bits are extended from bit 31; all ones if * bit 31 is one and all zeros otherwise. * * The fixed-function counters do not have this restriction. */ #define BITS_EXTENDED_FROM_31 (BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31)) #define WRMSR(msr, value) \ wrmsr((msr), (value)); \ DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value)); #define RDMSR(msr, value) \ (value) = rdmsr((msr)); \ DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value)); typedef struct core_pcbe_config { uint64_t core_rawpic; uint64_t core_ctl; /* Event Select bits */ uint64_t core_pmc; /* Counter register address */ uint64_t core_pes; /* Event Select register address */ uint_t core_picno; uint8_t core_pictype; /* CORE_GPC or CORE_FFC */ } core_pcbe_config_t; pcbe_ops_t core_pcbe_ops = { PCBE_VER_1, /* pcbe_ver */ CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE, /* pcbe_caps */ core_pcbe_ncounters, /* pcbe_ncounters */ core_pcbe_impl_name, /* pcbe_impl_name */ core_pcbe_cpuref, /* pcbe_cpuref */ core_pcbe_list_events, /* pcbe_list_events */ core_pcbe_list_attrs, /* pcbe_list_attrs */ core_pcbe_event_coverage, /* pcbe_event_coverage */ core_pcbe_overflow_bitmap, /* pcbe_overflow_bitmap */ core_pcbe_configure, /* pcbe_configure */ core_pcbe_program, /* pcbe_program */ core_pcbe_allstop, /* pcbe_allstop */ core_pcbe_sample, /* pcbe_sample */ core_pcbe_free /* pcbe_free */ }; struct nametable_fam6mod15_23 { const char *name; uint64_t restricted_bits; uint8_t event_num; }; #define NT_END 0xFF /* * Counting an event for all cores or all bus agents requires cpc_cpu privileges */ #define ALL_CORES (1ULL << 15) #define ALL_AGENTS (1ULL << 13) /* * The events listed in the following table can be counted on all * general-purpose counters on processors that are of Family 6 Models 15 or 23 */ static const struct nametable_fam6mod15_23 cmn_gpc_events_f6m15_23[] = { /* Alphabetical order of event name */ { "baclears", 0x0, 0xe6 }, { "bogus_br", 0x0, 0xe4 }, { "br_bac_missp_exec", 0x0, 0x8a }, { "br_call_exec", 0x0, 0x92 }, { "br_call_missp_exec", 0x0, 0x93 }, { "br_cnd_exec", 0x0, 0x8b }, { "br_cnd_missp_exec", 0x0, 0x8c }, { "br_ind_call_exec", 0x0, 0x94 }, { "br_ind_exec", 0x0, 0x8d }, { "br_ind_missp_exec", 0x0, 0x8e }, { "br_inst_decoded", 0x0, 0xe0 }, { "br_inst_exec", 0x0, 0x88 }, { "br_inst_retired", 0x0, 0xc4 }, { "br_inst_retired_mispred", 0x0, 0xc5 }, { "br_missp_exec", 0x0, 0x89 }, { "br_ret_bac_missp_exec", 0x0, 0x91 }, { "br_ret_exec", 0x0, 0x8f }, { "br_ret_missp_exec", 0x0, 0x90 }, { "br_tkn_bubble_1", 0x0, 0x97 }, { "br_tkn_bubble_2", 0x0, 0x98 }, { "bus_bnr_drv", ALL_AGENTS, 0x61 }, { "bus_data_rcv", ALL_CORES, 0x64 }, { "bus_drdy_clocks", ALL_AGENTS, 0x62 }, { "bus_hit_drv", ALL_AGENTS, 0x7a }, { "bus_hitm_drv", ALL_AGENTS, 0x7b }, { "bus_io_wait", ALL_CORES, 0x7f }, { "bus_lock_clocks", ALL_CORES | ALL_AGENTS, 0x63 }, { "bus_request_outstanding", ALL_CORES | ALL_AGENTS, 0x60 }, { "bus_trans_any", ALL_CORES | ALL_AGENTS, 0x70 }, { "bus_trans_brd", ALL_CORES | ALL_AGENTS, 0x65 }, { "bus_trans_burst", ALL_CORES | ALL_AGENTS, 0x6e }, { "bus_trans_def", ALL_CORES | ALL_AGENTS, 0x6d }, { "bus_trans_ifetch", ALL_CORES | ALL_AGENTS, 0x68 }, { "bus_trans_inval", ALL_CORES | ALL_AGENTS, 0x69 }, { "bus_trans_io", ALL_CORES | ALL_AGENTS, 0x6c }, { "bus_trans_mem", ALL_CORES | ALL_AGENTS, 0x6f }, { "bus_trans_p", ALL_CORES | ALL_AGENTS, 0x6b }, { "bus_trans_pwr", ALL_CORES | ALL_AGENTS, 0x6a }, { "bus_trans_rfo", ALL_CORES | ALL_AGENTS, 0x66 }, { "bus_trans_wb", ALL_CORES | ALL_AGENTS, 0x67 }, { "busq_empty", ALL_CORES, 0x7d }, { "cmp_snoop", ALL_CORES, 0x78 }, { "cpu_clk_unhalted", 0x0, 0x3c }, { "cycles_int", 0x0, 0xc6 }, { "cycles_l1i_mem_stalled", 0x0, 0x86 }, { "dtlb_misses", 0x0, 0x08 }, { "eist_trans", 0x0, 0x3a }, { "esp", 0x0, 0xab }, { "ext_snoop", ALL_AGENTS, 0x77 }, { "fp_mmx_trans", 0x0, 0xcc }, { "hw_int_rcv", 0x0, 0xc8 }, { "ild_stall", 0x0, 0x87 }, { "inst_queue", 0x0, 0x83 }, { "inst_retired", 0x0, 0xc0 }, { "itlb", 0x0, 0x82 }, { "itlb_miss_retired", 0x0, 0xc9 }, { "l1d_all_ref", 0x0, 0x43 }, { "l1d_cache_ld", 0x0, 0x40 }, { "l1d_cache_lock", 0x0, 0x42 }, { "l1d_cache_st", 0x0, 0x41 }, { "l1d_m_evict", 0x0, 0x47 }, { "l1d_m_repl", 0x0, 0x46 }, { "l1d_pend_miss", 0x0, 0x48 }, { "l1d_prefetch", 0x0, 0x4e }, { "l1d_repl", 0x0, 0x45 }, { "l1d_split", 0x0, 0x49 }, { "l1i_misses", 0x0, 0x81 }, { "l1i_reads", 0x0, 0x80 }, { "l2_ads", ALL_CORES, 0x21 }, { "l2_dbus_busy_rd", ALL_CORES, 0x23 }, { "l2_ifetch", ALL_CORES, 0x28 }, { "l2_ld", ALL_CORES, 0x29 }, { "l2_lines_in", ALL_CORES, 0x24 }, { "l2_lines_out", ALL_CORES, 0x26 }, { "l2_lock", ALL_CORES, 0x2b }, { "l2_m_lines_in", ALL_CORES, 0x25 }, { "l2_m_lines_out", ALL_CORES, 0x27 }, { "l2_no_req", ALL_CORES, 0x32 }, { "l2_reject_busq", ALL_CORES, 0x30 }, { "l2_rqsts", ALL_CORES, 0x2e }, { "l2_st", ALL_CORES, 0x2a }, { "load_block", 0x0, 0x03 }, { "load_hit_pre", 0x0, 0x4c }, { "machine_nukes", 0x0, 0xc3 }, { "macro_insts", 0x0, 0xaa }, { "memory_disambiguation", 0x0, 0x09 }, { "misalign_mem_ref", 0x0, 0x05 }, { "page_walks", 0x0, 0x0c }, { "pref_rqsts_dn", 0x0, 0xf8 }, { "pref_rqsts_up", 0x0, 0xf0 }, { "rat_stalls", 0x0, 0xd2 }, { "resource_stalls", 0x0, 0xdc }, { "rs_uops_dispatched", 0x0, 0xa0 }, { "seg_reg_renames", 0x0, 0xd5 }, { "seg_rename_stalls", 0x0, 0xd4 }, { "segment_reg_loads", 0x0, 0x06 }, { "simd_assist", 0x0, 0xcd }, { "simd_comp_inst_retired", 0x0, 0xca }, { "simd_inst_retired", 0x0, 0xc7 }, { "simd_instr_retired", 0x0, 0xce }, { "simd_sat_instr_retired", 0x0, 0xcf }, { "simd_sat_uop_exec", 0x0, 0xb1 }, { "simd_uop_type_exec", 0x0, 0xb3 }, { "simd_uops_exec", 0x0, 0xb0 }, { "snoop_stall_drv", ALL_CORES | ALL_AGENTS, 0x7e }, { "sse_pre_exec", 0x0, 0x07 }, { "sse_pre_miss", 0x0, 0x4b }, { "store_block", 0x0, 0x04 }, { "thermal_trip", 0x0, 0x3b }, { "uops_retired", 0x0, 0xc2 }, { "x87_ops_retired", 0x0, 0xc1 }, { "", 0x0, NT_END } }; /* * If any of the pic specific events require privileges, make sure to add a * check in configure_gpc() to find whether an event hard-coded as a number by * the user has any privilege requirements */ static const struct nametable_fam6mod15_23 pic0_events[] = { /* Alphabetical order of event name */ { "cycles_div_busy", 0x0, 0x14 }, { "fp_comp_ops_exe", 0x0, 0x10 }, { "idle_during_div", 0x0, 0x18 }, { "mem_load_retired", 0x0, 0xcb }, { "rs_uops_dispatched_port", 0x0, 0xa1 }, { "", 0x0, NT_END } }; static const struct nametable_fam6mod15_23 pic1_events[] = { /* Alphabetical order of event name */ { "delayed_bypass", 0x0, 0x19 }, { "div", 0x0, 0x13 }, { "fp_assist", 0x0, 0x11 }, { "mul", 0x0, 0x12 }, { "", 0x0, NT_END } }; /* FFC entries must be in order */ char *ffc_names_non_htt[] = { "instr_retired.any", "cpu_clk_unhalted.core", "cpu_clk_unhalted.ref", NULL }; char *ffc_names_htt[] = { "instr_retired.any", "cpu_clk_unhalted.thread", "cpu_clk_unhalted.ref", NULL }; char **ffc_names = NULL; static char **gpc_names = NULL; static uint32_t versionid; static uint64_t num_gpc; static uint64_t width_gpc; static uint64_t mask_gpc; static uint64_t num_ffc; static uint64_t width_ffc; static uint64_t mask_ffc; static uint_t total_pmc; static uint64_t control_ffc; static uint64_t control_gpc; static uint64_t control_mask; static uint32_t arch_events_vector; #define IMPL_NAME_LEN 100 static char core_impl_name[IMPL_NAME_LEN]; static const char *core_cpuref = "See Appendix A of the \"Intel 64 and IA-32 Architectures Software" \ " Developer's Manual Volume 3B: System Programming Guide, Part 2\"" \ " Order Number: 253669-026US, Februrary 2008"; struct events_table_t { uint8_t eventselect; uint8_t unitmask; uint64_t supported_counters; const char *name; }; /* Used to describe which counters support an event */ #define C(x) (1 << (x)) #define C0 C(0) #define C1 C(1) #define C2 C(2) #define C3 C(3) #define C_ALL 0xFFFFFFFFFFFFFFFF /* Architectural events */ #define ARCH_EVENTS_COMMON \ { 0xc0, 0x00, C_ALL, "inst_retired.any_p" }, \ { 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" }, \ { 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" }, \ { 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" }, \ { 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" }, \ { 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" } const struct events_table_t arch_events_table_non_htt[] = { { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" }, ARCH_EVENTS_COMMON }; const struct events_table_t arch_events_table_htt[] = { { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" }, ARCH_EVENTS_COMMON }; const struct events_table_t *arch_events_table = NULL; static uint64_t known_arch_events; static uint64_t known_ffc_num; #define EVENTS_FAM6_MOD26 \ \ { 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" }, \ { 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" }, \ { 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" }, \ \ { 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" }, \ { 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" }, \ { 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" }, \ \ { 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" }, \ { 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" }, \ { 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" }, \ \ { 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" }, \ { 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" }, \ { 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" }, \ \ { 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" }, \ { 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" }, \ { 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" }, \ \ { 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" }, \ { 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" }, \ { 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" }, \ \ { 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" }, \ { 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" }, \ { 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" }, \ \ { 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" }, \ { 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" }, \ { 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" }, \ \ { 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" }, \ { 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" }, \ { 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" }, \ \ { 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" }, \ { 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" }, \ { 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" }, \ \ { 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" }, \ { 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" }, \ { 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" }, \ \ { 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" }, \ { 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" }, \ { 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" }, \ \ { 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" }, \ { 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" }, \ { 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" }, \ \ { 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" }, \ { 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" }, \ { 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" }, \ \ { 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" }, \ { 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" }, \ { 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" }, \ \ { 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" }, \ { 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" }, \ { 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" }, \ \ { 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" }, \ { 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" }, \ { 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" }, \ \ { 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" }, \ { 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" }, \ { 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" }, \ \ { 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" }, \ { 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" }, \ { 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" }, \ \ { 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" }, \ { 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" }, \ { 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" }, \ \ { 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" }, \ { 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" }, \ { 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" }, \ \ { 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" }, \ { 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" }, \ { 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" }, \ \ { 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" }, \ { 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" }, \ { 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" }, \ \ { 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" }, \ { 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" }, \ { 0x14, 0x02, C0|C1|C2|C3, "arith.mul" }, \ \ { 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" }, \ { 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" }, \ { 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" }, \ \ { 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" }, \ { 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" }, \ { 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" }, \ \ { 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" }, \ { 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" }, \ { 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" }, \ \ { 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" }, \ { 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" }, \ { 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" }, \ \ { 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" }, \ { 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" }, \ { 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" }, \ \ { 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" }, \ { 0x40, 0x04, C0|C1, "l1d_cache_ld.e_state" }, \ { 0x40, 0x01, C0|C1, "l1d_cache_ld.i_state" }, \ \ { 0x40, 0x08, C0|C1, "l1d_cache_ld.m_state" }, \ { 0x40, 0x0F, C0|C1, "l1d_cache_ld.mesi" }, \ { 0x40, 0x02, C0|C1, "l1d_cache_ld.s_state" }, \ \ { 0x41, 0x04, C0|C1, "l1d_cache_st.e_state" }, \ { 0x41, 0x08, C0|C1, "l1d_cache_st.m_state" }, \ { 0x41, 0x0F, C0|C1, "l1d_cache_st.mesi" }, \ \ { 0x41, 0x02, C0|C1, "l1d_cache_st.s_state" }, \ { 0x42, 0x04, C0|C1, "l1d_cache_lock.e_state" }, \ { 0x42, 0x01, C0|C1, "l1d_cache_lock.hit" }, \ \ { 0x42, 0x08, C0|C1, "l1d_cache_lock.m_state" }, \ { 0x42, 0x02, C0|C1, "l1d_cache_lock.s_state" }, \ { 0x43, 0x01, C0|C1, "l1d_all_ref.any" }, \ \ { 0x43, 0x02, C0|C1, "l1d_all_ref.cacheable" }, \ { 0x4B, 0x01, C0|C1, "mmx2_mem_exec.nta" }, \ { 0x4C, 0x01, C0|C1, "load_hit_pre" }, \ \ { 0x4E, 0x02, C0|C1, "l1d_prefetch.miss" }, \ { 0x4E, 0x01, C0|C1, "l1d_prefetch.requests" }, \ { 0x4E, 0x04, C0|C1, "l1d_prefetch.triggers" }, \ \ { 0x51, 0x04, C0|C1, "l1d.m_evict" }, \ { 0x51, 0x02, C0|C1, "l1d.m_repl" }, \ { 0x51, 0x08, C0|C1, "l1d.m_snoop_evict" }, \ \ { 0x51, 0x01, C0|C1, "l1d.repl" }, \ { 0x52, 0x01, C0|C1, "l1d_cache_prefetch_lock_fb_hit" }, \ { 0x53, 0x01, C0|C1, "l1d_cache_lock_fb_hit" }, \ \ { 0x63, 0x02, C0|C1, "cache_lock_cycles.l1d" }, \ { 0x63, 0x01, C0|C1, "cache_lock_cycles.l1d_l2" }, \ { 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" }, \ \ { 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" }, \ { 0x06, 0x01, C0|C1|C2|C3, "store_blocks.not_sta" }, \ { 0x06, 0x02, C0|C1|C2|C3, "store_blocks.sta" }, \ \ { 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" }, \ { 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" }, \ { 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" }, \ \ { 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" }, \ { 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" }, \ { 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" }, \ \ { 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" }, \ { 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" }, \ { 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" }, \ \ { 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" }, \ { 0x4F, 0x02, C0|C1|C2|C3, "ept.epde_miss" }, \ { 0x4F, 0x08, C0|C1|C2|C3, "ept.epdpe_miss" }, \ \ { 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" }, \ { 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" }, \ { 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" }, \ \ { 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" }, \ { 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" }, \ { 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" }, \ \ { 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" }, \ { 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" }, \ { 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" }, \ \ { 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" }, \ { 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" }, \ { 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" }, \ \ { 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" }, \ { 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" }, \ { 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" }, \ \ { 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" }, \ { 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" }, \ { 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" }, \ \ { 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" }, \ { 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" }, \ { 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" }, \ \ { 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" }, \ { 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" }, \ { 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" }, \ \ { 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" }, \ { 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" }, \ { 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" }, \ \ { 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" }, \ { 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" }, \ { 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" }, \ \ { 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" }, \ { 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" }, \ { 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" }, \ \ { 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" }, \ { 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" }, \ { 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" }, \ \ { 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" }, \ { 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" }, \ { 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" }, \ \ { 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" }, \ { 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" }, \ { 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" }, \ \ { 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" }, \ { 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" }, \ { 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" }, \ \ { 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" }, \ { 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" }, \ { 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" }, \ \ { 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" }, \ { 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" }, \ { 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" }, \ \ { 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" }, \ { 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" }, \ { 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" }, \ \ { 0xB0, 0x80, C0|C1|C2|C3, "offcore_requests.any" }, \ { 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" }, \ { 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" }, \ \ { 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" }, \ { 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" }, \ { 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" }, \ \ { 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" }, \ { 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" }, \ { 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" }, \ \ { 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" }, \ { 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" }, \ { 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" }, \ \ { 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" }, \ { 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" }, \ { 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" }, \ \ { 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" }, \ { 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" }, \ { 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" }, \ \ { 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" }, \ { 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" }, \ { 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" }, \ \ { 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" }, \ { 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" }, \ { 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" }, \ \ { 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" }, \ { 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" }, \ { 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm" }, \ \ { 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit" },\ { 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" }, \ { 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" }, \ \ { 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" }, \ { 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" }, \ { 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" }, \ \ { 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" }, \ { 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" }, \ { 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" }, \ \ { 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" }, \ { 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" }, \ { 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" }, \ \ { 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" }, \ { 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" }, \ { 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" } #define EVENTS_FAM6_MOD28 \ { 0x2, 0x81, C0|C1, "store_forwards.good" }, \ { 0x6, 0x0, C0|C1, "segment_reg_loads.any" }, \ { 0x7, 0x1, C0|C1, "prefetch.prefetcht0" }, \ { 0x7, 0x6, C0|C1, "prefetch.sw_l2" }, \ { 0x7, 0x8, C0|C1, "prefetch.prefetchnta" }, \ { 0x8, 0x7, C0|C1, "data_tlb_misses.dtlb_miss" }, \ { 0x8, 0x5, C0|C1, "data_tlb_misses.dtlb_miss_ld" }, \ { 0x8, 0x9, C0|C1, "data_tlb_misses.l0_dtlb_miss_ld" }, \ { 0x8, 0x6, C0|C1, "data_tlb_misses.dtlb_miss_st" }, \ { 0xC, 0x3, C0|C1, "page_walks.cycles" }, \ { 0x10, 0x1, C0|C1, "x87_comp_ops_exe.any.s" }, \ { 0x10, 0x81, C0|C1, "x87_comp_ops_exe.any.ar" }, \ { 0x11, 0x1, C0|C1, "fp_assist" }, \ { 0x11, 0x81, C0|C1, "fp_assist.ar" }, \ { 0x12, 0x1, C0|C1, "mul.s" }, \ { 0x12, 0x81, C0|C1, "mul.ar" }, \ { 0x13, 0x1, C0|C1, "div.s" }, \ { 0x13, 0x81, C0|C1, "div.ar" }, \ { 0x14, 0x1, C0|C1, "cycles_div_busy" }, \ { 0x21, 0x0, C0|C1, "l2_ads" }, \ { 0x22, 0x0, C0|C1, "l2_dbus_busy" }, \ { 0x24, 0x0, C0|C1, "l2_lines_in" }, \ { 0x25, 0x0, C0|C1, "l2_m_lines_in" }, \ { 0x26, 0x0, C0|C1, "l2_lines_out" }, \ { 0x27, 0x0, C0|C1, "l2_m_lines_out" }, \ { 0x28, 0x0, C0|C1, "l2_ifetch" }, \ { 0x29, 0x0, C0|C1, "l2_ld" }, \ { 0x2A, 0x0, C0|C1, "l2_st" }, \ { 0x2B, 0x0, C0|C1, "l2_lock" }, \ { 0x2E, 0x0, C0|C1, "l2_rqsts" }, \ { 0x2E, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" }, \ { 0x2E, 0x4F, C0|C1, "l2_rqsts.self.demand.mesi" }, \ { 0x30, 0x0, C0|C1, "l2_reject_bus_q" }, \ { 0x32, 0x0, C0|C1, "l2_no_req" }, \ { 0x3A, 0x0, C0|C1, "eist_trans" }, \ { 0x3B, 0xC0, C0|C1, "thermal_trip" }, \ { 0x3C, 0x0, C0|C1, "cpu_clk_unhalted.core_p" }, \ { 0x3C, 0x1, C0|C1, "cpu_clk_unhalted.bus" }, \ { 0x3C, 0x2, C0|C1, "cpu_clk_unhalted.no_other" }, \ { 0x40, 0x21, C0|C1, "l1d_cache.ld" }, \ { 0x40, 0x22, C0|C1, "l1d_cache.st" }, \ { 0x60, 0x0, C0|C1, "bus_request_outstanding" }, \ { 0x61, 0x0, C0|C1, "bus_bnr_drv" }, \ { 0x62, 0x0, C0|C1, "bus_drdy_clocks" }, \ { 0x63, 0x0, C0|C1, "bus_lock_clocks" }, \ { 0x64, 0x0, C0|C1, "bus_data_rcv" }, \ { 0x65, 0x0, C0|C1, "bus_trans_brd" }, \ { 0x66, 0x0, C0|C1, "bus_trans_rfo" }, \ { 0x67, 0x0, C0|C1, "bus_trans_wb" }, \ { 0x68, 0x0, C0|C1, "bus_trans_ifetch" }, \ { 0x69, 0x0, C0|C1, "bus_trans_inval" }, \ { 0x6A, 0x0, C0|C1, "bus_trans_pwr" }, \ { 0x6B, 0x0, C0|C1, "bus_trans_p" }, \ { 0x6C, 0x0, C0|C1, "bus_trans_io" }, \ { 0x6D, 0x0, C0|C1, "bus_trans_def" }, \ { 0x6E, 0x0, C0|C1, "bus_trans_burst" }, \ { 0x6F, 0x0, C0|C1, "bus_trans_mem" }, \ { 0x70, 0x0, C0|C1, "bus_trans_any" }, \ { 0x77, 0x0, C0|C1, "ext_snoop" }, \ { 0x7A, 0x0, C0|C1, "bus_hit_drv" }, \ { 0x7B, 0x0, C0|C1, "bus_hitm_drv" }, \ { 0x7D, 0x0, C0|C1, "busq_empty" }, \ { 0x7E, 0x0, C0|C1, "snoop_stall_drv" }, \ { 0x7F, 0x0, C0|C1, "bus_io_wait" }, \ { 0x80, 0x3, C0|C1, "icache.accesses" }, \ { 0x80, 0x2, C0|C1, "icache.misses" }, \ { 0x82, 0x4, C0|C1, "itlb.flush" }, \ { 0x82, 0x2, C0|C1, "itlb.misses" }, \ { 0xAA, 0x2, C0|C1, "macro_insts.cisc_decoded" }, \ { 0xAA, 0x3, C0|C1, "macro_insts.all_decoded" }, \ { 0xB0, 0x0, C0|C1, "simd_uops_exec.s" }, \ { 0xB0, 0x80, C0|C1, "simd_uops_exec.ar" }, \ { 0xB1, 0x0, C0|C1, "simd_sat_uop_exec.s" }, \ { 0xB1, 0x80, C0|C1, "simd_sat_uop_exec.ar" }, \ { 0xB3, 0x1, C0|C1, "simd_uop_type_exec.mul.s" }, \ { 0xB3, 0x81, C0|C1, "simd_uop_type_exec.mul.ar" }, \ { 0xB3, 0x02, C0|C1, "simd_uop_type_exec.shift.s" }, \ { 0xB3, 0x82, C0|C1, "simd_uop_type_exec.shift.ar" }, \ { 0xB3, 0x04, C0|C1, "simd_uop_type_exec.pack.s" }, \ { 0xB3, 0x84, C0|C1, "simd_uop_type_exec.pack.ar" }, \ { 0xB3, 0x08, C0|C1, "simd_uop_type_exec.unpack.s" }, \ { 0xB3, 0x88, C0|C1, "simd_uop_type_exec.unpack.ar" }, \ { 0xB3, 0x10, C0|C1, "simd_uop_type_exec.logical.s" }, \ { 0xB3, 0x90, C0|C1, "simd_uop_type_exec.logical.ar" }, \ { 0xB3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic.s" }, \ { 0xB3, 0xA0, C0|C1, "simd_uop_type_exec.arithmetic.ar" }, \ { 0xC2, 0x10, C0|C1, "uops_retired.any" }, \ { 0xC3, 0x1, C0|C1, "machine_clears.smc" }, \ { 0xC4, 0x0, C0|C1, "br_inst_retired.any" }, \ { 0xC4, 0x1, C0|C1, "br_inst_retired.pred_not_taken" }, \ { 0xC4, 0x2, C0|C1, "br_inst_retired.mispred_not_taken" }, \ { 0xC4, 0x4, C0|C1, "br_inst_retired.pred_taken" }, \ { 0xC4, 0x8, C0|C1, "br_inst_retired.mispred_taken" }, \ { 0xC4, 0xA, C0|C1, "br_inst_retired.mispred" }, \ { 0xC4, 0xC, C0|C1, "br_inst_retired.taken" }, \ { 0xC4, 0xF, C0|C1, "br_inst_retired.any1" }, \ { 0xC6, 0x1, C0|C1, "cycles_int_masked.cycles_int_masked" }, \ { 0xC6, 0x2, C0|C1, \ "cycles_int_masked.cycles_int_pending_and_masked" }, \ { 0xC7, 0x1, C0|C1, "simd_inst_retired.packed_single" }, \ { 0xC7, 0x2, C0|C1, "simd_inst_retired.scalar_single" }, \ { 0xC7, 0x4, C0|C1, "simd_inst_retired.packed_double" }, \ { 0xC7, 0x8, C0|C1, "simd_inst_retired.scalar_double" }, \ { 0xC7, 0x10, C0|C1, "simd_inst_retired.vector" }, \ { 0xC7, 0x1F, C0|C1, "simd_inst_retired.any" }, \ { 0xC8, 0x00, C0|C1, "hw_int_rcv" }, \ { 0xCA, 0x1, C0|C1, "simd_comp_inst_retired.packed_single" }, \ { 0xCA, 0x2, C0|C1, "simd_comp_inst_retired.scalar_single" }, \ { 0xCA, 0x4, C0|C1, "simd_comp_inst_retired.packed_double" }, \ { 0xCA, 0x8, C0|C1, "simd_comp_inst_retired.scalar_double" }, \ { 0xCB, 0x1, C0|C1, "mem_load_retired.l2_hit" }, \ { 0xCB, 0x2, C0|C1, "mem_load_retired.l2_miss" }, \ { 0xCB, 0x4, C0|C1, "mem_load_retired.dtlb_miss" }, \ { 0xCD, 0x0, C0|C1, "simd_assist" }, \ { 0xCE, 0x0, C0|C1, "simd_instr_retired" }, \ { 0xCF, 0x0, C0|C1, "simd_sat_instr_retired" }, \ { 0xE0, 0x1, C0|C1, "br_inst_decoded" }, \ { 0xE4, 0x1, C0|C1, "bogus_br" }, \ { 0xE6, 0x1, C0|C1, "baclears.any" } #define EVENTS_FAM6_MOD37 \ { 0xB0, 0x08, C0|C1|C2|C3, "offcore_requests.any.read" }, \ { 0xB0, 0x01, C0|C1|C2|C3, "offcore_requests.demand.read_data" }, \ { 0xB0, 0x04, C0|C1|C2|C3, "offcore_requests.demand.rfo" } static const struct events_table_t *events_table = NULL; const struct events_table_t events_fam6_mod26[] = { EVENTS_FAM6_MOD26, { NT_END, 0, 0, "" } }; const struct events_table_t events_fam6_mod28[] = { EVENTS_FAM6_MOD28, { NT_END, 0, 0, "" } }; const struct events_table_t events_fam6_mod37[] = { EVENTS_FAM6_MOD26, EVENTS_FAM6_MOD37, { NT_END, 0, 0, "" } }; /* * Initialize string containing list of supported general-purpose counter * events for processors of Family 6 Models 15 and 23 */ static void pcbe_init_fam6_model15_23() { const struct nametable_fam6mod15_23 *n; const struct nametable_fam6mod15_23 *picspecific_events; size_t common_size; size_t size; uint64_t i; gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP); /* Calculate space needed to save all the common event names */ common_size = 0; for (n = cmn_gpc_events_f6m15_23; n->event_num != NT_END; n++) { common_size += strlen(n->name) + 1; } for (i = 0; i < num_gpc; i++) { size = 0; switch (i) { case 0: picspecific_events = pic0_events; break; case 1: picspecific_events = pic1_events; break; default: picspecific_events = NULL; break; } if (picspecific_events != NULL) { for (n = picspecific_events; n->event_num != NT_END; n++) { size += strlen(n->name) + 1; } } gpc_names[i] = kmem_alloc(size + common_size + 1, KM_SLEEP); gpc_names[i][0] = '\0'; if (picspecific_events != NULL) { for (n = picspecific_events; n->event_num != NT_END; n++) { (void) strcat(gpc_names[i], n->name); (void) strcat(gpc_names[i], ","); } } for (n = cmn_gpc_events_f6m15_23; n->event_num != NT_END; n++) { (void) strcat(gpc_names[i], n->name); (void) strcat(gpc_names[i], ","); } /* * Remove trailing comma. */ gpc_names[i][common_size + size - 1] = '\0'; } } static int core_pcbe_init(void) { struct cpuid_regs cp; size_t size; uint64_t i; uint64_t j; uint64_t arch_events_vector_length; size_t arch_events_string_length; if (cpuid_getvendor(CPU) != X86_VENDOR_Intel) return (-1); /* Obtain Basic CPUID information */ cp.cp_eax = 0x0; (void) __cpuid_insn(&cp); /* No Architectural Performance Monitoring Leaf returned by CPUID */ if (cp.cp_eax < 0xa) { return (-1); } /* Obtain the Architectural Performance Monitoring Leaf */ cp.cp_eax = 0xa; (void) __cpuid_insn(&cp); versionid = cp.cp_eax & 0xFF; /* * Fixed-Function Counters (FFC) * * All Family 6 Model 15 and Model 23 processors have fixed-function * counters. These counters were made Architectural with * Family 6 Model 15 Stepping 9. */ switch (versionid) { case 0: return (-1); case 2: num_ffc = cp.cp_edx & 0x1F; width_ffc = (cp.cp_edx >> 5) & 0xFF; /* * Some processors have an errata (AW34) where * versionid is reported as 2 when actually 1. * In this case, fixed-function counters are * model-specific as in Version 1. */ if (num_ffc != 0) { break; } /* FALLTHROUGH */ case 1: num_ffc = 3; width_ffc = 40; versionid = 1; break; default: num_ffc = cp.cp_edx & 0x1F; width_ffc = (cp.cp_edx >> 5) & 0xFF; break; } if (num_ffc >= 64) return (-1); /* Set HTT-specific names of architectural & FFC events */ if (x86_feature & X86_HTT) { ffc_names = ffc_names_htt; arch_events_table = arch_events_table_htt; known_arch_events = sizeof (arch_events_table_htt) / sizeof (struct events_table_t); known_ffc_num = sizeof (ffc_names_htt) / sizeof (char *); } else { ffc_names = ffc_names_non_htt; arch_events_table = arch_events_table_non_htt; known_arch_events = sizeof (arch_events_table_non_htt) / sizeof (struct events_table_t); known_ffc_num = sizeof (ffc_names_non_htt) / sizeof (char *); } if (num_ffc >= known_ffc_num) { /* * The system seems to have more fixed-function counters than * what this PCBE is able to handle correctly. Default to the * maximum number of fixed-function counters that this driver * is aware of. */ num_ffc = known_ffc_num - 1; } mask_ffc = BITMASK_XBITS(width_ffc); control_ffc = BITMASK_XBITS(num_ffc); /* * General Purpose Counters (GPC) */ num_gpc = (cp.cp_eax >> 8) & 0xFF; width_gpc = (cp.cp_eax >> 16) & 0xFF; if (num_gpc >= 64) return (-1); mask_gpc = BITMASK_XBITS(width_gpc); control_gpc = BITMASK_XBITS(num_gpc); control_mask = (control_ffc << 32) | control_gpc; total_pmc = num_gpc + num_ffc; if (total_pmc > 64) { /* Too wide for the overflow bitmap */ return (-1); } /* GPC events for Family 6 Models 15 & 23 only */ if ((cpuid_getfamily(CPU) == 6) && ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23))) { (void) snprintf(core_impl_name, IMPL_NAME_LEN, "Core Microarchitecture"); pcbe_init_fam6_model15_23(); return (0); } (void) snprintf(core_impl_name, IMPL_NAME_LEN, "Intel Arch PerfMon v%d on Family %d Model %d", versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU)); /* * Architectural events */ arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF; ASSERT(known_arch_events == arch_events_vector_length); /* * To handle the case where a new performance monitoring setup is run * on a non-debug kernel */ if (known_arch_events > arch_events_vector_length) { known_arch_events = arch_events_vector_length; } else { arch_events_vector_length = known_arch_events; } arch_events_vector = cp.cp_ebx & BITMASK_XBITS(arch_events_vector_length); /* * Process architectural and non-architectural events using GPC */ if (num_gpc > 0) { gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP); /* Calculate space required for the architectural gpc events */ arch_events_string_length = 0; for (i = 0; i < known_arch_events; i++) { if (((1U << i) & arch_events_vector) == 0) { arch_events_string_length += strlen(arch_events_table[i].name) + 1; } } /* Non-architectural events list */ if (cpuid_getmodel(CPU) == 26) { events_table = events_fam6_mod26; } else if (cpuid_getmodel(CPU) == 28) { events_table = events_fam6_mod28; } else if (cpuid_getmodel(CPU) == 37) { events_table = events_fam6_mod37; } for (i = 0; i < num_gpc; i++) { /* * Determine length of all supported event names * (architectural + non-architectural) */ size = arch_events_string_length; for (j = 0; events_table != NULL && events_table[j].eventselect != NT_END; j++) { if (C(i) & events_table[j].supported_counters) { size += strlen(events_table[j].name) + 1; } } /* Allocate memory for this pics list */ gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP); gpc_names[i][0] = '\0'; if (size == 0) { continue; } /* * Create the list of all supported events * (architectural + non-architectural) */ for (j = 0; j < known_arch_events; j++) { if (((1U << j) & arch_events_vector) == 0) { (void) strcat(gpc_names[i], arch_events_table[j].name); (void) strcat(gpc_names[i], ","); } } for (j = 0; events_table != NULL && events_table[j].eventselect != NT_END; j++) { if (C(i) & events_table[j].supported_counters) { (void) strcat(gpc_names[i], events_table[j].name); (void) strcat(gpc_names[i], ","); } } /* Remove trailing comma */ gpc_names[i][size - 1] = '\0'; } } /* * Fixed-function Counters (FFC) are already listed individually in * ffc_names[] */ return (0); } static uint_t core_pcbe_ncounters() { return (total_pmc); } static const char *core_pcbe_impl_name(void) { return (core_impl_name); } static const char *core_pcbe_cpuref(void) { return (core_cpuref); } static char *core_pcbe_list_events(uint_t picnum) { ASSERT(picnum < cpc_ncounters); if (picnum < num_gpc) { return (gpc_names[picnum]); } else { return (ffc_names[picnum - num_gpc]); } } static char *core_pcbe_list_attrs(void) { if (versionid >= 3) { return ("edge,inv,umask,cmask,anythr"); } else { return ("edge,pc,inv,umask,cmask"); } } static const struct nametable_fam6mod15_23 * find_gpcevent_f6m15_23(char *name, const struct nametable_fam6mod15_23 *nametable) { const struct nametable_fam6mod15_23 *n; int compare_result = -1; for (n = nametable; n->event_num != NT_END; n++) { compare_result = strcmp(name, n->name); if (compare_result <= 0) { break; } } if (compare_result == 0) { return (n); } return (NULL); } static const struct events_table_t * find_gpcevent(char *name) { int i; /* Search architectural events */ for (i = 0; i < known_arch_events; i++) { if (strcmp(name, arch_events_table[i].name) == 0) { if (((1U << i) & arch_events_vector) == 0) { return (&arch_events_table[i]); } } } /* Search non-architectural events */ if (events_table != NULL) { for (i = 0; events_table[i].eventselect != NT_END; i++) { if (strcmp(name, events_table[i].name) == 0) { return (&events_table[i]); } } } return (NULL); } static uint64_t core_pcbe_event_coverage(char *event) { uint64_t bitmap; uint64_t bitmask; const struct events_table_t *n; int i; bitmap = 0; /* Is it an event that a GPC can track? */ if (versionid >= 3) { n = find_gpcevent(event); if (n != NULL) { bitmap |= (n->supported_counters & BITMASK_XBITS(num_gpc)); } } else { if (find_gpcevent_f6m15_23(event, cmn_gpc_events_f6m15_23) != NULL) { bitmap |= BITMASK_XBITS(num_gpc); } else if (find_gpcevent_f6m15_23(event, pic0_events) != NULL) { bitmap |= 1ULL; } else if (find_gpcevent_f6m15_23(event, pic1_events) != NULL) { bitmap |= 1ULL << 1; } } /* Check if the event can be counted in the fixed-function counters */ if (num_ffc > 0) { bitmask = 1ULL << num_gpc; for (i = 0; i < num_ffc; i++) { if (strcmp(event, ffc_names[i]) == 0) { bitmap |= bitmask; } bitmask = bitmask << 1; } } return (bitmap); } static uint64_t core_pcbe_overflow_bitmap(void) { uint64_t interrupt_status; uint64_t intrbits_ffc; uint64_t intrbits_gpc; extern int kcpc_hw_overflow_intr_installed; uint64_t overflow_bitmap; RDMSR(PERF_GLOBAL_STATUS, interrupt_status); WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status); interrupt_status = interrupt_status & control_mask; intrbits_ffc = (interrupt_status >> 32) & control_ffc; intrbits_gpc = interrupt_status & control_gpc; overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc; ASSERT(kcpc_hw_overflow_intr_installed); (*kcpc_hw_enable_cpc_intr)(); return (overflow_bitmap); } static int check_cpc_securitypolicy(core_pcbe_config_t *conf, const struct nametable_fam6mod15_23 *n) { if (conf->core_ctl & n->restricted_bits) { if (secpolicy_cpc_cpu(crgetcred()) != 0) { return (CPC_ATTR_REQUIRES_PRIVILEGE); } } return (0); } static int configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data) { core_pcbe_config_t conf; const struct nametable_fam6mod15_23 *n; const struct nametable_fam6mod15_23 *m; const struct nametable_fam6mod15_23 *picspecific_events; struct nametable_fam6mod15_23 nt_raw = { "", 0x0, 0x0 }; uint_t i; long event_num; const struct events_table_t *eventcode; int umask_known; if (((preset & BITS_EXTENDED_FROM_31) != 0) && ((preset & BITS_EXTENDED_FROM_31) != BITS_EXTENDED_FROM_31)) { /* * Bits beyond bit-31 in the general-purpose counters can only * be written to by extension of bit 31. We cannot preset * these bits to any value other than all 1s or all 0s. */ return (CPC_ATTRIBUTE_OUT_OF_RANGE); } if (versionid >= 3) { eventcode = find_gpcevent(event); if (eventcode != NULL) { if ((C(picnum) & eventcode->supported_counters) == 0) { return (CPC_PIC_NOT_CAPABLE); } conf.core_ctl = eventcode->eventselect; conf.core_ctl |= eventcode->unitmask << CORE_UMASK_SHIFT; umask_known = 1; } else { /* Event specified as raw event code */ if (ddi_strtol(event, NULL, 0, &event_num) != 0) { return (CPC_INVALID_EVENT); } conf.core_ctl = event_num & 0xFF; umask_known = 0; } } else { umask_known = 0; n = find_gpcevent_f6m15_23(event, cmn_gpc_events_f6m15_23); if (n == NULL) { switch (picnum) { case 0: picspecific_events = pic0_events; break; case 1: picspecific_events = pic1_events; break; default: picspecific_events = NULL; break; } if (picspecific_events != NULL) { n = find_gpcevent_f6m15_23(event, picspecific_events); } } if (n == NULL) { /* * Check if this is a case where the event was * specified directly by its event number instead of * its name string. */ if (ddi_strtol(event, NULL, 0, &event_num) != 0) { return (CPC_INVALID_EVENT); } event_num = event_num & 0xFF; /* * Search the event table to find out if the event * specified has an privilege requirements. Currently * none of the pic-specific counters have any privilege * requirements. Hence only the table * cmn_gpc_events_f6m15_23 is searched. */ for (m = cmn_gpc_events_f6m15_23; m->event_num != NT_END; m++) { if (event_num == m->event_num) { break; } } if (m->event_num == NT_END) { nt_raw.event_num = (uint8_t)event_num; n = &nt_raw; } else { n = m; } } conf.core_ctl = n->event_num; /* Event Select */ } conf.core_picno = picnum; conf.core_pictype = CORE_GPC; conf.core_rawpic = preset & mask_gpc; conf.core_pes = GPC_BASE_PES + picnum; conf.core_pmc = GPC_BASE_PMC + picnum; for (i = 0; i < nattrs; i++) { if (strncmp(attrs[i].ka_name, "umask", 6) == 0) { if ((umask_known == 1) || ((attrs[i].ka_val | CORE_UMASK_MASK) != CORE_UMASK_MASK)) { return (CPC_ATTRIBUTE_OUT_OF_RANGE); } conf.core_ctl |= attrs[i].ka_val << CORE_UMASK_SHIFT; } else if (strncmp(attrs[i].ka_name, "edge", 6) == 0) { if (attrs[i].ka_val != 0) conf.core_ctl |= CORE_EDGE; } else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) { if (attrs[i].ka_val != 0) conf.core_ctl |= CORE_INV; } else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) { if ((attrs[i].ka_val | CORE_CMASK_MASK) != CORE_CMASK_MASK) { return (CPC_ATTRIBUTE_OUT_OF_RANGE); } conf.core_ctl |= attrs[i].ka_val << CORE_CMASK_SHIFT; } else if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) { if (versionid < 3) return (CPC_INVALID_ATTRIBUTE); if (secpolicy_cpc_cpu(crgetcred()) != 0) { return (CPC_ATTR_REQUIRES_PRIVILEGE); } if (attrs[i].ka_val != 0) conf.core_ctl |= CORE_ANYTHR; } else { return (CPC_INVALID_ATTRIBUTE); } } if (flags & CPC_COUNT_USER) conf.core_ctl |= CORE_USR; if (flags & CPC_COUNT_SYSTEM) conf.core_ctl |= CORE_OS; if (flags & CPC_OVF_NOTIFY_EMT) conf.core_ctl |= CORE_INT; conf.core_ctl |= CORE_EN; if (versionid < 3) { if (check_cpc_securitypolicy(&conf, n) != 0) { return (CPC_ATTR_REQUIRES_PRIVILEGE); } } *data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP); *((core_pcbe_config_t *)*data) = conf; return (0); } static int configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data) { core_pcbe_config_t *conf; uint_t i; if (picnum - num_gpc >= num_ffc) { return (CPC_INVALID_PICNUM); } if (strcmp(ffc_names[picnum-num_gpc], event) != 0) { return (CPC_INVALID_EVENT); } if ((versionid < 3) && (nattrs != 0)) { return (CPC_INVALID_ATTRIBUTE); } conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP); conf->core_ctl = 0; for (i = 0; i < nattrs; i++) { if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) { if (secpolicy_cpc_cpu(crgetcred()) != 0) { return (CPC_ATTR_REQUIRES_PRIVILEGE); } if (attrs[i].ka_val != 0) { conf->core_ctl |= CORE_FFC_ANYTHR; } } else { kmem_free(conf, sizeof (core_pcbe_config_t)); return (CPC_INVALID_ATTRIBUTE); } } conf->core_picno = picnum; conf->core_pictype = CORE_FFC; conf->core_rawpic = preset & mask_ffc; conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc); /* All fixed-function counters have the same control register */ conf->core_pes = PERF_FIXED_CTR_CTRL; if (flags & CPC_COUNT_USER) conf->core_ctl |= CORE_FFC_USR_EN; if (flags & CPC_COUNT_SYSTEM) conf->core_ctl |= CORE_FFC_OS_EN; if (flags & CPC_OVF_NOTIFY_EMT) conf->core_ctl |= CORE_FFC_PMI; *data = conf; return (0); } /*ARGSUSED*/ static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset, uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data, void *token) { int ret; core_pcbe_config_t *conf; /* * If we've been handed an existing configuration, we need only preset * the counter value. */ if (*data != NULL) { conf = *data; ASSERT(conf->core_pictype == CORE_GPC || conf->core_pictype == CORE_FFC); if (conf->core_pictype == CORE_GPC) conf->core_rawpic = preset & mask_gpc; else /* CORE_FFC */ conf->core_rawpic = preset & mask_ffc; return (0); } if (picnum >= total_pmc) { return (CPC_INVALID_PICNUM); } if (picnum < num_gpc) { ret = configure_gpc(picnum, event, preset, flags, nattrs, attrs, data); } else { ret = configure_ffc(picnum, event, preset, flags, nattrs, attrs, data); } return (ret); } static void core_pcbe_program(void *token) { core_pcbe_config_t *cfg; uint64_t perf_global_ctrl; uint64_t perf_fixed_ctr_ctrl; uint64_t curcr4; core_pcbe_allstop(); curcr4 = getcr4(); if (kcpc_allow_nonpriv(token)) /* Allow RDPMC at any ring level */ setcr4(curcr4 | CR4_PCE); else /* Allow RDPMC only at ring 0 */ setcr4(curcr4 & ~CR4_PCE); /* Clear any overflow indicators before programming the counters */ WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask); cfg = NULL; perf_global_ctrl = 0; perf_fixed_ctr_ctrl = 0; cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL); while (cfg != NULL) { ASSERT(cfg->core_pictype == CORE_GPC || cfg->core_pictype == CORE_FFC); if (cfg->core_pictype == CORE_GPC) { /* * General-purpose counter registers have write * restrictions where only the lower 32-bits can be * written to. The rest of the relevant bits are * written to by extension from bit 31 (all ZEROS if * bit-31 is ZERO and all ONE if bit-31 is ONE). This * makes it possible to write to the counter register * only values that have all ONEs or all ZEROs in the * higher bits. */ if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) || ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == BITS_EXTENDED_FROM_31)) { /* * Straighforward case where the higher bits * are all ZEROs or all ONEs. */ WRMSR(cfg->core_pmc, (cfg->core_rawpic & mask_gpc)); } else { /* * The high order bits are not all the same. * We save what is currently in the registers * and do not write to it. When we want to do * a read from this register later (in * core_pcbe_sample()), we subtract the value * we save here to get the actual event count. * * NOTE: As a result, we will not get overflow * interrupts as expected. */ RDMSR(cfg->core_pmc, cfg->core_rawpic); cfg->core_rawpic = cfg->core_rawpic & mask_gpc; } WRMSR(cfg->core_pes, cfg->core_ctl); perf_global_ctrl |= 1ull << cfg->core_picno; } else { /* * Unlike the general-purpose counters, all relevant * bits of fixed-function counters can be written to. */ WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc); /* * Collect the control bits for all the * fixed-function counters and write it at one shot * later in this function */ perf_fixed_ctr_ctrl |= cfg->core_ctl << ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE); perf_global_ctrl |= 1ull << (cfg->core_picno - num_gpc + 32); } cfg = (core_pcbe_config_t *) kcpc_next_config(token, cfg, NULL); } /* Enable all the counters */ WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl); WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl); } static void core_pcbe_allstop(void) { /* Disable all the counters together */ WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED); setcr4(getcr4() & ~CR4_PCE); } static void core_pcbe_sample(void *token) { uint64_t *daddr; uint64_t curpic; core_pcbe_config_t *cfg; uint64_t counter_mask; cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr); while (cfg != NULL) { ASSERT(cfg->core_pictype == CORE_GPC || cfg->core_pictype == CORE_FFC); curpic = rdmsr(cfg->core_pmc); DTRACE_PROBE4(core__pcbe__sample, uint64_t, cfg->core_pmc, uint64_t, curpic, uint64_t, cfg->core_rawpic, uint64_t, *daddr); if (cfg->core_pictype == CORE_GPC) { counter_mask = mask_gpc; } else { counter_mask = mask_ffc; } curpic = curpic & counter_mask; if (curpic >= cfg->core_rawpic) { *daddr += curpic - cfg->core_rawpic; } else { /* Counter overflowed since our last sample */ *daddr += counter_mask - (cfg->core_rawpic - curpic) + 1; } cfg->core_rawpic = *daddr & counter_mask; cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr); } } static void core_pcbe_free(void *config) { kmem_free(config, sizeof (core_pcbe_config_t)); } static struct modlpcbe core_modlpcbe = { &mod_pcbeops, "Core Performance Counters", &core_pcbe_ops }; static struct modlinkage core_modl = { MODREV_1, &core_modlpcbe, }; int _init(void) { if (core_pcbe_init() != 0) { return (ENOTSUP); } return (mod_install(&core_modl)); } int _fini(void) { return (mod_remove(&core_modl)); } int _info(struct modinfo *mi) { return (mod_info(&core_modl, mi)); }