blob: df8e736f79064ad27e593c31eb5aba6c15a7bcc8 [file] [log] [blame]
#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/isr.h"
#include "x86/apic.h"
#include "x86/apic-defs.h"
#include "x86/desc.h"
#include "alloc.h"
#include "vm.h"
#include "processor.h"
#include "vmalloc.h"
#include "alloc_page.h"
/* Bits [63:48] of a PEBS record's format_size field provide the size of that record in bytes. */
#define RECORD_SIZE_OFFSET 48
/* Number of GP counters exercised: MIN(pmu.nr_gp_counters, entries in intel_arch_events[]); set in main(). */
static unsigned int max_nr_gp_events;
/* Page used as the debug store area; its address is written to MSR_IA32_DS_AREA. */
static unsigned long *ds_bufer;
/* Page whose address range is stored in the pebs_* fields of the debug store. */
static unsigned long *pebs_buffer;
/* Start value written to every counter under test; taken from counter_start_values[]. */
static u64 ctr_start_val;
/* Cached result of pmu_has_pebs_baseline(); gates fixed-counter tests and MSR_PEBS_DATA_CFG accesses. */
static bool has_baseline;
/*
 * Layout of the page whose address is written to MSR_IA32_DS_AREA.
 * This test only programs the pebs_buffer_base, pebs_index,
 * pebs_absolute_maximum and pebs_interrupt_threshold fields; everything else
 * stays zero from the memset() of the page.
 * NOTE(review): presumably mirrors the 64-bit DS save area format from the
 * Intel SDM - confirm before changing field order or sizes.
 */
struct debug_store {
u64 bts_buffer_base;
u64 bts_index;
u64 bts_absolute_maximum;
u64 bts_interrupt_threshold;
u64 pebs_buffer_base; /* address of pebs_buffer */
u64 pebs_index; /* starts at pebs_buffer_base; the record walk in check_pebs_records() ends here */
u64 pebs_absolute_maximum; /* pebs_buffer + PAGE_SIZE */
u64 pebs_interrupt_threshold; /* pebs_buffer_base + size of one expected record */
u64 pebs_event_reset[64];
};
/*
 * Leading part of every PEBS record. It is the whole record when adaptive
 * PEBS is not used (see get_pebs_record_size()).
 */
struct pebs_basic {
u64 format_size; /* bits [63:48]: record size in bytes; bits [47:0]: compared against the data configuration */
u64 ip; /* not examined by this test */
u64 applicable_counters; /* must intersect the bitmask of counters enabled for PEBS */
u64 tsc; /* not examined by this test */
};
/*
 * Record group counted into the expected record size when
 * PEBS_DATACFG_MEMINFO is set; only its size is used by this test.
 */
struct pebs_meminfo {
u64 address;
u64 aux;
u64 latency;
u64 tsx_tuning;
};
/*
 * Record group counted into the expected record size when PEBS_DATACFG_GPRS
 * is set; only its size is used by this test.
 */
struct pebs_gprs {
u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
u64 r8, r9, r10, r11, r12, r13, r14, r15;
};
/*
 * Record group counted into the expected record size when PEBS_DATACFG_XMMS
 * is set; only its size is used by this test.
 */
struct pebs_xmm {
u64 xmm[16*2]; /* two entries for each register */
};
/*
 * One LBR entry inside a PEBS record. MAX_NUM_LBR_ENTRY entries are counted
 * into the expected record size when PEBS_DATACFG_LBRS is set, and
 * check_pebs_records() reports a failure for any entry whose from or to field
 * is non-zero.
 */
struct lbr_entry {
u64 from;
u64 to;
u64 info;
};
/*
 * Kind of counter a single-counter test targets. FIXED counters use PEBS bit
 * FIXED_CNT_INDEX + idx, GP counters use bit idx (see check_one_counter()).
 */
enum pmc_type {
GP = 0,
FIXED,
};
/*
 * Event-select encodings used for the GP counters: pebs_enable() ORs entry N
 * into the event-select MSR of GP counter N. The table length also caps the
 * number of GP counters that are tested (see max_nr_gp_events).
 */
static uint32_t intel_arch_events[] = {
0x00c4, /* PERF_COUNT_HW_BRANCH_INSTRUCTIONS */
0x00c5, /* PERF_COUNT_HW_BRANCH_MISSES */
0x0300, /* PERF_COUNT_HW_REF_CPU_CYCLES */
0x003c, /* PERF_COUNT_HW_CPU_CYCLES */
0x00c0, /* PERF_COUNT_HW_INSTRUCTIONS */
0x013c, /* PERF_COUNT_HW_BUS_CYCLES */
0x4f2e, /* PERF_COUNT_HW_CACHE_REFERENCES */
0x412e, /* PERF_COUNT_HW_CACHE_MISSES */
};
/*
 * Iterating over every possible counter value would be a waste of time, so
 * only a few typical start values are used. main() runs the whole test matrix
 * once per entry.
 */
static u64 counter_start_values[] = {
/*
 * The PEBS counter is not expected to overflow at all.
 * NOTE(review): whether 0xfffffffffff0 really avoids an overflow depends on
 * the event and on the length of the workload - confirm.
 */
0,
0xfffffffffff0,
/* Normal counter overflow, so that PEBS records are generated. */
0xfffffffffffe,
/* Tests whether emulated instructions trigger PEBS. */
0xffffffffffff,
};
/*
 * Compute the size in bytes that a single PEBS record is expected to have.
 *
 * @pebs_data_cfg: PEBS data configuration bits (PEBS_DATACFG_*).
 * @use_adaptive:  when false the configuration is ignored and only the basic
 *                 group is counted.
 *
 * Returns sizeof(struct pebs_basic) plus the size of every optional group
 * selected in @pebs_data_cfg. The LBR group is always counted as
 * MAX_NUM_LBR_ENTRY entries.
 */
static unsigned int get_pebs_record_size(u64 pebs_data_cfg, bool use_adaptive)
{
	/* Optional record groups and the number of bytes each one contributes. */
	const struct {
		u64 flag;
		size_t bytes;
	} groups[] = {
		{ PEBS_DATACFG_MEMINFO, sizeof(struct pebs_meminfo) },
		{ PEBS_DATACFG_GPRS,    sizeof(struct pebs_gprs) },
		{ PEBS_DATACFG_XMMS,    sizeof(struct pebs_xmm) },
		{ PEBS_DATACFG_LBRS,    MAX_NUM_LBR_ENTRY * sizeof(struct lbr_entry) },
	};
	unsigned int total = sizeof(struct pebs_basic);
	unsigned int g;

	if (use_adaptive) {
		for (g = 0; g < sizeof(groups) / sizeof(groups[0]); g++) {
			if (pebs_data_cfg & groups[g].flag)
				total += groups[g].bytes;
		}
	}

	return total;
}
/*
 * Interrupt handler that main() installs for PMI_VECTOR. It only writes
 * APIC_EOI to signal end-of-interrupt; @regs is not used.
 */
static void cnt_overflow(isr_regs_t *regs)
{
apic_write(APIC_EOI, 0);
}
/*
 * Fixed instruction sequence executed while the counters under test are
 * running: two groups of a compare followed by four never-taken conditional
 * branches (EAX is zero, so "jne" falls through), then six CPUID invocations
 * with EAX = 0xa.
 *
 * The branch target is a numeric local label rather than a named one. The
 * function is "static inline", so the compiler may emit this asm more than
 * once in a translation unit, and a named label would then be defined twice
 * and rejected by the assembler.
 *
 * Clobbers EAX, EBX, ECX and EDX (written by CPUID).
 */
static inline void workload(void)
{
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne 1f\n"
		"jne 1f\n"
		"jne 1f\n"
		"jne 1f\n"
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne 1f\n"
		"jne 1f\n"
		"jne 1f\n"
		"jne 1f\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"1:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");
}
/*
 * Instruction sequence used by the multiple-counter test. It has the same
 * shape as workload(): two groups of a compare followed by four never-taken
 * conditional branches (EAX is zero, so "jne" falls through), then six CPUID
 * invocations with EAX = 0xa.
 *
 * The branch target is a numeric local label rather than a named one. The
 * function is "static inline", so the compiler may emit this asm more than
 * once in a translation unit, and a named label would then be defined twice
 * and rejected by the assembler.
 *
 * Clobbers EAX, EBX, ECX and EDX (written by CPUID).
 */
static inline void workload2(void)
{
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne 1f\n"
		"jne 1f\n"
		"jne 1f\n"
		"jne 1f\n"
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		"jne 1f\n"
		"jne 1f\n"
		"jne 1f\n"
		"jne 1f\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"1:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");
}
/* Allocate one page, force a 4k mapping for it and zero it. */
static void *alloc_zeroed_4k_page(void)
{
	void *page = alloc_page();

	force_4k_page(page);
	memset(page, 0x0, PAGE_SIZE);

	return page;
}

/*
 * Set up the two pages used by the test: the debug store area first, then the
 * PEBS record buffer. Both are mapped with 4k pages and zero-filled.
 */
static void alloc_buffers(void)
{
	ds_bufer = alloc_zeroed_4k_page();
	pebs_buffer = alloc_zeroed_4k_page();
}
/*
 * Release the debug store page and the PEBS buffer page, in that order.
 * A buffer whose pointer is NULL is skipped.
 */
static void free_buffers(void)
{
	unsigned long *const pages[] = { ds_bufer, pebs_buffer };
	unsigned int n;

	for (n = 0; n < sizeof(pages) / sizeof(pages[0]); n++) {
		if (pages[n])
			free_page(pages[n]);
	}
}
/*
 * Program the debug store and the selected counters, then start counting.
 *
 * @bitmask:       counters to enable for PEBS. Bit idx selects GP counter idx,
 *                 bit FIXED_CNT_INDEX + idx selects fixed counter idx.
 * @pebs_data_cfg: value written to MSR_PEBS_DATA_CFG (only when the PMU has
 *                 the PEBS baseline feature).
 * @use_adaptive:  set the per-counter adaptive bit so that @pebs_data_cfg is
 *                 honoured for records of that counter.
 *
 * Every selected counter starts from ctr_start_val. The interrupt threshold is
 * placed one expected record past the buffer base. The DS area, PEBS enable
 * and global control MSRs are written last, in that order.
 */
static void pebs_enable(u64 bitmask, u64 pebs_data_cfg, bool use_adaptive)
{
	struct debug_store *ds = (struct debug_store *)ds_bufer;
	u64 fixed_ctr_ctrl = 0;
	unsigned int idx;

	if (has_baseline)
		wrmsr(MSR_PEBS_DATA_CFG, pebs_data_cfg);

	ds->pebs_index = ds->pebs_buffer_base = (unsigned long)pebs_buffer;
	ds->pebs_absolute_maximum = (unsigned long)pebs_buffer + PAGE_SIZE;
	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
		get_pebs_record_size(pebs_data_cfg, use_adaptive);

	for (idx = 0; idx < pmu.nr_fixed_counters; idx++) {
		u64 adaptive_ctrl = 0;

		if (!(BIT_ULL(FIXED_CNT_INDEX + idx) & bitmask))
			continue;

		/*
		 * The adaptive bit index is FIXED_CNT_INDEX + idx * 4, so the
		 * mask must be built with the 64-bit helper, like the other
		 * counter masks in this file.
		 */
		if (use_adaptive)
			adaptive_ctrl = BIT_ULL(FIXED_CNT_INDEX + idx * 4);

		wrmsr(MSR_PERF_FIXED_CTRx(idx), ctr_start_val);
		/*
		 * NOTE(review): 0xb in the counter's 4-bit control field is
		 * presumably OS | USR | PMI - confirm against the SDM.
		 */
		fixed_ctr_ctrl |= (0xbULL << (idx * 4)) | adaptive_ctrl;
	}
	if (fixed_ctr_ctrl)
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, fixed_ctr_ctrl);

	for (idx = 0; idx < max_nr_gp_events; idx++) {
		u64 adaptive_ctrl = 0;

		if (!(BIT_ULL(idx) & bitmask))
			continue;

		if (use_adaptive)
			adaptive_ctrl = ICL_EVENTSEL_ADAPTIVE;

		wrmsr(MSR_GP_EVENT_SELECTx(idx), EVNTSEL_EN | EVNTSEL_OS | EVNTSEL_USR |
		      intel_arch_events[idx] | adaptive_ctrl);
		wrmsr(MSR_GP_COUNTERx(idx), ctr_start_val);
	}

	wrmsr(MSR_IA32_DS_AREA, (unsigned long)ds_bufer);
	wrmsr(MSR_IA32_PEBS_ENABLE, bitmask);
	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, bitmask);
}
/*
 * Bring the buffers and the PMU back to a clean state before a test run:
 * zero the debug store and PEBS buffer pages, clear the PEBS enable and DS
 * area MSRs (and MSR_PEBS_DATA_CFG when the PMU has the PEBS baseline
 * feature), clear the global control MSR, write the current global status
 * back to the overflow control MSR and reset all counters.
 */
static void reset_pebs(void)
{
memset(ds_bufer, 0x0, PAGE_SIZE);
memset(pebs_buffer, 0x0, PAGE_SIZE);
wrmsr(MSR_IA32_PEBS_ENABLE, 0);
wrmsr(MSR_IA32_DS_AREA, 0);
if (has_baseline)
wrmsr(MSR_PEBS_DATA_CFG, 0);
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
/* NOTE(review): presumably acknowledges (clears) every status bit that is currently set - confirm. */
wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));
pmu_reset_all_counters();
}
/*
 * Stop counting after the workload has run.
 *
 * @idx: index of the counter under test; only its parity is used.
 *
 * Clearing only PEBS_ENABLE leaves the counter running. If it overflows in
 * that very small window, no PEBS record is generated and a normal counter
 * interrupt is raised instead. Both shutdown sequences are exercised: an odd
 * @idx clears PEBS_ENABLE before the global control MSR, an even @idx clears
 * the global control MSR only.
 */
static void pebs_disable(unsigned int idx)
{
	const bool clear_pebs_enable_first = (idx & 1u) != 0;

	if (clear_pebs_enable_first) {
		wrmsr(MSR_IA32_PEBS_ENABLE, 0);
	}

	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
/*
 * Validate the PEBS records left in pebs_buffer by the last workload run.
 *
 * @bitmask:       counters that were enabled for PEBS; the applicable_counters
 *                 field of every record must intersect it.
 * @pebs_data_cfg: data configuration that was passed to pebs_enable().
 * @use_adaptive:  whether the counters were programmed with their adaptive
 *                 bit. When false the configuration is expected to be ignored:
 *                 records must have the basic size and a zero data
 *                 configuration in format_size.
 *
 * If the buffer-overflow bit is not set in the global status MSR, the test
 * passes only when no record was written. Otherwise every record up to
 * ds->pebs_index is checked for size, counters and data configuration, and,
 * when LBRs were requested, for all-zero LBR entries. The walk stops at the
 * first mismatching record, whose details are then printed.
 */
static void check_pebs_records(u64 bitmask, u64 pebs_data_cfg, bool use_adaptive)
{
	struct pebs_basic *pebs_rec = (struct pebs_basic *)pebs_buffer;
	struct debug_store *ds = (struct debug_store *)ds_bufer;
	/* Both expectations are the same for every record, compute them once. */
	const unsigned int expected_size = get_pebs_record_size(pebs_data_cfg, use_adaptive);
	const u64 expected_cfg = use_adaptive ? pebs_data_cfg : 0;
	unsigned int pebs_record_size;
	unsigned int count = 0;
	bool expected, pebs_idx_match, pebs_size_match, data_cfg_match;
	u64 record_cfg;
	void *cur_record;

	/* True when the buffer is still untouched. */
	expected = (ds->pebs_index == ds->pebs_buffer_base) && !pebs_rec->format_size;
	if (!(rdmsr(MSR_CORE_PERF_GLOBAL_STATUS) & GLOBAL_STATUS_BUFFER_OVF)) {
		report(expected, "No OVF irq, none PEBS records.");
		return;
	}

	if (expected) {
		report(!expected, "A OVF irq, but none PEBS records.");
		return;
	}

	cur_record = (void *)pebs_buffer;
	do {
		pebs_rec = (struct pebs_basic *)cur_record;
		pebs_record_size = pebs_rec->format_size >> RECORD_SIZE_OFFSET;
		record_cfg = pebs_rec->format_size & GENMASK_ULL(47, 0);

		pebs_idx_match = (pebs_rec->applicable_counters & bitmask) != 0;
		pebs_size_match = pebs_record_size == expected_size;
		/*
		 * Compare against the effective configuration: without the
		 * adaptive bit the requested configuration must not show up
		 * in the record.
		 */
		data_cfg_match = record_cfg == expected_cfg;
		expected = pebs_idx_match && pebs_size_match && data_cfg_match;
		report(expected,
		       "PEBS record (written seq %d) is verified (including size, counters and cfg).", count);

		if (use_adaptive && (pebs_data_cfg & PEBS_DATACFG_LBRS)) {
			/* The LBR entries follow all the other requested groups. */
			unsigned int lbrs_offset = get_pebs_record_size(pebs_data_cfg & ~PEBS_DATACFG_LBRS, true);
			struct lbr_entry *pebs_lbrs = cur_record + lbrs_offset;
			unsigned int i;

			for (i = 0; i < MAX_NUM_LBR_ENTRY; i++) {
				if (!pebs_lbrs[i].from && !pebs_lbrs[i].to)
					continue;

				report_fail("PEBS LBR record %u isn't empty, got from = '%lx', to = '%lx', info = '%lx'",
					    i, pebs_lbrs[i].from, pebs_lbrs[i].to, pebs_lbrs[i].info);
			}
		}

		cur_record = cur_record + pebs_record_size;
		count++;
	} while (expected && (void *)cur_record < (void *)ds->pebs_index);

	/* pebs_rec and record_cfg still describe the record that failed. */
	if (!expected) {
		if (!pebs_idx_match)
			printf("FAIL: The applicable_counters (0x%lx) doesn't match with pmc_bitmask (0x%lx).\n",
			       pebs_rec->applicable_counters, bitmask);
		if (!pebs_size_match)
			printf("FAIL: The pebs_record_size (%d) doesn't match with expected record size (%d).\n",
			       pebs_record_size, expected_size);
		if (!data_cfg_match)
			printf("FAIL: The pebs_data_cfg (0x%lx) doesn't match with the effective MSR_PEBS_DATA_CFG (0x%lx).\n",
			       record_cfg, expected_cfg);
	}
}
/*
 * Run the single-counter PEBS test for one counter.
 *
 * @type:          GP or FIXED.
 * @idx:           index of the counter within its type.
 * @pebs_data_cfg: data configuration to program.
 * @use_adaptive:  whether to set the counter's adaptive bit.
 *
 * The counter's PEBS bit is idx for GP counters and FIXED_CNT_INDEX + idx for
 * fixed counters. It is kept in a u64 because it is passed on as a 64-bit
 * mask; a narrower type would silently drop the high fixed-counter bits.
 */
static void check_one_counter(enum pmc_type type, unsigned int idx,
			      u64 pebs_data_cfg, bool use_adaptive)
{
	u64 pebs_bit = BIT_ULL(type == FIXED ? FIXED_CNT_INDEX + idx : idx);

	report_prefix_pushf("%s counter %d (0x%lx)",
			    type == FIXED ? "Extended Fixed" : "GP", idx, ctr_start_val);
	reset_pebs();
	pebs_enable(pebs_bit, pebs_data_cfg, use_adaptive);
	workload();
	/* The parity of idx selects which shutdown sequence is exercised. */
	pebs_disable(idx);
	check_pebs_records(pebs_bit, pebs_data_cfg, use_adaptive);
	report_prefix_pop();
}
/*
 * Enable every counter in @bitmask for PEBS at the same time, run workload2()
 * and validate the resulting records. More than one PEBS record may be
 * generated.
 *
 * @bitmask:       counters to enable (same encoding as for pebs_enable()).
 * @pebs_data_cfg: data configuration to program.
 * @use_adaptive:  whether to set the counters' adaptive bits.
 */
static void check_multiple_counters(u64 bitmask, u64 pebs_data_cfg,
bool use_adaptive)
{
reset_pebs();
pebs_enable(bitmask, pebs_data_cfg, use_adaptive);
workload2();
/* Index 0 is even, so only the global control MSR is cleared. */
pebs_disable(0);
check_pebs_records(bitmask, pebs_data_cfg, use_adaptive);
}
/*
 * Run the complete counter matrix for one data configuration.
 *
 * @pebs_data_cfg: data configuration to program.
 * @use_adaptive:  whether to set the counters' adaptive bits.
 *
 * Each fixed counter (only when the PMU has the PEBS baseline feature) and
 * then each GP counter is tested on its own. Afterwards all fixed counters
 * (same condition) together with every second GP counter are tested at once.
 */
static void check_pebs_counters(u64 pebs_data_cfg, bool use_adaptive)
{
	u64 multi_mask = 0;
	unsigned int ctr;

	/* Single-counter runs: fixed counters first, then GP counters. */
	if (has_baseline) {
		for (ctr = 0; ctr < pmu.nr_fixed_counters; ctr++)
			check_one_counter(FIXED, ctr, pebs_data_cfg, use_adaptive);
	}

	for (ctr = 0; ctr < max_nr_gp_events; ctr++)
		check_one_counter(GP, ctr, pebs_data_cfg, use_adaptive);

	/* Mask for the combined run: even-numbered GP counters ... */
	for (ctr = 0; ctr < max_nr_gp_events; ctr += 2)
		multi_mask |= BIT_ULL(ctr);

	/* ... plus every fixed counter when the baseline feature is present. */
	if (has_baseline) {
		for (ctr = 0; ctr < pmu.nr_fixed_counters; ctr++)
			multi_mask |= BIT_ULL(FIXED_CNT_INDEX + ctr);
	}

	report_prefix_pushf("Multiple (0x%lx)", multi_mask);
	check_multiple_counters(multi_mask, pebs_data_cfg, use_adaptive);
	report_prefix_pop();
}
/*
 * Known reasons for no PEBS records being generated:
 * 1. The selected event does not support PEBS;
 * 2. From the core PMU's perspective, the vCPU and pCPU models are not the same;
 * 3. The guest counter has not yet overflowed, or has been cross-mapped by the host.
 */
/*
 * Test entry point.
 *
 * Skips the test when the PMU is not Intel, does not report PEBS, does not
 * enumerate a PEBS format, or when MISC_ENABLE marks PEBS as unavailable.
 * Otherwise, for every start value in counter_start_values[], it runs the
 * counter matrix without adaptive PEBS and, when the PEBS baseline feature is
 * present, once with and once without the adaptive bit for every data
 * configuration from 0 up to PEBS_DATACFG_MASK.
 *
 * Returns the value of report_summary().
 */
int main(int ac, char **av)
{
	unsigned int start, cfg;

	setup_vm();

	max_nr_gp_events = MIN(pmu.nr_gp_counters, ARRAY_SIZE(intel_arch_events));

	printf("PMU version: %d\n", pmu.version);

	has_baseline = pmu_has_pebs_baseline();
	if (pmu_has_full_writes())
		pmu_activate_full_writes();

	/* Each unmet prerequisite skips the whole test. */
	if (!pmu.is_intel) {
		report_skip("PEBS requires Intel ICX or later, non-Intel detected");
		return report_summary();
	}

	if (!pmu_has_pebs()) {
		report_skip("PEBS required PMU version 2, reported version is %d", pmu.version);
		return report_summary();
	}

	if (!pmu_pebs_format()) {
		report_skip("PEBS not enumerated in PERF_CAPABILITIES");
		return report_summary();
	}

	if (rdmsr(MSR_IA32_MISC_ENABLE) & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL) {
		report_skip("PEBS unavailable according to MISC_ENABLE");
		return report_summary();
	}

	printf("PEBS format: %d\n", pmu_pebs_format());
	printf("PEBS GP counters: %d\n", pmu.nr_gp_counters);
	printf("PEBS Fixed counters: %d\n", pmu.nr_fixed_counters);
	printf("PEBS baseline (Adaptive PEBS): %d\n", has_baseline);

	handle_irq(PMI_VECTOR, cnt_overflow);
	alloc_buffers();

	for (start = 0; start < ARRAY_SIZE(counter_start_values); start++) {
		ctr_start_val = counter_start_values[start];
		check_pebs_counters(0, false);

		/* The adaptive matrix needs the PEBS baseline feature. */
		if (has_baseline) {
			for (cfg = 0; cfg <= PEBS_DATACFG_MASK; cfg++) {
				u64 pebs_data_cfg = cfg;

				/* When LBRs are requested, also fill in the LBR count field. */
				if (pebs_data_cfg & PEBS_DATACFG_LBRS)
					pebs_data_cfg |= ((MAX_NUM_LBR_ENTRY -1) << PEBS_DATACFG_LBR_SHIFT);

				report_prefix_pushf("Adaptive (0x%lx)", pebs_data_cfg);
				check_pebs_counters(pebs_data_cfg, true);
				report_prefix_pop();

				report_prefix_pushf("Ignored Adaptive (0x%lx)", pebs_data_cfg);
				check_pebs_counters(pebs_data_cfg, false);
				report_prefix_pop();
			}
		}
	}

	free_buffers();

	return report_summary();
}