| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * dlfilter-show-cycles.c: Print the number of cycles at the start of each line |
| * Copyright (c) 2021, Intel Corporation. |
| */ |
| #include <perf/perf_dlfilter.h> |
| #include <string.h> |
| #include <stdio.h> |
| |
/* Number of CPUs that get a dedicated row in the per-CPU counter arrays. */
#define MAX_CPU 4096

/*
 * Column index of each counter class. event_entry() maps an event name
 * onto one of these; MAX_ENTRY is the number of classes.
 */
enum {
	INSTR_CYC = 0,
	BRNCH_CYC,
	OTHER_CYC,
	MAX_ENTRY
};

/* Accumulated cycle counts per CPU and counter class. */
static __u64 cycles[MAX_CPU][MAX_ENTRY];
/* Value of cycles[][] at the time it was last printed. */
static __u64 cycles_rpt[MAX_CPU][MAX_ENTRY];

/*
 * Geometry of the per-thread hash table: TABLESZ slots (a power of two so
 * that MASK can reduce a tid to a slot index), of which at most TABLEMAX
 * may be occupied.
 */
#define BITS 16
#define TABLESZ (1 << BITS)
#define TABLEMAX (TABLESZ / 2)
#define MASK (TABLESZ - 1)

/* One hash-table slot holding the counters of a single thread. */
struct entry {
	__u32 used;			/* non-zero once the slot is occupied */
	__s32 tid;			/* thread id owning the slot */
	__u64 cycles[MAX_ENTRY];	/* accumulated cycles per counter class */
	__u64 cycles_rpt[MAX_ENTRY];	/* cycles[] as of the last print */
};

/* Open-addressed table of per-thread counters, indexed via find_entry(). */
static struct entry table[TABLESZ];

/* Number of occupied slots in table[]. */
static int tid_cnt;
| |
| static int event_entry(const char *event) |
| { |
| if (!event) |
| return OTHER_CYC; |
| if (!strncmp(event, "instructions", 12)) |
| return INSTR_CYC; |
| if (!strncmp(event, "branches", 8)) |
| return BRNCH_CYC; |
| return OTHER_CYC; |
| } |
| |
| static struct entry *find_entry(__s32 tid) |
| { |
| __u32 pos = tid & MASK; |
| struct entry *e; |
| |
| e = &table[pos]; |
| while (e->used) { |
| if (e->tid == tid) |
| return e; |
| if (++pos == TABLESZ) |
| pos = 0; |
| e = &table[pos]; |
| } |
| |
| if (tid_cnt >= TABLEMAX) { |
| fprintf(stderr, "Too many threads\n"); |
| return NULL; |
| } |
| |
| tid_cnt += 1; |
| e->used = 1; |
| e->tid = tid; |
| return e; |
| } |
| |
| static void add_entry(__s32 tid, int pos, __u64 cnt) |
| { |
| struct entry *e = find_entry(tid); |
| |
| if (e) |
| e->cycles[pos] += cnt; |
| } |
| |
| int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx) |
| { |
| __s32 cpu = sample->cpu; |
| __s32 tid = sample->tid; |
| int pos; |
| |
| if (!sample->cyc_cnt) |
| return 0; |
| |
| pos = event_entry(sample->event); |
| |
| if (cpu >= 0 && cpu < MAX_CPU) |
| cycles[cpu][pos] += sample->cyc_cnt; |
| else if (tid != -1) |
| add_entry(tid, pos, sample->cyc_cnt); |
| return 0; |
| } |
| |
/*
 * Print a running total and the change since the last print as two
 * right-aligned 10-character columns, each followed by a space.
 *
 * The delta column is left blank when @delta is zero.
 */
static void print_vals(__u64 total, __u64 delta)
{
	if (!delta) {
		printf("%10llu %10s ", (unsigned long long)total, "");
		return;
	}

	printf("%10llu %10llu ", (unsigned long long)total, (unsigned long long)delta);
}
| |
| int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx) |
| { |
| __s32 cpu = sample->cpu; |
| __s32 tid = sample->tid; |
| int pos; |
| |
| pos = event_entry(sample->event); |
| |
| if (cpu >= 0 && cpu < MAX_CPU) { |
| print_vals(cycles[cpu][pos], cycles[cpu][pos] - cycles_rpt[cpu][pos]); |
| cycles_rpt[cpu][pos] = cycles[cpu][pos]; |
| return 0; |
| } |
| |
| if (tid != -1) { |
| struct entry *e = find_entry(tid); |
| |
| if (e) { |
| print_vals(e->cycles[pos], e->cycles[pos] - e->cycles_rpt[pos]); |
| e->cycles_rpt[pos] = e->cycles[pos]; |
| return 0; |
| } |
| } |
| |
| printf("%22s", ""); |
| return 0; |
| } |
| |
/*
 * Describe this filter.
 *
 * @long_description: if non-NULL, receives a pointer to a statically
 *                    allocated multi-sentence description.
 *
 * Returns a statically allocated one-line description. Neither string may
 * be modified or freed by the caller.
 */
const char *filter_description(const char **long_description)
{
	/* const-qualified: the pointer refers to a string literal. */
	static const char *const long_desc = "Cycle counts are accumulated per CPU (or "
		"per thread if CPU is not recorded) from IPC information, and "
		"printed together with the change since the last print, at the "
		"start of each line. Separate counts are kept for branches, "
		"instructions or other events.";

	/* Tolerate callers that only want the short description. */
	if (long_description)
		*long_description = long_desc;

	return "Print the number of cycles at the start of each line";
}