perf_counter: Ignore the nmi call frames in the x86-64 backtraces
About every callchains recorded with perf record are filled up
including the internal perfcounter nmi frame:
perf_callchain
perf_counter_overflow
intel_pmu_handle_irq
perf_counter_nmi_handler
notifier_call_chain
atomic_notifier_call_chain
notify_die
do_nmi
nmi
We want ignore this frame as it's not interesting for
instrumentation. To solve this, we simply ignore every frames
from nmi context.
New example of "perf report -s sym -c" after this patch:
9.59% [k] search_by_key
4.88%
search_by_key
reiserfs_read_locked_inode
reiserfs_iget
reiserfs_lookup
do_lookup
__link_path_walk
path_walk
do_path_lookup
user_path_at
vfs_fstatat
vfs_lstat
sys_newlstat
system_call_fastpath
__lxstat
0x406fb1
3.19%
search_by_key
search_by_entry_key
reiserfs_find_entry
reiserfs_lookup
do_lookup
__link_path_walk
path_walk
do_path_lookup
user_path_at
vfs_fstatat
vfs_lstat
sys_newlstat
system_call_fastpath
__lxstat
0x406fb1
[...]
For now this patch only solves the problem in x86-64.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1246474930-6088-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d4cf4ce..36c3dc7 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1561,6 +1561,7 @@
static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
+static DEFINE_PER_CPU(int, in_nmi_frame);
static void
@@ -1576,7 +1577,9 @@
static int backtrace_stack(void *data, char *name)
{
- /* Process all stacks: */
+ per_cpu(in_nmi_frame, smp_processor_id()) =
+ x86_is_stack_id(NMI_STACK, name);
+
return 0;
}
@@ -1584,6 +1587,9 @@
{
struct perf_callchain_entry *entry = data;
+ if (per_cpu(in_nmi_frame, smp_processor_id()))
+ return;
+
if (reliable)
callchain_store(entry, addr);
}
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index d593cd1..bca5fba 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -19,6 +19,12 @@
#include "dumpstack.h"
+/* Just a stub for now */
+int x86_is_stack_id(int id, char *name)
+{
+ return 0;
+}
+
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index d35db59..54b0a32 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -19,10 +19,8 @@
#include "dumpstack.h"
-static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
- unsigned *usedp, char **idp)
-{
- static char ids[][8] = {
+
+static char x86_stack_ids[][8] = {
[DEBUG_STACK - 1] = "#DB",
[NMI_STACK - 1] = "NMI",
[DOUBLEFAULT_STACK - 1] = "#DF",
@@ -33,6 +31,15 @@
N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
#endif
};
+
+int x86_is_stack_id(int id, char *name)
+{
+ return x86_stack_ids[id - 1] == name;
+}
+
+static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
+ unsigned *usedp, char **idp)
+{
unsigned k;
/*
@@ -61,7 +68,7 @@
if (*usedp & (1U << k))
break;
*usedp |= 1U << k;
- *idp = ids[k];
+ *idp = x86_stack_ids[k];
return (unsigned long *)end;
}
/*
@@ -81,12 +88,13 @@
do {
++j;
end -= EXCEPTION_STKSZ;
- ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
+ x86_stack_ids[j][4] = '1' +
+ (j - N_EXCEPTION_STACKS);
} while (stack < end - EXCEPTION_STKSZ);
if (*usedp & (1U << j))
break;
*usedp |= 1U << j;
- *idp = ids[j];
+ *idp = x86_stack_ids[j];
return (unsigned long *)end;
}
#endif