tracing: Cache comms only after an event occurred
Whenever an event is registered, the comms of tasks are saved at
every task switch instead of saving them at every event. But if
an event isn't executed much, the comm cache will be filled up
by tasks that did not record the event and you lose out on the comms
of the tasks that did.
Here's an example. Enable the following events:
echo 1 > /debug/tracing/events/kvm/kvm_cr/enable
echo 1 > /debug/tracing/events/net/net_dev_xmit/enable
Note, there's no kvm running on this machine so the first event will
never be triggered, but because it is enabled, the storing of comms
will continue. If we now disable the network event:
echo 0 > /debug/tracing/events/net/net_dev_xmit/enable
and look at the trace:
cat /debug/tracing/trace
sshd-2672 [001] ..s2 375.731616: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
sshd-2672 [001] ..s1 375.731617: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
sshd-2672 [001] ..s2 375.859356: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
sshd-2672 [001] ..s1 375.859357: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
sshd-2672 [001] ..s2 375.947351: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
sshd-2672 [001] ..s1 375.947352: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
sshd-2672 [001] ..s2 376.035383: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
sshd-2672 [001] ..s1 376.035383: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
sshd-2672 [001] ..s2 377.563806: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=226 rc=0
sshd-2672 [001] ..s1 377.563807: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=226 rc=0
sshd-2672 [001] ..s2 377.563834: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6be0 len=114 rc=0
sshd-2672 [001] ..s1 377.563842: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6be0 len=114 rc=0
We see that process 2672 which triggered the events has the comm "sshd".
But if we run hackbench for a bit and look again:
cat /debug/tracing/trace
<...>-2672 [001] ..s2 375.731616: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
<...>-2672 [001] ..s1 375.731617: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
<...>-2672 [001] ..s2 375.859356: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
<...>-2672 [001] ..s1 375.859357: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
<...>-2672 [001] ..s2 375.947351: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
<...>-2672 [001] ..s1 375.947352: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
<...>-2672 [001] ..s2 376.035383: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
<...>-2672 [001] ..s1 376.035383: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
<...>-2672 [001] ..s2 377.563806: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=226 rc=0
<...>-2672 [001] ..s1 377.563807: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=226 rc=0
<...>-2672 [001] ..s2 377.563834: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6be0 len=114 rc=0
<...>-2672 [001] ..s1 377.563842: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6be0 len=114 rc=0
The stored "sshd" comm has been flushed out and we get a useless "<...>".
But by only storing comms after a trace event occurred, we can run
hackbench all day and still get the same output.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b90a827..88111b0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -78,6 +78,13 @@
}
/*
+ * To prevent the comm cache from being overwritten by tasks that
+ * never recorded an event, only save the comm after a trace event
+ * occurred on this CPU.
+ */
+static DEFINE_PER_CPU(bool, trace_cmdline_save);
+
+/*
* Kill all tracing for good (never come back).
* It is initialized to 1 but will turn to zero if the initialization
* of the tracer is successful. But that is the only place that sets
@@ -1135,6 +1142,11 @@
!tracing_is_on())
return;
+ if (!__this_cpu_read(trace_cmdline_save))
+ return;
+
+ __this_cpu_write(trace_cmdline_save, false);
+
trace_save_cmdline(tsk);
}
@@ -1178,13 +1190,20 @@
return event;
}
+void
+__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
+{
+ __this_cpu_write(trace_cmdline_save, true);
+ ring_buffer_unlock_commit(buffer, event);
+}
+
static inline void
__trace_buffer_unlock_commit(struct ring_buffer *buffer,
struct ring_buffer_event *event,
unsigned long flags, int pc,
int wake)
{
- ring_buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
ftrace_trace_stack(buffer, flags, 6, pc);
ftrace_trace_userstack(buffer, flags, pc);
@@ -1232,7 +1251,7 @@
unsigned long flags, int pc,
struct pt_regs *regs)
{
- ring_buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
ftrace_trace_userstack(buffer, flags, pc);
@@ -1269,7 +1288,7 @@
entry->parent_ip = parent_ip;
if (!filter_check_discard(call, entry, buffer, event))
- ring_buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
}
void
@@ -1362,7 +1381,7 @@
entry->size = trace.nr_entries;
if (!filter_check_discard(call, entry, buffer, event))
- ring_buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
out:
/* Again, don't let gcc optimize things here */
@@ -1458,7 +1477,7 @@
save_stack_trace_user(&trace);
if (!filter_check_discard(call, entry, buffer, event))
- ring_buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
out_drop_count:
__this_cpu_dec(user_stack_count);
@@ -1653,7 +1672,7 @@
memcpy(entry->buf, tbuffer, sizeof(u32) * len);
if (!filter_check_discard(call, entry, buffer, event)) {
- ring_buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
ftrace_trace_stack(buffer, flags, 6, pc);
}
@@ -1724,7 +1743,7 @@
memcpy(&entry->buf, tbuffer, len);
entry->buf[len] = '\0';
if (!filter_check_discard(call, entry, buffer, event)) {
- ring_buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
ftrace_trace_stack(buffer, flags, 6, pc);
}
out:
@@ -3993,7 +4012,7 @@
} else
entry->buf[cnt] = '\0';
- ring_buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
written = cnt;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7824a55..839ae00 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -359,6 +359,9 @@
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
int *ent_cpu, u64 *ent_ts);
+void __buffer_unlock_commit(struct ring_buffer *buffer,
+ struct ring_buffer_event *event);
+
int trace_empty(struct trace_iterator *iter);
void *trace_find_next_entry_inc(struct trace_iterator *iter);
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index bd3e0ee..95e9684 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -77,7 +77,7 @@
entry->correct = val == expect;
if (!filter_check_discard(call, entry, buffer, event))
- ring_buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
out:
atomic_dec(&tr->data[cpu]->disabled);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a84b558..4edb4b7 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -223,7 +223,7 @@
entry = ring_buffer_event_data(event);
entry->graph_ent = *trace;
if (!filter_current_check_discard(buffer, call, entry, event))
- ring_buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
return 1;
}
@@ -327,7 +327,7 @@
entry = ring_buffer_event_data(event);
entry->ret = *trace;
if (!filter_current_check_discard(buffer, call, entry, event))
- ring_buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
}
void trace_graph_return(struct ftrace_graph_ret *trace)