x86: Add NMI duration tracepoints

This patch has been invaluable in my adventures finding
issues in the perf NMI handler.  I'm as big a fan of
printk() as anybody is, but using printk() in NMIs is
deadly when they're happening frequently.

Even hacking in trace_printk() ended up eating enough
CPU to throw off some of the measurements I was making.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus@samba.org
Cc: acme@ghostprotocols.net
Cc: Dave Hansen <dave@sr71.net>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e9bae4c..0920212 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -30,6 +30,9 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/nmi.h>
+
 struct nmi_desc {
 	spinlock_t lock;
 	struct list_head head;
@@ -108,11 +111,13 @@
 	 */
 	list_for_each_entry_rcu(a, &desc->head, list) {
 		u64 before, delta, whole_msecs;
-		int decimal_msecs;
+		int decimal_msecs, thishandled;
 
 		before = local_clock();
-		handled += a->handler(type, regs);
+		thishandled = a->handler(type, regs);
+		handled += thishandled;
 		delta = local_clock() - before;
+		trace_nmi_handler(a->handler, (int)delta, thishandled);
 
 		if (delta < nmi_longest_ns)
 			continue;