perf script: Enable printing of branch stack

This patch improves perf script by enabling printing of the
branch stack via the 'brstack' and 'brstacksym' arguments to
the field selection option -F. The option is off by default
and operates only if the perf.data file has branch stack content.

The branches are printed in to/from pairs. The most recent branch
is printed first. The number of branch entries vary based on the
underlying hardware and filtering used.

The brstack prints FROM/TO addresses in raw hexadecimal format.
The brstacksym prints FROM/TO addresses in symbolic form wherever
possible.

 $ perf script -F ip,brstack
  5d3000 0x401aa0/0x5d2000/M/-/-/-/0 ...

 $ perf script -F ip,brstacksym
  4011e0 noploop+0x0/noploop+0x0/P/-/-/0

The notation F/T/M/X/A/C describes the attributes of the branch.
F=from, T=to, M/P=misprediction/prediction, X=TSX, A=TSX abort, C=cycles (SKL)

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Yuanfang Chen <cyfmxc@gmail.com>
Link: http://lkml.kernel.org/r/1441039273-16260-5-git-send-email-eranian@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 278acb2..72b5deb 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -51,6 +51,8 @@
 	PERF_OUTPUT_SRCLINE         = 1U << 12,
 	PERF_OUTPUT_PERIOD          = 1U << 13,
 	PERF_OUTPUT_IREGS	    = 1U << 14,
+	PERF_OUTPUT_BRSTACK	    = 1U << 15,
+	PERF_OUTPUT_BRSTACKSYM	    = 1U << 16,
 };
 
 struct output_option {
@@ -72,6 +74,8 @@
 	{.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
 	{.str = "period", .field = PERF_OUTPUT_PERIOD},
 	{.str = "iregs", .field = PERF_OUTPUT_IREGS},
+	{.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
+	{.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
 };
 
 /* default set to maintain compatibility with current format */
@@ -425,6 +429,77 @@
 	}
 }
 
+static inline char
+mispred_str(struct branch_entry *br)
+{
+	if (!(br->flags.mispred  || br->flags.predicted))
+		return '-';
+
+	return br->flags.predicted ? 'P' : 'M';
+}
+
+static void print_sample_brstack(union perf_event *event __maybe_unused,
+			  struct perf_sample *sample,
+			  struct thread *thread __maybe_unused,
+			  struct perf_event_attr *attr __maybe_unused)
+{
+	struct branch_stack *br = sample->branch_stack;
+	u64 i;
+
+	if (!(br && br->nr))
+		return;
+
+	for (i = 0; i < br->nr; i++) {
+		printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ",
+			br->entries[i].from,
+			br->entries[i].to,
+			mispred_str( br->entries + i),
+			br->entries[i].flags.in_tx? 'X' : '-',
+			br->entries[i].flags.abort? 'A' : '-',
+			br->entries[i].flags.cycles);
+	}
+}
+
+static void print_sample_brstacksym(union perf_event *event __maybe_unused,
+			  struct perf_sample *sample,
+			  struct thread *thread __maybe_unused,
+			  struct perf_event_attr *attr __maybe_unused)
+{
+	struct branch_stack *br = sample->branch_stack;
+	struct addr_location alf, alt;
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	u64 i, from, to;
+
+	if (!(br && br->nr))
+		return;
+
+	for (i = 0; i < br->nr; i++) {
+
+		memset(&alf, 0, sizeof(alf));
+		memset(&alt, 0, sizeof(alt));
+		from = br->entries[i].from;
+		to   = br->entries[i].to;
+
+		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, from, &alf);
+		if (alf.map)
+			alf.sym = map__find_symbol(alf.map, alf.addr, NULL);
+
+		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, to, &alt);
+		if (alt.map)
+			alt.sym = map__find_symbol(alt.map, alt.addr, NULL);
+
+		symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
+		putchar('/');
+		symbol__fprintf_symname_offs(alt.sym, &alt, stdout);
+		printf("/%c/%c/%c/%d ",
+			mispred_str( br->entries + i),
+			br->entries[i].flags.in_tx? 'X' : '-',
+			br->entries[i].flags.abort? 'A' : '-',
+			br->entries[i].flags.cycles);
+	}
+}
+
+
 static void print_sample_addr(union perf_event *event,
 			  struct perf_sample *sample,
 			  struct thread *thread,
@@ -560,6 +635,11 @@
 	if (PRINT_FIELD(IREGS))
 		print_sample_iregs(event, sample, thread, attr);
 
+	if (PRINT_FIELD(BRSTACK))
+		print_sample_brstack(event, sample, thread, attr);
+	else if (PRINT_FIELD(BRSTACKSYM))
+		print_sample_brstacksym(event, sample, thread, attr);
+
 	printf("\n");
 }
 
@@ -1681,7 +1761,7 @@
 		     "comma separated output fields prepend with 'type:'. "
 		     "Valid types: hw,sw,trace,raw. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
-		     "addr,symoff,period,iregs,flags", parse_output_fields),
+		     "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",