perf report: Fix and improve the displaying of per-thread event counters

Improve and fix the handling of per-thread counter stats
recorded via perf record -s. Previously we only displayed
it in debug printouts (-D) and even that output was hard
to disambiguate.

I moved everything to utils/values.[ch] so that we may reuse
it in perf stat.

We get something like this now:

 #  PID   TID  cache-misses  cache-references
   4658  4659        495581           3238779
   4658  4662        498246           3236823
   4658  4663        499531           3243162

Then it'll be easy to add --pretty=raw to display a single line per thread/event.

By the way, -S was also used for --symbol... So I used -T/--thread here.

perf report: Add -T/--threads to display per-thread counter values

 We get something like this now:
 #  PID   TID  cache-misses  cache-references
   4658  4659        495581           3238779
   4658  4662        498246           3236823
   4658  4663        499531           3243162

Per-thread arrays of counter values are managed in utils/values.[ch]

Signed-off-by: Brice Goglin <Brice.Goglin@inria.fr>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus@samba.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 99274ce..4163918 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -17,6 +17,7 @@
 #include "util/string.h"
 #include "util/callchain.h"
 #include "util/strlist.h"
+#include "util/values.h"
 
 #include "perf.h"
 #include "util/header.h"
@@ -53,6 +54,9 @@
 static int		full_paths;
 static int		show_nr_samples;
 
+static int		show_threads;
+static struct perf_read_values	show_threads_values;
+
 static unsigned long	page_size;
 static unsigned long	mmap_window = 32;
 
@@ -1473,6 +1477,9 @@
 
 	free(rem_sq_bracket);
 
+	if (show_threads)
+		perf_read_values_display(fp, &show_threads_values);
+
 	return ret;
 }
 
@@ -1758,6 +1765,16 @@
 {
 	struct perf_counter_attr *attr = perf_header__find_attr(event->read.id);
 
+	if (show_threads) {
+		char *name = attr ? __event_name(attr->type, attr->config)
+				   : "unknown";
+		perf_read_values_add_value(&show_threads_values,
+					   event->read.pid, event->read.tid,
+					   event->read.id,
+					   name,
+					   event->read.value);
+	}
+
 	dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n",
 			(void *)(offset + head),
 			(void *)(long)(event->header.size),
@@ -1839,6 +1856,9 @@
 
 	register_idle_thread();
 
+	if (show_threads)
+		perf_read_values_init(&show_threads_values);
+
 	input = open(input_name, O_RDONLY);
 	if (input < 0) {
 		fprintf(stderr, " failed to open file: %s", input_name);
@@ -1993,6 +2013,9 @@
 	output__resort(total);
 	output__fprintf(stdout, total);
 
+	if (show_threads)
+		perf_read_values_destroy(&show_threads_values);
+
 	return rc;
 }
 
@@ -2066,6 +2089,8 @@
 		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
 	OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples,
 		    "Show a column with the number of samples"),
+	OPT_BOOLEAN('T', "threads", &show_threads,
+		    "Show per-thread event counters"),
 	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
 		   "sort by key(s): pid, comm, dso, symbol, parent"),
 	OPT_BOOLEAN('P', "full-paths", &full_paths,