Merge tag 'perf-core-for-mingo-20160505' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
User visible changes:
- Order output of 'perf trace --summary' better, now the threads will
appear ascending order of number of events, and then, for each, in
descending order of syscalls by the time spent in the syscalls, so
that the last page produced can be the one about the most interesting
thread straced, suggested by Milian Wolff (Arnaldo Carvalho de Melo)
- Do not show the runtime_ms for a thread when not collecting it, that
is done so far only with 'perf trace --sched' (Arnaldo Carvalho de Melo)
- Fix kallsyms perf test on ppc64le (Naveen N. Rao)
Infrastructure changes:
- Move global variables related to presence of some keys in the sort order to a
per hist struct, to allow code like the hists browser to work with multiple
hists with different lists of columns (Jiri Olsa)
- Add support for generating bpf prologue in powerpc (Naveen N. Rao)
- Fix kprobe and kretprobe handling with kallsyms on ppc64le (Naveen N. Rao)
- evlist mmap changes, prep work for supporting reading backwards (Wang Nan)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 56e05f1..cc39309 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -3,4 +3,5 @@
endif
HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c
index 733151c..41bdf95 100644
--- a/tools/perf/arch/powerpc/util/dwarf-regs.c
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -10,19 +10,26 @@
*/
#include <stddef.h>
+#include <errno.h>
+#include <string.h>
#include <dwarf-regs.h>
-
+#include <linux/ptrace.h>
+#include <linux/kernel.h>
+#include "util.h"
struct pt_regs_dwarfnum {
const char *name;
unsigned int dwarfnum;
+ unsigned int ptregs_offset;
};
-#define STR(s) #s
-#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
-#define GPR_DWARFNUM_NAME(num) \
- {.name = STR(%gpr##num), .dwarfnum = num}
-#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+#define REG_DWARFNUM_NAME(r, num) \
+ {.name = STR(%)STR(r), .dwarfnum = num, \
+ .ptregs_offset = offsetof(struct pt_regs, r)}
+#define GPR_DWARFNUM_NAME(num) \
+ {.name = STR(%gpr##num), .dwarfnum = num, \
+ .ptregs_offset = offsetof(struct pt_regs, gpr[num])}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0}
/*
* Reference:
@@ -61,12 +68,12 @@
GPR_DWARFNUM_NAME(29),
GPR_DWARFNUM_NAME(30),
GPR_DWARFNUM_NAME(31),
- REG_DWARFNUM_NAME("%msr", 66),
- REG_DWARFNUM_NAME("%ctr", 109),
- REG_DWARFNUM_NAME("%link", 108),
- REG_DWARFNUM_NAME("%xer", 101),
- REG_DWARFNUM_NAME("%dar", 119),
- REG_DWARFNUM_NAME("%dsisr", 118),
+ REG_DWARFNUM_NAME(msr, 66),
+ REG_DWARFNUM_NAME(ctr, 109),
+ REG_DWARFNUM_NAME(link, 108),
+ REG_DWARFNUM_NAME(xer, 101),
+ REG_DWARFNUM_NAME(dar, 119),
+ REG_DWARFNUM_NAME(dsisr, 118),
REG_DWARFNUM_END,
};
@@ -86,3 +93,12 @@
return roff->name;
return NULL;
}
+
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_dwarfnum *roff;
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->ptregs_offset;
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index bbc1a50..c6d0f91 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -19,12 +19,6 @@
ehdr.e_type == ET_DYN;
}
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-void arch__elf_sym_adjust(GElf_Sym *sym)
-{
- sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
-}
-#endif
#endif
#if !defined(_CALL_ELF) || _CALL_ELF != 2
@@ -65,18 +59,45 @@
return true;
}
+#ifdef HAVE_LIBELF_SUPPORT
+void arch__sym_update(struct symbol *s, GElf_Sym *sym)
+{
+ s->arch_sym = sym->st_other;
+}
+#endif
+
#define PPC64LE_LEP_OFFSET 8
void arch__fix_tev_from_maps(struct perf_probe_event *pev,
- struct probe_trace_event *tev, struct map *map)
+ struct probe_trace_event *tev, struct map *map,
+ struct symbol *sym)
{
+ int lep_offset;
+
/*
- * ppc64 ABIv2 local entry point is currently always 2 instructions
- * (8 bytes) after the global entry point.
+ * When probing at a function entry point, we normally always want the
+ * LEP since that catches calls to the function through both the GEP and
+ * the LEP. Hence, we would like to probe at an offset of 8 bytes if
+ * the user only specified the function entry.
+ *
+ * However, if the user specifies an offset, we fall back to using the
+ * GEP since all userspace applications (objdump/readelf) show function
+ * disassembly with offsets from the GEP.
+ *
+ * In addition, we shouldn't specify an offset for kretprobes.
*/
- if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
- tev->point.address += PPC64LE_LEP_OFFSET;
+ if (pev->point.offset || pev->point.retprobe || !map || !sym)
+ return;
+
+ lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
+
+ if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
tev->point.offset += PPC64LE_LEP_OFFSET;
+ else if (lep_offset) {
+ if (pev->uprobes)
+ tev->point.address += lep_offset;
+ else
+ tev->point.offset += lep_offset;
}
}
#endif
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 8053a8c..9ce354f 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -428,7 +428,7 @@
struct rb_root *root;
struct rb_node *next;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
@@ -450,7 +450,7 @@
struct rb_root *root;
struct rb_node *next;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8d9b88a..87d40e3 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -234,7 +234,7 @@
sample_type |= PERF_SAMPLE_BRANCH_STACK;
if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
- if (sort__has_parent) {
+ if (perf_hpp_list.parent) {
ui__error("Selected --sort parent, but no "
"callchain data. Did you call "
"'perf record' without -g?\n");
@@ -936,7 +936,7 @@
goto error;
}
- sort__need_collapse = true;
+ perf_hpp_list.need_collapse = true;
}
/* Force tty output for header output and per-thread stat. */
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index da18517..1793da5 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -688,7 +688,7 @@
struct hist_entry *he = iter->he;
struct perf_evsel *evsel = iter->evsel;
- if (sort__has_sym && single)
+ if (perf_hpp_list.sym && single)
perf_top__record_precise_ip(top, he, evsel->idx, al->addr);
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
@@ -919,7 +919,7 @@
static int callchain_param__setup_sample_type(struct callchain_param *callchain)
{
- if (!sort__has_sym) {
+ if (!perf_hpp_list.sym) {
if (callchain->enabled) {
ui__error("Selected -g but \"sym\" not present in --sort/-s.");
return -EINVAL;
@@ -962,7 +962,7 @@
machine__synthesize_threads(&top->session->machines.host, &opts->target,
top->evlist->threads, false, opts->proc_map_timeout);
- if (sort__has_socket) {
+ if (perf_hpp_list.socket) {
ret = perf_env__read_cpu_topology_map(&perf_env);
if (ret < 0)
goto out_err_cpu_topo;
@@ -1255,7 +1255,7 @@
sort__mode = SORT_MODE__TOP;
/* display thread wants entries to be collapsed in a different tree */
- sort__need_collapse = 1;
+ perf_hpp_list.need_collapse = 1;
if (top.use_stdio)
use_browser = 0;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 9e38fe9..66aa2a0 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -36,6 +36,7 @@
#include "util/bpf-loader.h"
#include "callchain.h"
#include "syscalltbl.h"
+#include "rb_resort.h"
#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
#include <stdlib.h>
@@ -2784,15 +2785,29 @@
return printed;
}
+DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
+ struct stats *stats;
+ double msecs;
+ int syscall;
+)
+{
+ struct int_node *source = rb_entry(nd, struct int_node, rb_node);
+ struct stats *stats = source->priv;
+
+ entry->syscall = source->i;
+ entry->stats = stats;
+ entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
+}
+
static size_t thread__dump_stats(struct thread_trace *ttrace,
struct trace *trace, FILE *fp)
{
- struct stats *stats;
size_t printed = 0;
struct syscall *sc;
- struct int_node *inode = intlist__first(ttrace->syscall_stats);
+ struct rb_node *nd;
+ DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
- if (inode == NULL)
+ if (syscall_stats == NULL)
return 0;
printed += fprintf(fp, "\n");
@@ -2801,9 +2816,8 @@
printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
- /* each int_node is a syscall */
- while (inode) {
- stats = inode->priv;
+ resort_rb__for_each(nd, syscall_stats) {
+ struct stats *stats = syscall_stats_entry->stats;
if (stats) {
double min = (double)(stats->min) / NSEC_PER_MSEC;
double max = (double)(stats->max) / NSEC_PER_MSEC;
@@ -2814,34 +2828,23 @@
pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
avg /= NSEC_PER_MSEC;
- sc = &trace->syscalls.table[inode->i];
+ sc = &trace->syscalls.table[syscall_stats_entry->syscall];
printed += fprintf(fp, " %-15s", sc->name);
printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
- n, avg * n, min, avg);
+ n, syscall_stats_entry->msecs, min, avg);
printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
}
-
- inode = intlist__next(inode);
}
+ resort_rb__delete(syscall_stats);
printed += fprintf(fp, "\n\n");
return printed;
}
-/* struct used to pass data to per-thread function */
-struct summary_data {
- FILE *fp;
- struct trace *trace;
- size_t printed;
-};
-
-static int trace__fprintf_one_thread(struct thread *thread, void *priv)
+static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
{
- struct summary_data *data = priv;
- FILE *fp = data->fp;
- size_t printed = data->printed;
- struct trace *trace = data->trace;
+ size_t printed = 0;
struct thread_trace *ttrace = thread__priv(thread);
double ratio;
@@ -2857,25 +2860,45 @@
printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
if (ttrace->pfmin)
printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
- printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
+ if (trace->sched)
+ printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
+ else if (fputc('\n', fp) != EOF)
+ ++printed;
+
printed += thread__dump_stats(ttrace, trace, fp);
- data->printed += printed;
+ return printed;
+}
- return 0;
+static unsigned long thread__nr_events(struct thread_trace *ttrace)
+{
+ return ttrace ? ttrace->nr_events : 0;
+}
+
+DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
+ struct thread *thread;
+)
+{
+ entry->thread = rb_entry(nd, struct thread, rb_node);
}
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
- struct summary_data data = {
- .fp = fp,
- .trace = trace
- };
- data.printed = trace__fprintf_threads_header(fp);
+ DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
+ size_t printed = trace__fprintf_threads_header(fp);
+ struct rb_node *nd;
- machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
+ if (threads == NULL) {
+ fprintf(fp, "%s", "Error sorting output by nr_events!\n");
+ return 0;
+ }
- return data.printed;
+ resort_rb__for_each(nd, threads)
+ printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+
+ resort_rb__delete(threads);
+
+ return printed;
}
static int trace__set_duration(const struct option *opt, const char *str,
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index f55f4bd..6b21746 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -161,7 +161,7 @@
struct rb_root *root;
struct rb_node *node;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 4a2bbff9..a9e3db3 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -126,7 +126,7 @@
struct rb_root *root_out;
struct rb_node *node;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root_in = &hists->entries_collapsed;
else
root_in = hists->entries_in;
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 358324e..acf5a13 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -145,7 +145,7 @@
/*
* Only entries from fake_common_samples should have a pair.
*/
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
@@ -197,7 +197,7 @@
* and some entries will have no pair. However every entry
* in other hists should have (dummy) pair.
*/
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index 7cd8738..63c5efa 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -92,7 +92,7 @@
struct rb_root *root_out;
struct rb_node *node;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root_in = &hists->entries_collapsed;
else
root_in = hists->entries_in;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 6a46819..538bae8 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2135,7 +2135,7 @@
printed += snprintf(bf + printed, size - printed,
", UID: %s", hists->uid_filter_str);
if (thread) {
- if (sort__has_thread) {
+ if (hists__has(hists, thread)) {
printed += scnprintf(bf + printed, size - printed,
", Thread: %s(%d)",
(thread->comm_set ? thread__comm_str(thread) : ""),
@@ -2320,7 +2320,8 @@
{
struct thread *thread = act->thread;
- if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+ if ((!hists__has(browser->hists, thread) &&
+ !hists__has(browser->hists, comm)) || thread == NULL)
return 0;
if (browser->hists->thread_filter) {
@@ -2329,7 +2330,7 @@
thread__zput(browser->hists->thread_filter);
ui_helpline__pop();
} else {
- if (sort__has_thread) {
+ if (hists__has(browser->hists, thread)) {
ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
thread->comm_set ? thread__comm_str(thread) : "",
thread->tid);
@@ -2354,10 +2355,11 @@
{
int ret;
- if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+ if ((!hists__has(browser->hists, thread) &&
+ !hists__has(browser->hists, comm)) || thread == NULL)
return 0;
- if (sort__has_thread) {
+ if (hists__has(browser->hists, thread)) {
ret = asprintf(optstr, "Zoom %s %s(%d) thread",
browser->hists->thread_filter ? "out of" : "into",
thread->comm_set ? thread__comm_str(thread) : "",
@@ -2380,7 +2382,7 @@
{
struct map *map = act->ms.map;
- if (!sort__has_dso || map == NULL)
+ if (!hists__has(browser->hists, dso) || map == NULL)
return 0;
if (browser->hists->dso_filter) {
@@ -2407,7 +2409,7 @@
add_dso_opt(struct hist_browser *browser, struct popup_action *act,
char **optstr, struct map *map)
{
- if (!sort__has_dso || map == NULL)
+ if (!hists__has(browser->hists, dso) || map == NULL)
return 0;
if (asprintf(optstr, "Zoom %s %s DSO",
@@ -2429,10 +2431,10 @@
}
static int
-add_map_opt(struct hist_browser *browser __maybe_unused,
+add_map_opt(struct hist_browser *browser,
struct popup_action *act, char **optstr, struct map *map)
{
- if (!sort__has_dso || map == NULL)
+ if (!hists__has(browser->hists, dso) || map == NULL)
return 0;
if (asprintf(optstr, "Browse map details") < 0)
@@ -2534,7 +2536,7 @@
static int
do_zoom_socket(struct hist_browser *browser, struct popup_action *act)
{
- if (!sort__has_socket || act->socket < 0)
+ if (!hists__has(browser->hists, socket) || act->socket < 0)
return 0;
if (browser->hists->socket_filter > -1) {
@@ -2556,7 +2558,7 @@
add_socket_opt(struct hist_browser *browser, struct popup_action *act,
char **optstr, int socket_id)
{
- if (!sort__has_socket || socket_id < 0)
+ if (!hists__has(browser->hists, socket) || socket_id < 0)
return 0;
if (asprintf(optstr, "Zoom %s Processor Socket %d",
@@ -2747,7 +2749,7 @@
*/
goto out_free_stack;
case 'a':
- if (!sort__has_sym) {
+ if (!hists__has(hists, sym)) {
ui_browser__warning(&browser->b, delay_secs * 2,
"Annotation is only available for symbolic views, "
"include \"sym*\" in --sort to use it.");
@@ -2910,7 +2912,7 @@
continue;
}
- if (!sort__has_sym || browser->selection == NULL)
+ if (!hists__has(hists, sym) || browser->selection == NULL)
goto skip_annotation;
if (sort__mode == SORT_MODE__BRANCH) {
@@ -2954,7 +2956,7 @@
goto skip_scripting;
if (browser->he_selection) {
- if (sort__has_thread && thread) {
+ if (hists__has(hists, thread) && thread) {
nr_options += add_script_opt(browser,
&actions[nr_options],
&options[nr_options],
@@ -2969,7 +2971,7 @@
*
* See hist_browser__show_entry.
*/
- if (sort__has_sym && browser->selection->sym) {
+ if (hists__has(hists, sym) && browser->selection->sym) {
nr_options += add_script_opt(browser,
&actions[nr_options],
&options[nr_options],
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 2aa45b6..932adfa 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -379,7 +379,7 @@
gtk_tree_store_set(store, &iter, col_idx++, s, -1);
}
- if (symbol_conf.use_callchain && sort__has_sym) {
+ if (symbol_conf.use_callchain && hists__has(hists, sym)) {
if (callchain_param.mode == CHAIN_GRAPH_REL)
total = symbol_conf.cumulate_callchain ?
h->stat_acc->period : h->stat.period;
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 3baeaa6..af07ffb 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -635,7 +635,7 @@
ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists));
}
- if (verbose && sort__has_sym) /* Addr + origin */
+ if (verbose && hists__has(hists, sym)) /* Addr + origin */
ret += 3 + BITS_PER_LONG / 4;
return ret;
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index b795b69..d4b3d03 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1665,5 +1665,5 @@
bool ui__has_annotation(void)
{
- return use_browser == 1 && sort__has_sym;
+ return use_browser == 1 && perf_hpp_list.sym;
}
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index aa248dc..07fd30b 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -799,7 +799,7 @@
return 0;
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
- sort__has_parent) {
+ perf_hpp_list.parent) {
return thread__resolve_callchain(al->thread, cursor, evsel, sample,
parent, al, max_stack);
}
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 85271e5..17cd014 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -679,39 +679,31 @@
return NULL;
}
-union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+/* When check_messup is true, 'end' must points to a good entry */
+static union perf_event *
+perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
+ u64 end, u64 *prev)
{
- struct perf_mmap *md = &evlist->mmap[idx];
- u64 head;
- u64 old = md->prev;
- int diff;
unsigned char *data = md->base + page_size;
union perf_event *event = NULL;
+ int diff = end - start;
- /*
- * Check if event was unmapped due to a POLLHUP/POLLERR.
- */
- if (!atomic_read(&md->refcnt))
- return NULL;
-
- head = perf_mmap__read_head(md);
- diff = head - old;
- if (evlist->overwrite) {
+ if (check_messup) {
/*
* If we're further behind than half the buffer, there's a chance
* the writer will bite our tail and mess up the samples under us.
*
- * If we somehow ended up ahead of the head, we got messed up.
+ * If we somehow ended up ahead of the 'end', we got messed up.
*
- * In either case, truncate and restart at head.
+ * In either case, truncate and restart at 'end'.
*/
if (diff > md->mask / 2 || diff < 0) {
fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
/*
- * head points to a known good entry, start there.
+ * 'end' points to a known good entry, start there.
*/
- old = head;
+ start = end;
diff = 0;
}
}
@@ -719,7 +711,7 @@
if (diff >= (int)sizeof(event->header)) {
size_t size;
- event = (union perf_event *)&data[old & md->mask];
+ event = (union perf_event *)&data[start & md->mask];
size = event->header.size;
if (size < sizeof(event->header) || diff < (int)size) {
@@ -731,8 +723,8 @@
* Event straddles the mmap boundary -- header should always
* be inside due to u64 alignment of output.
*/
- if ((old & md->mask) + size != ((old + size) & md->mask)) {
- unsigned int offset = old;
+ if ((start & md->mask) + size != ((start + size) & md->mask)) {
+ unsigned int offset = start;
unsigned int len = min(sizeof(*event), size), cpy;
void *dst = md->event_copy;
@@ -747,15 +739,33 @@
event = (union perf_event *) md->event_copy;
}
- old += size;
+ start += size;
}
broken_event:
- md->prev = old;
+ if (prev)
+ *prev = start;
return event;
}
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+{
+ struct perf_mmap *md = &evlist->mmap[idx];
+ u64 head;
+ u64 old = md->prev;
+
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!atomic_read(&md->refcnt))
+ return NULL;
+
+ head = perf_mmap__read_head(md);
+
+ return perf_mmap__read(md, evlist->overwrite, old, head, &md->prev);
+}
+
static bool perf_mmap__empty(struct perf_mmap *md)
{
return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 0f33d7e..cfab531 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -295,7 +295,7 @@
root_in = &he->parent_he->hroot_in;
root_out = &he->parent_he->hroot_out;
} else {
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root_in = &hists->entries_collapsed;
else
root_in = hists->entries_in;
@@ -1373,7 +1373,7 @@
struct hist_entry *n;
int ret;
- if (!sort__need_collapse)
+ if (!hists__has(hists, need_collapse))
return 0;
hists->nr_entries = 0;
@@ -1632,7 +1632,7 @@
return;
}
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
@@ -2036,7 +2036,7 @@
struct hist_entry *he;
int64_t cmp;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
@@ -2078,7 +2078,7 @@
{
struct rb_node *n;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
n = hists->entries_collapsed.rb_node;
else
n = hists->entries_in->rb_node;
@@ -2107,7 +2107,7 @@
struct rb_node *nd;
struct hist_entry *pos, *pair;
- if (sort__need_collapse)
+ if (hists__has(leader, need_collapse))
root = &leader->entries_collapsed;
else
root = leader->entries_in;
@@ -2132,7 +2132,7 @@
struct rb_node *nd;
struct hist_entry *pos, *pair;
- if (sort__need_collapse)
+ if (hists__has(other, need_collapse))
root = &other->entries_collapsed;
else
root = other->entries_in;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 5885965..0f84bfb 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -82,6 +82,8 @@
int nr_hpp_node;
};
+#define hists__has(__h, __f) (__h)->hpp_list->__f
+
struct hist_entry_iter;
struct hist_iter_ops {
@@ -238,6 +240,14 @@
struct perf_hpp_list {
struct list_head fields;
struct list_head sorts;
+
+ int need_collapse;
+ int parent;
+ int sym;
+ int dso;
+ int socket;
+ int thread;
+ int comm;
};
extern struct perf_hpp_list perf_hpp_list;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 2cb95bb..8c7bf4d 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -32,6 +32,7 @@
machine->threads = RB_ROOT;
pthread_rwlock_init(&machine->threads_lock, NULL);
+ machine->nr_threads = 0;
INIT_LIST_HEAD(&machine->dead_threads);
machine->last_match = NULL;
@@ -430,6 +431,7 @@
*/
thread__get(th);
machine->last_match = th;
+ ++machine->nr_threads;
}
return th;
@@ -681,11 +683,13 @@
size_t machine__fprintf(struct machine *machine, FILE *fp)
{
- size_t ret = 0;
+ size_t ret;
struct rb_node *nd;
pthread_rwlock_rdlock(&machine->threads_lock);
+ ret = fprintf(fp, "Threads: %u\n", machine->nr_threads);
+
for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
struct thread *pos = rb_entry(nd, struct thread, rb_node);
@@ -1419,6 +1423,7 @@
pthread_rwlock_wrlock(&machine->threads_lock);
rb_erase_init(&th->rb_node, &machine->threads);
RB_CLEAR_NODE(&th->rb_node);
+ --machine->nr_threads;
/*
* Move it first to the dead_threads list, then drop the reference,
* if this is the last reference, then the thread__delete destructor
@@ -1647,7 +1652,7 @@
}
if (al.sym != NULL) {
- if (sort__has_parent && !*parent &&
+ if (perf_hpp_list.parent && !*parent &&
symbol__match_regex(al.sym, &parent_regex))
*parent = al.sym;
else if (have_ignore_callees && root_al &&
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 4822de5..83f4679 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -31,6 +31,7 @@
char *root_dir;
struct rb_root threads;
pthread_rwlock_t threads_lock;
+ unsigned int nr_threads;
struct list_head dead_threads;
struct thread *last_match;
struct vdso_info *vdso_info;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 85d82f4..c82c625 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2477,7 +2477,8 @@
void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
struct probe_trace_event *tev __maybe_unused,
- struct map *map __maybe_unused) { }
+ struct map *map __maybe_unused,
+ struct symbol *sym __maybe_unused) { }
/*
* Find probe function addresses from map.
@@ -2614,7 +2615,7 @@
strdup_or_goto(pev->args[i].type,
nomem_out);
}
- arch__fix_tev_from_maps(pev, tev, map);
+ arch__fix_tev_from_maps(pev, tev, map, sym);
}
if (ret == skipped) {
ret = -ENOENT;
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e220962..5a27eb4 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -154,7 +154,8 @@
int show_available_funcs(const char *module, struct strfilter *filter, bool user);
bool arch__prefers_symtab(void);
void arch__fix_tev_from_maps(struct perf_probe_event *pev,
- struct probe_trace_event *tev, struct map *map);
+ struct probe_trace_event *tev, struct map *map,
+ struct symbol *sym);
/* If there is no space to write, returns -E2BIG. */
int e_snprintf(char *str, size_t size, const char *format, ...)
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
new file mode 100644
index 0000000..abc76e3
--- /dev/null
+++ b/tools/perf/util/rb_resort.h
@@ -0,0 +1,149 @@
+#ifndef _PERF_RESORT_RB_H_
+#define _PERF_RESORT_RB_H_
+/*
+ * Template for creating a class to resort an existing rb_tree according to
+ * a new sort criteria, that must be present in the entries of the source
+ * rb_tree.
+ *
+ * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Quick example, resorting threads by its shortname:
+ *
+ * First define the prefix (threads) to be used for the functions and data
+ * structures created, and provide an expression for the sorting, then the
+ * fields to be present in each of the entries in the new, sorted, rb_tree.
+ *
+ * The body of the init function should collect the fields, maybe
+ * pre-calculating them from multiple entries in the original 'entry' from
+ * the rb_tree used as a source for the entries to be sorted:
+
+DEFINE_RB_RESORT_RB(threads, strcmp(a->thread->shortname,
+ b->thread->shortname) < 0,
+ struct thread *thread;
+)
+{
+ entry->thread = rb_entry(nd, struct thread, rb_node);
+}
+
+ * After this it is just a matter of instantiating it and iterating it,
+ * for a few data structures with existing rb_trees, such as 'struct machine',
+ * helpers are available to get the rb_root and the nr_entries:
+
+ DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr);
+
+ * This will instantiate the new rb_tree and a cursor for it, that can be used as:
+
+ struct rb_node *nd;
+
+ resort_rb__for_each(nd, threads) {
+ struct thread *t = threads_entry;
+ printf("%s: %d\n", t->shortname, t->tid);
+ }
+
+ * Then delete it:
+
+ resort_rb__delete(threads);
+
+ * The name of the data structures and functions will have a _sorted suffix
+ * right before the method names, i.e. will look like:
+ *
+ * struct threads_sorted_entry {}
+ * threads_sorted__insert()
+ */
+
+#define DEFINE_RESORT_RB(__name, __comp, ...) \
+struct __name##_sorted_entry { \
+ struct rb_node rb_node; \
+ __VA_ARGS__ \
+}; \
+static void __name##_sorted__init_entry(struct rb_node *nd, \
+ struct __name##_sorted_entry *entry); \
+ \
+static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \
+{ \
+ struct __name##_sorted_entry *a, *b; \
+ a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \
+ b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \
+ return __comp; \
+} \
+ \
+struct __name##_sorted { \
+ struct rb_root entries; \
+ struct __name##_sorted_entry nd[0]; \
+}; \
+ \
+static void __name##_sorted__insert(struct __name##_sorted *sorted, \
+ struct rb_node *sorted_nd) \
+{ \
+ struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \
+ while (*p != NULL) { \
+ parent = *p; \
+ if (__name##_sorted__cmp(sorted_nd, parent)) \
+ p = &(*p)->rb_left; \
+ else \
+ p = &(*p)->rb_right; \
+ } \
+ rb_link_node(sorted_nd, parent, p); \
+ rb_insert_color(sorted_nd, &sorted->entries); \
+} \
+ \
+static void __name##_sorted__sort(struct __name##_sorted *sorted, \
+ struct rb_root *entries) \
+{ \
+ struct rb_node *nd; \
+ unsigned int i = 0; \
+ for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \
+ struct __name##_sorted_entry *snd = &sorted->nd[i++]; \
+ __name##_sorted__init_entry(nd, snd); \
+ __name##_sorted__insert(sorted, &snd->rb_node); \
+ } \
+} \
+ \
+static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \
+ int nr_entries) \
+{ \
+ struct __name##_sorted *sorted; \
+ sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \
+ if (sorted) { \
+ sorted->entries = RB_ROOT; \
+ __name##_sorted__sort(sorted, entries); \
+ } \
+ return sorted; \
+} \
+ \
+static void __name##_sorted__delete(struct __name##_sorted *sorted) \
+{ \
+ free(sorted); \
+} \
+ \
+static void __name##_sorted__init_entry(struct rb_node *nd, \
+ struct __name##_sorted_entry *entry)
+
+#define DECLARE_RESORT_RB(__name) \
+struct __name##_sorted_entry *__name##_entry; \
+struct __name##_sorted *__name = __name##_sorted__new
+
+#define resort_rb__for_each(__nd, __name) \
+ for (__nd = rb_first(&__name->entries); \
+ __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \
+ rb_node), __nd; \
+ __nd = rb_next(__nd))
+
+#define resort_rb__delete(__name) \
+ __name##_sorted__delete(__name), __name = NULL
+
+/*
+ * Helpers for other classes that contains both an rbtree and the
+ * number of entries in it:
+ */
+
+/* For 'struct intlist' */
+#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \
+ DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries, \
+ __ilist->rblist.nr_entries)
+
+/* For 'struct machine->threads' */
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \
+ DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads)
+
+#endif /* _PERF_RESORT_RB_H_ */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 47966a1..772e2e4 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -21,13 +21,6 @@
const char *field_order;
regex_t ignore_callees_regex;
int have_ignore_callees = 0;
-int sort__need_collapse = 0;
-int sort__has_parent = 0;
-int sort__has_sym = 0;
-int sort__has_dso = 0;
-int sort__has_socket = 0;
-int sort__has_thread = 0;
-int sort__has_comm = 0;
enum sort_mode sort__mode = SORT_MODE__NORMAL;
/*
@@ -244,7 +237,7 @@
* comparing symbol address alone is not enough since it's a
* relative address within a dso.
*/
- if (!sort__has_dso) {
+ if (!hists__has(left->hists, dso) || hists__has(right->hists, dso)) {
ret = sort__dso_cmp(left, right);
if (ret != 0)
return ret;
@@ -2163,7 +2156,7 @@
return -1;
if (sd->entry->se_collapse)
- sort__need_collapse = 1;
+ list->need_collapse = 1;
sd->taken = 1;
@@ -2245,9 +2238,9 @@
pr_err("Invalid regex: %s\n%s", parent_pattern, err);
return -EINVAL;
}
- sort__has_parent = 1;
+ list->parent = 1;
} else if (sd->entry == &sort_sym) {
- sort__has_sym = 1;
+ list->sym = 1;
/*
* perf diff displays the performance difference amongst
* two or more perf.data files. Those files could come
@@ -2258,13 +2251,13 @@
sd->entry->se_collapse = sort__sym_sort;
} else if (sd->entry == &sort_dso) {
- sort__has_dso = 1;
+ list->dso = 1;
} else if (sd->entry == &sort_socket) {
- sort__has_socket = 1;
+ list->socket = 1;
} else if (sd->entry == &sort_thread) {
- sort__has_thread = 1;
+ list->thread = 1;
} else if (sd->entry == &sort_comm) {
- sort__has_comm = 1;
+ list->comm = 1;
}
return __sort_dimension__add(sd, list, level);
@@ -2289,7 +2282,7 @@
return -EINVAL;
if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
- sort__has_sym = 1;
+ list->sym = 1;
__sort_dimension__add(sd, list, level);
return 0;
@@ -2305,7 +2298,7 @@
return -EINVAL;
if (sd->entry == &sort_mem_daddr_sym)
- sort__has_sym = 1;
+ list->sym = 1;
__sort_dimension__add(sd, list, level);
return 0;
@@ -2746,10 +2739,10 @@
void reset_output_field(void)
{
- sort__need_collapse = 0;
- sort__has_parent = 0;
- sort__has_sym = 0;
- sort__has_dso = 0;
+ perf_hpp_list.need_collapse = 0;
+ perf_hpp_list.parent = 0;
+ perf_hpp_list.sym = 0;
+ perf_hpp_list.dso = 0;
field_order = NULL;
sort_order = NULL;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 3f4e359..42927f4 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -31,13 +31,6 @@
extern const char default_sort_order[];
extern regex_t ignore_callees_regex;
extern int have_ignore_callees;
-extern int sort__need_collapse;
-extern int sort__has_dso;
-extern int sort__has_parent;
-extern int sort__has_sym;
-extern int sort__has_socket;
-extern int sort__has_thread;
-extern int sort__has_comm;
extern enum sort_mode sort__mode;
extern struct sort_entry sort_comm;
extern struct sort_entry sort_dso;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 3f9d679..87a297d 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -770,7 +770,8 @@
return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
}
-void __weak arch__elf_sym_adjust(GElf_Sym *sym __maybe_unused) { }
+void __weak arch__sym_update(struct symbol *s __maybe_unused,
+ GElf_Sym *sym __maybe_unused) { }
int dso__load_sym(struct dso *dso, struct map *map,
struct symsrc *syms_ss, struct symsrc *runtime_ss,
@@ -947,8 +948,6 @@
(sym.st_value & 1))
--sym.st_value;
- arch__elf_sym_adjust(&sym);
-
if (dso->kernel || kmodule) {
char dso_name[PATH_MAX];
@@ -1082,6 +1081,8 @@
if (!f)
goto out_elf_end;
+ arch__sym_update(f, &sym);
+
if (filter && filter(curr_map, f))
symbol__delete(f);
else {
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index c8e4397..07211c2 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -55,6 +55,7 @@
u16 namelen;
u8 binding;
bool ignore;
+ u8 arch_sym;
char name[0];
};
@@ -323,7 +324,7 @@
#ifdef HAVE_LIBELF_SUPPORT
bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
-void arch__elf_sym_adjust(GElf_Sym *sym);
+void arch__sym_update(struct symbol *s, GElf_Sym *sym);
#endif
#define SYMBOL_A 0