Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf tooling updates from Arnaldo Carvalho de Melo:

New features:

 * perf record: Add --initial-delay option (Andi Kleen)

 * Column colouring improvements in 'diff' (Ramkumar Ramachandra)

Fixes:

 * Don't show counter information when workload fails (Arnaldo Carvalho de Melo)

 * Fixup leak on error path in parse events test. (Arnaldo Carvalho de Melo)

 * Fix --delay option in 'stat' man page (Andi Kleen)

 * Use the DWARF unwind info only if loaded (Jean Pihet)

Developer stuff:

 * Improve forked workload error reporting by sending the errno in the signal
   data queueing integer field, using sigqueue and by doing the signal setup in
   the evlist methods, removing open coded equivalents in various tools. (Arnaldo Carvalho de Melo)

 * Do more auto exit cleanup chores in the 'evlist' destructor, so that the tools
   don't have to all do that sequence. (Arnaldo Carvalho de Melo)

 * Pack 'struct perf_session_env' and 'struct trace' (Arnaldo Carvalho de Melo)

 * Include tools/lib/api/ in MANIFEST, fixing detached tarballs (Arnaldo Carvalho de Melo)

 * Add test for building detached source tarballs (Arnaldo Carvalho de Melo)

 * Shut up libtraceevent plugins make message (Jiri Olsa)

 * Fix installation tests path setup (Jiri Olsa)

 * Fix id_hdr_size initialization (Jiri Olsa)

 * Move some header files from tools/perf/ to tools/include/ to make them available to
   other tools/ dwelling codebases (Namhyung Kim)

 * Fix 'probe' build when DWARF support libraries not present (Arnaldo Carvalho de Melo)

Refactorings:

 * Move logic to warn about kptr_restrict'ed kernels to separate
   function in 'report' (Arnaldo Carvalho de Melo)

 * Move hist browser selection code to separate function (Arnaldo Carvalho de Melo)

 * Move histogram entries collapsing to separate function (Arnaldo Carvalho de Melo)

 * Introduce evlist__for_each() & friends (Arnaldo Carvalho de Melo)

 * Automate setup of FEATURE_CHECK_(C|LD)FLAGS-all variables (Jiri Olsa)

 * Move arch setup into separate Makefile (Jiri Olsa)

Trivial stuff:

 * Remove misplaced __maybe_unused in 'stat' (Arnaldo Carvalho de Melo)

 * Remove old evsel_list usage in 'record' (Arnaldo Carvalho de Melo)

 * Comment typo fix (Cody P Schafer)

 * Remove unused test-volatile-register-var.c (Yann Droneaud)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/tools/perf/util/include/asm/bug.h b/tools/include/asm/bug.h
similarity index 81%
rename from tools/perf/util/include/asm/bug.h
rename to tools/include/asm/bug.h
index 7fcc681..9e5f484 100644
--- a/tools/perf/util/include/asm/bug.h
+++ b/tools/include/asm/bug.h
@@ -1,5 +1,7 @@
-#ifndef _PERF_ASM_GENERIC_BUG_H
-#define _PERF_ASM_GENERIC_BUG_H
+#ifndef _TOOLS_ASM_BUG_H
+#define _TOOLS_ASM_BUG_H
+
+#include <linux/compiler.h>
 
 #define __WARN_printf(arg...)	do { fprintf(stderr, arg); } while (0)
 
@@ -19,4 +21,5 @@
 			__warned = 1;		\
 	unlikely(__ret_warn_once);		\
 })
-#endif
+
+#endif /* _TOOLS_ASM_BUG_H */
diff --git a/tools/perf/util/include/linux/compiler.h b/tools/include/linux/compiler.h
similarity index 64%
rename from tools/perf/util/include/linux/compiler.h
rename to tools/include/linux/compiler.h
index b003ad7..fbc6665 100644
--- a/tools/perf/util/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -1,5 +1,5 @@
-#ifndef _PERF_LINUX_COMPILER_H_
-#define _PERF_LINUX_COMPILER_H_
+#ifndef _TOOLS_LINUX_COMPILER_H_
+#define _TOOLS_LINUX_COMPILER_H_
 
 #ifndef __always_inline
 # define __always_inline	inline __attribute__((always_inline))
@@ -27,4 +27,12 @@
 # define __weak			__attribute__((weak))
 #endif
 
+#ifndef likely
+# define likely(x)		__builtin_expect(!!(x), 1)
 #endif
+
+#ifndef unlikely
+# define unlikely(x)		__builtin_expect(!!(x), 0)
+#endif
+
+#endif /* _TOOLS_LINUX_COMPILER_H */
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index ca4ab78..f778d48 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -86,8 +86,8 @@
 ifneq ($(OUTPUT),)
 
 define build_output
-	$(if $(VERBOSE:1=),@)+$(MAKE) -C $(OUTPUT) \
-	BUILD_SRC=$(CURDIR)/ -f $(CURDIR)/Makefile $1
+  $(if $(VERBOSE:1=),@)+$(MAKE) -C $(OUTPUT) \
+  BUILD_SRC=$(CURDIR)/ -f $(CURDIR)/Makefile $1
 endef
 
 all: sub-make
@@ -221,23 +221,23 @@
 	$(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $<
 
 define make_version.h
-	(echo '/* This file is automatically generated. Do not modify. */';		\
-	echo \#define VERSION_CODE $(shell						\
-	expr $(VERSION) \* 256 + $(PATCHLEVEL));					\
-	echo '#define EXTRAVERSION ' $(EXTRAVERSION);					\
-	echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"';	\
-	echo '#define FILE_VERSION '$(FILE_VERSION);					\
-	) > $1
+  (echo '/* This file is automatically generated. Do not modify. */';		\
+   echo \#define VERSION_CODE $(shell						\
+   expr $(VERSION) \* 256 + $(PATCHLEVEL));					\
+   echo '#define EXTRAVERSION ' $(EXTRAVERSION);				\
+   echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"';	\
+   echo '#define FILE_VERSION '$(FILE_VERSION);					\
+  ) > $1
 endef
 
 define update_version.h
-	($(call make_version.h, $@.tmp);		\
-	if [ -r $@ ] && cmp -s $@ $@.tmp; then		\
-		rm -f $@.tmp;				\
-	else						\
-		echo '  UPDATE                 $@';	\
-		mv -f $@.tmp $@;			\
-	fi);
+  ($(call make_version.h, $@.tmp);		\
+    if [ -r $@ ] && cmp -s $@ $@.tmp; then	\
+      rm -f $@.tmp;				\
+    else					\
+      echo '  UPDATE                 $@';	\
+      mv -f $@.tmp $@;				\
+    fi);
 endef
 
 ep_version.h: force
@@ -246,13 +246,13 @@
 VERSION_FILES = ep_version.h
 
 define update_dir
-	(echo $1 > $@.tmp;	\
-	if [ -r $@ ] && cmp -s $@ $@.tmp; then		\
-		rm -f $@.tmp;				\
-	else						\
-		echo '  UPDATE                 $@';	\
-		mv -f $@.tmp $@;			\
-	fi);
+  (echo $1 > $@.tmp;				\
+   if [ -r $@ ] && cmp -s $@ $@.tmp; then	\
+     rm -f $@.tmp;				\
+   else						\
+     echo '  UPDATE                 $@';	\
+     mv -f $@.tmp $@;				\
+   fi);
 endef
 
 ## make deps
@@ -262,10 +262,10 @@
 
 # let .d file also depends on the source and header files
 define check_deps
-		@set -e; $(RM) $@; \
-		$(CC) -MM $(CFLAGS) $< > $@.$$$$; \
-		sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
-		$(RM) $@.$$$$
+  @set -e; $(RM) $@; \
+  $(CC) -MM $(CFLAGS) $< > $@.$$$$; \
+  sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
+  $(RM) $@.$$$$
 endef
 
 $(all_deps): .%.d: $(src)/%.c
@@ -329,9 +329,12 @@
 
 endif # skip-makefile
 
-PHONY += force
+PHONY += force plugins
 force:
 
+plugins:
+	@echo > /dev/null
+
 # Declare the contents of the .PHONY variable as phony.  We keep that
 # information in a variable so we can use it in if_changed and friends.
 .PHONY: $(PHONY)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index c407897..82bffac 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -209,6 +209,10 @@
 inheritance is automatically disabled.  --per-thread is ignored with a warning
 if combined with -a or -C options.
 
+--initial-delay msecs::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 80c7da6..29ee857 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -133,7 +133,7 @@
 core number and the number of online logical processors on that physical processor.
 
 -D msecs::
---initial-delay msecs::
+--delay msecs::
 After starting the program, wait msecs before measuring. This is useful to
 filter out the startup phase of the program, which is often very different.
 
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 3170a7f..f41572d 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,9 +1,11 @@
 tools/perf
 tools/scripts
 tools/lib/traceevent
-tools/lib/lk
+tools/lib/api
 tools/lib/symbol/kallsyms.c
 tools/lib/symbol/kallsyms.h
+tools/include/asm/bug.h
+tools/include/linux/compiler.h
 include/linux/const.h
 include/linux/perf_event.h
 include/linux/rbtree.h
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 3638b0b..87d7726 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -211,7 +211,7 @@
 LIB_H += ../../include/linux/stringify.h
 LIB_H += util/include/linux/bitmap.h
 LIB_H += util/include/linux/bitops.h
-LIB_H += util/include/linux/compiler.h
+LIB_H += ../include/linux/compiler.h
 LIB_H += util/include/linux/const.h
 LIB_H += util/include/linux/ctype.h
 LIB_H += util/include/linux/kernel.h
@@ -226,7 +226,7 @@
 LIB_H += util/include/linux/types.h
 LIB_H += util/include/linux/linkage.h
 LIB_H += util/include/asm/asm-offsets.h
-LIB_H += util/include/asm/bug.h
+LIB_H += ../include/asm/bug.h
 LIB_H += util/include/asm/byteorder.h
 LIB_H += util/include/asm/hweight.h
 LIB_H += util/include/asm/swab.h
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index ab65057..0da603b 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -232,7 +232,7 @@
 		perf_session__fprintf_dsos(session, stdout);
 
 	total_nr_samples = 0;
-	list_for_each_entry(pos, &session->evlist->entries, node) {
+	evlist__for_each(session->evlist, pos) {
 		struct hists *hists = &pos->hists;
 		u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
 
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index e6a0844..a77e312 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -356,9 +356,10 @@
 {
 	struct perf_evsel *e;
 
-	list_for_each_entry(e, &evlist->entries, node)
+	evlist__for_each(evlist, e) {
 		if (perf_evsel__match2(evsel, e))
 			return e;
+	}
 
 	return NULL;
 }
@@ -367,7 +368,7 @@
 {
 	struct perf_evsel *evsel;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		struct hists *hists = &evsel->hists;
 
 		hists__collapse_resort(hists, NULL);
@@ -614,7 +615,7 @@
 	struct perf_evsel *evsel_base;
 	bool first = true;
 
-	list_for_each_entry(evsel_base, &evlist_base->entries, node) {
+	evlist__for_each(evlist_base, evsel_base) {
 		struct data__file *d;
 		int i;
 
@@ -769,6 +770,81 @@
 	return ret;
 }
 
+static int __hpp__color_compare(struct perf_hpp_fmt *fmt,
+				struct perf_hpp *hpp, struct hist_entry *he,
+				int comparison_method)
+{
+	struct diff_hpp_fmt *dfmt =
+		container_of(fmt, struct diff_hpp_fmt, fmt);
+	struct hist_entry *pair = get_pair_fmt(he, dfmt);
+	double diff;
+	s64 wdiff;
+	char pfmt[20] = " ";
+
+	if (!pair)
+		goto dummy_print;
+
+	switch (comparison_method) {
+	case COMPUTE_DELTA:
+		if (pair->diff.computed)
+			diff = pair->diff.period_ratio_delta;
+		else
+			diff = compute_delta(he, pair);
+
+		if (fabs(diff) < 0.01)
+			goto dummy_print;
+		scnprintf(pfmt, 20, "%%%+d.2f%%%%", dfmt->header_width - 1);
+		return percent_color_snprintf(hpp->buf, hpp->size,
+					pfmt, diff);
+	case COMPUTE_RATIO:
+		if (he->dummy)
+			goto dummy_print;
+		if (pair->diff.computed)
+			diff = pair->diff.period_ratio;
+		else
+			diff = compute_ratio(he, pair);
+
+		scnprintf(pfmt, 20, "%%%d.6f", dfmt->header_width);
+		return value_color_snprintf(hpp->buf, hpp->size,
+					pfmt, diff);
+	case COMPUTE_WEIGHTED_DIFF:
+		if (he->dummy)
+			goto dummy_print;
+		if (pair->diff.computed)
+			wdiff = pair->diff.wdiff;
+		else
+			wdiff = compute_wdiff(he, pair);
+
+		scnprintf(pfmt, 20, "%%14ld", dfmt->header_width);
+		return color_snprintf(hpp->buf, hpp->size,
+				get_percent_color(wdiff),
+				pfmt, wdiff);
+	default:
+		BUG_ON(1);
+	}
+dummy_print:
+	return scnprintf(hpp->buf, hpp->size, "%*s",
+			dfmt->header_width, pfmt);
+}
+
+static int hpp__color_delta(struct perf_hpp_fmt *fmt,
+			struct perf_hpp *hpp, struct hist_entry *he)
+{
+	return __hpp__color_compare(fmt, hpp, he, COMPUTE_DELTA);
+}
+
+static int hpp__color_ratio(struct perf_hpp_fmt *fmt,
+			struct perf_hpp *hpp, struct hist_entry *he)
+{
+	return __hpp__color_compare(fmt, hpp, he, COMPUTE_RATIO);
+}
+
+static int hpp__color_wdiff(struct perf_hpp_fmt *fmt,
+			struct perf_hpp *hpp, struct hist_entry *he)
+{
+	return __hpp__color_compare(fmt, hpp, he, COMPUTE_WEIGHTED_DIFF);
+}
+
 static void
 hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size)
 {
@@ -940,8 +1016,22 @@
 	fmt->entry  = hpp__entry_global;
 
 	/* TODO more colors */
-	if (idx == PERF_HPP_DIFF__BASELINE)
+	switch (idx) {
+	case PERF_HPP_DIFF__BASELINE:
 		fmt->color = hpp__color_baseline;
+		break;
+	case PERF_HPP_DIFF__DELTA:
+		fmt->color = hpp__color_delta;
+		break;
+	case PERF_HPP_DIFF__RATIO:
+		fmt->color = hpp__color_ratio;
+		break;
+	case PERF_HPP_DIFF__WEIGHTED_DIFF:
+		fmt->color = hpp__color_wdiff;
+		break;
+	default:
+		break;
+	}
 
 	init_header(d, dfmt);
 	perf_hpp__column_register(fmt);
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 20b0f12..c99e0de 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -29,7 +29,7 @@
 	if (session == NULL)
 		return -ENOMEM;
 
-	list_for_each_entry(pos, &session->evlist->entries, node)
+	evlist__for_each(session->evlist, pos)
 		perf_evsel__fprintf(pos, details, stdout);
 
 	perf_session__delete(session);
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index c9f6d74..b346601 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -369,7 +369,7 @@
 
 		inject->tool.ordered_samples = true;
 
-		list_for_each_entry(evsel, &session->evlist->entries, node) {
+		evlist__for_each(session->evlist, evsel) {
 			const char *name = perf_evsel__name(evsel);
 
 			if (!strcmp(name, "sched:sched_switch")) {
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index a6ec105..a735051 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1174,7 +1174,7 @@
 	 * Note: exclude_{guest,host} do not apply here.
 	 *       This command processes KVM tracepoints from host only
 	 */
-	list_for_each_entry(pos, &evlist->entries, node) {
+	evlist__for_each(evlist, pos) {
 		struct perf_event_attr *attr = &pos->attr;
 
 		/* make sure these *are* set */
@@ -1556,10 +1556,8 @@
 	if (kvm->session)
 		perf_session__delete(kvm->session);
 	kvm->session = NULL;
-	if (kvm->evlist) {
-		perf_evlist__delete_maps(kvm->evlist);
+	if (kvm->evlist)
 		perf_evlist__delete(kvm->evlist);
-	}
 
 	return err;
 }
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6ec0cbc..07d4cf8 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -183,7 +183,7 @@
 
 	perf_evlist__config(evlist, opts);
 
-	list_for_each_entry(pos, &evlist->entries, node) {
+	evlist__for_each(evlist, pos) {
 try_again:
 		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
@@ -324,7 +324,6 @@
 
 static void record__init_features(struct record *rec)
 {
-	struct perf_evlist *evsel_list = rec->evlist;
 	struct perf_session *session = rec->session;
 	int feat;
 
@@ -334,13 +333,29 @@
 	if (rec->no_buildid)
 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
 
-	if (!have_tracepoints(&evsel_list->entries))
+	if (!have_tracepoints(&rec->evlist->entries))
 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
 
 	if (!rec->opts.branch_stack)
 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
 }
 
+static volatile int workload_exec_errno;
+
+/*
+ * perf_evlist__prepare_workload will send a SIGUSR1
+ * if the fork fails, since we asked by setting its
+ * want_signal to true.
+ */
+static void workload_exec_failed_signal(int signo, siginfo_t *info,
+					void *ucontext __maybe_unused)
+{
+	workload_exec_errno = info->si_value.sival_int;
+	done = 1;
+	signr = signo;
+	child_finished = 1;
+}
+
 static int __cmd_record(struct record *rec, int argc, const char **argv)
 {
 	int err;
@@ -349,7 +364,6 @@
 	struct machine *machine;
 	struct perf_tool *tool = &rec->tool;
 	struct record_opts *opts = &rec->opts;
-	struct perf_evlist *evsel_list = rec->evlist;
 	struct perf_data_file *file = &rec->file;
 	struct perf_session *session;
 	bool disabled = false;
@@ -359,7 +373,6 @@
 	on_exit(record__sig_exit, rec);
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
-	signal(SIGUSR1, sig_handler);
 	signal(SIGTERM, sig_handler);
 
 	session = perf_session__new(file, false, NULL);
@@ -373,9 +386,9 @@
 	record__init_features(rec);
 
 	if (forks) {
-		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
+		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
 						    argv, file->is_pipe,
-						    true);
+						    workload_exec_failed_signal);
 		if (err < 0) {
 			pr_err("Couldn't run the workload!\n");
 			goto out_delete_session;
@@ -387,7 +400,7 @@
 		goto out_delete_session;
 	}
 
-	if (!evsel_list->nr_groups)
+	if (!rec->evlist->nr_groups)
 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
 
 	/*
@@ -400,7 +413,7 @@
 		if (err < 0)
 			goto out_delete_session;
 	} else {
-		err = perf_session__write_header(session, evsel_list,
+		err = perf_session__write_header(session, rec->evlist,
 						 file->fd, false);
 		if (err < 0)
 			goto out_delete_session;
@@ -424,7 +437,7 @@
 			goto out_delete_session;
 		}
 
-		if (have_tracepoints(&evsel_list->entries)) {
+		if (have_tracepoints(&rec->evlist->entries)) {
 			/*
 			 * FIXME err <= 0 here actually means that
 			 * there were no tracepoints so its not really
@@ -433,7 +446,7 @@
 			 * return this more properly and also
 			 * propagate errors that now are calling die()
 			 */
-			err = perf_event__synthesize_tracing_data(tool, file->fd, evsel_list,
+			err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
 								  process_synthesized_event);
 			if (err <= 0) {
 				pr_err("Couldn't record tracing data.\n");
@@ -465,7 +478,7 @@
 					 perf_event__synthesize_guest_os, tool);
 	}
 
-	err = __machine__synthesize_threads(machine, tool, &opts->target, evsel_list->threads,
+	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
 					    process_synthesized_event, opts->sample_address);
 	if (err != 0)
 		goto out_delete_session;
@@ -486,14 +499,19 @@
 	 * (apart from group members) have enable_on_exec=1 set,
 	 * so don't spoil it by prematurely enabling them.
 	 */
-	if (!target__none(&opts->target))
-		perf_evlist__enable(evsel_list);
+	if (!target__none(&opts->target) && !opts->initial_delay)
+		perf_evlist__enable(rec->evlist);
 
 	/*
 	 * Let the child rip
 	 */
 	if (forks)
-		perf_evlist__start_workload(evsel_list);
+		perf_evlist__start_workload(rec->evlist);
+
+	if (opts->initial_delay) {
+		usleep(opts->initial_delay * 1000);
+		perf_evlist__enable(rec->evlist);
+	}
 
 	for (;;) {
 		int hits = rec->samples;
@@ -506,7 +524,7 @@
 		if (hits == rec->samples) {
 			if (done)
 				break;
-			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
+			err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
 			waking++;
 		}
 
@@ -516,11 +534,19 @@
 		 * disable events in this case.
 		 */
 		if (done && !disabled && !target__none(&opts->target)) {
-			perf_evlist__disable(evsel_list);
+			perf_evlist__disable(rec->evlist);
 			disabled = true;
 		}
 	}
 
+	if (forks && workload_exec_errno) {
+		char msg[512];
+		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
+		pr_err("Workload failed: %s\n", emsg);
+		err = -1;
+		goto out_delete_session;
+	}
+
 	if (quiet || signr == SIGUSR1)
 		return 0;
 
@@ -856,6 +882,8 @@
 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
 		     "monitor event in cgroup name only",
 		     parse_cgroups),
+	OPT_UINTEGER(0, "initial-delay", &record.opts.initial_delay,
+		  "ms to wait before starting measurement after program start"),
 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
 		   "user to profile"),
 
@@ -878,16 +906,13 @@
 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	int err = -ENOMEM;
-	struct perf_evlist *evsel_list;
 	struct record *rec = &record;
 	char errbuf[BUFSIZ];
 
-	evsel_list = perf_evlist__new();
-	if (evsel_list == NULL)
+	rec->evlist = perf_evlist__new();
+	if (rec->evlist == NULL)
 		return -ENOMEM;
 
-	rec->evlist = evsel_list;
-
 	argc = parse_options(argc, argv, record_options, record_usage,
 			    PARSE_OPT_STOP_AT_NON_OPTION);
 	if (!argc && target__none(&rec->opts.target))
@@ -914,8 +939,8 @@
 	if (rec->no_buildid_cache || rec->no_buildid)
 		disable_buildid_cache();
 
-	if (evsel_list->nr_entries == 0 &&
-	    perf_evlist__add_default(evsel_list) < 0) {
+	if (rec->evlist->nr_entries == 0 &&
+	    perf_evlist__add_default(rec->evlist) < 0) {
 		pr_err("Not enough memory for event selector list\n");
 		goto out_symbol_exit;
 	}
@@ -941,20 +966,15 @@
 	}
 
 	err = -ENOMEM;
-	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
+	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
 		usage_with_options(record_usage, record_options);
 
 	if (record_opts__config(&rec->opts)) {
 		err = -EINVAL;
-		goto out_free_fd;
+		goto out_symbol_exit;
 	}
 
 	err = __cmd_record(&record, argc, argv);
-
-	perf_evlist__munmap(evsel_list);
-	perf_evlist__close(evsel_list);
-out_free_fd:
-	perf_evlist__delete_maps(evsel_list);
 out_symbol_exit:
 	symbol__exit();
 	return err;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index bf8dd2e..46864dd 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -384,7 +384,7 @@
 {
 	struct perf_evsel *pos;
 
-	list_for_each_entry(pos, &evlist->entries, node) {
+	evlist__for_each(evlist, pos) {
 		struct hists *hists = &pos->hists;
 		const char *evname = perf_evsel__name(pos);
 
@@ -412,40 +412,11 @@
 	return 0;
 }
 
-static int __cmd_report(struct report *rep)
+static void report__warn_kptr_restrict(const struct report *rep)
 {
-	int ret = -EINVAL;
-	u64 nr_samples;
-	struct perf_session *session = rep->session;
-	struct perf_evsel *pos;
-	struct map *kernel_map;
-	struct kmap *kernel_kmap;
-	const char *help = "For a higher level overview, try: perf report --sort comm,dso";
-	struct ui_progress prog;
-	struct perf_data_file *file = session->file;
+	struct map *kernel_map = rep->session->machines.host.vmlinux_maps[MAP__FUNCTION];
+	struct kmap *kernel_kmap = map__kmap(kernel_map);
 
-	signal(SIGINT, sig_handler);
-
-	if (rep->cpu_list) {
-		ret = perf_session__cpu_bitmap(session, rep->cpu_list,
-					       rep->cpu_bitmap);
-		if (ret)
-			return ret;
-	}
-
-	if (rep->show_threads)
-		perf_read_values_init(&rep->show_threads_values);
-
-	ret = report__setup_sample_type(rep);
-	if (ret)
-		return ret;
-
-	ret = perf_session__process_events(session, &rep->tool);
-	if (ret)
-		return ret;
-
-	kernel_map = session->machines.host.vmlinux_maps[MAP__FUNCTION];
-	kernel_kmap = map__kmap(kernel_map);
 	if (kernel_map == NULL ||
 	    (kernel_map->dso->hit &&
 	     (kernel_kmap->ref_reloc_sym == NULL ||
@@ -468,28 +439,73 @@
 "Samples in kernel modules can't be resolved as well.\n\n",
 		desc);
 	}
+}
 
-	if (use_browser == 0) {
-		if (verbose > 3)
-			perf_session__fprintf(session, stdout);
+static int report__gtk_browse_hists(struct report *rep, const char *help)
+{
+	int (*hist_browser)(struct perf_evlist *evlist, const char *help,
+			    struct hist_browser_timer *timer, float min_pcnt);
 
-		if (verbose > 2)
-			perf_session__fprintf_dsos(session, stdout);
+	hist_browser = dlsym(perf_gtk_handle, "perf_evlist__gtk_browse_hists");
 
-		if (dump_trace) {
-			perf_session__fprintf_nr_events(session, stdout);
-			return 0;
-		}
+	if (hist_browser == NULL) {
+		ui__error("GTK browser not found!\n");
+		return -1;
 	}
 
-	nr_samples = 0;
-	list_for_each_entry(pos, &session->evlist->entries, node)
+	return hist_browser(rep->session->evlist, help, NULL, rep->min_percent);
+}
+
+static int report__browse_hists(struct report *rep)
+{
+	int ret;
+	struct perf_session *session = rep->session;
+	struct perf_evlist *evlist = session->evlist;
+	const char *help = "For a higher level overview, try: perf report --sort comm,dso";
+
+	switch (use_browser) {
+	case 1:
+		ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
+						    rep->min_percent,
+						    &session->header.env);
+		/*
+		 * Usually "ret" is the last pressed key, and we only
+		 * care if the key notifies us to switch data file.
+		 */
+		if (ret != K_SWITCH_INPUT_DATA)
+			ret = 0;
+		break;
+	case 2:
+		ret = report__gtk_browse_hists(rep, help);
+		break;
+	default:
+		ret = perf_evlist__tty_browse_hists(evlist, rep, help);
+		break;
+	}
+
+	return ret;
+}
+
+static u64 report__collapse_hists(struct report *rep)
+{
+	struct ui_progress prog;
+	struct perf_evsel *pos;
+	u64 nr_samples = 0;
+	/*
+ 	 * Count number of histogram entries to use when showing progress,
+ 	 * reusing nr_samples variable.
+ 	 */
+	evlist__for_each(rep->session->evlist, pos)
 		nr_samples += pos->hists.nr_entries;
 
 	ui_progress__init(&prog, nr_samples, "Merging related events...");
-
+	/*
+	 * Count total number of samples, will be used to check if this
+ 	 * session had any.
+ 	 */
 	nr_samples = 0;
-	list_for_each_entry(pos, &session->evlist->entries, node) {
+
+	evlist__for_each(rep->session->evlist, pos) {
 		struct hists *hists = &pos->hists;
 
 		if (pos->idx == 0)
@@ -507,8 +523,57 @@
 			hists__link(leader_hists, hists);
 		}
 	}
+
 	ui_progress__finish();
 
+	return nr_samples;
+}
+
+static int __cmd_report(struct report *rep)
+{
+	int ret;
+	u64 nr_samples;
+	struct perf_session *session = rep->session;
+	struct perf_evsel *pos;
+	struct perf_data_file *file = session->file;
+
+	signal(SIGINT, sig_handler);
+
+	if (rep->cpu_list) {
+		ret = perf_session__cpu_bitmap(session, rep->cpu_list,
+					       rep->cpu_bitmap);
+		if (ret)
+			return ret;
+	}
+
+	if (rep->show_threads)
+		perf_read_values_init(&rep->show_threads_values);
+
+	ret = report__setup_sample_type(rep);
+	if (ret)
+		return ret;
+
+	ret = perf_session__process_events(session, &rep->tool);
+	if (ret)
+		return ret;
+
+	report__warn_kptr_restrict(rep);
+
+	if (use_browser == 0) {
+		if (verbose > 3)
+			perf_session__fprintf(session, stdout);
+
+		if (verbose > 2)
+			perf_session__fprintf_dsos(session, stdout);
+
+		if (dump_trace) {
+			perf_session__fprintf_nr_events(session, stdout);
+			return 0;
+		}
+	}
+
+	nr_samples = report__collapse_hists(rep);
+
 	if (session_done())
 		return 0;
 
@@ -517,41 +582,10 @@
 		return 0;
 	}
 
-	list_for_each_entry(pos, &session->evlist->entries, node)
+	evlist__for_each(session->evlist, pos)
 		hists__output_resort(&pos->hists);
 
-	if (use_browser > 0) {
-		if (use_browser == 1) {
-			ret = perf_evlist__tui_browse_hists(session->evlist,
-							help, NULL,
-							rep->min_percent,
-							&session->header.env);
-			/*
-			 * Usually "ret" is the last pressed key, and we only
-			 * care if the key notifies us to switch data file.
-			 */
-			if (ret != K_SWITCH_INPUT_DATA)
-				ret = 0;
-
-		} else if (use_browser == 2) {
-			int (*hist_browser)(struct perf_evlist *,
-					    const char *,
-					    struct hist_browser_timer *,
-					    float min_pcnt);
-
-			hist_browser = dlsym(perf_gtk_handle,
-					     "perf_evlist__gtk_browse_hists");
-			if (hist_browser == NULL) {
-				ui__error("GTK browser not found!\n");
-				return ret;
-			}
-			hist_browser(session->evlist, help, NULL,
-				     rep->min_percent);
-		}
-	} else
-		perf_evlist__tty_browse_hists(session->evlist, rep, help);
-
-	return ret;
+	return report__browse_hists(rep);
 }
 
 static int
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 6040000..9e9c91f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -603,7 +603,7 @@
 	if (evsel->attr.type >= PERF_TYPE_MAX)
 		return 0;
 
-	list_for_each_entry(pos, &evlist->entries, node) {
+	evlist__for_each(evlist, pos) {
 		if (pos->attr.type == evsel->attr.type && pos != evsel)
 			return 0;
 	}
@@ -1309,8 +1309,7 @@
 			snprintf(evname, len + 1, "%s", p);
 
 			match = 0;
-			list_for_each_entry(pos,
-					&session->evlist->entries, node) {
+			evlist__for_each(session->evlist, pos) {
 				if (!strcmp(perf_evsel__name(pos), evname)) {
 					match = 1;
 					break;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 106a5e5..8b0e1c9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -214,7 +214,7 @@
 {
 	struct perf_evsel *evsel;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		perf_evsel__free_stat_priv(evsel);
 		perf_evsel__free_counts(evsel);
 		perf_evsel__free_prev_raw_counts(evsel);
@@ -225,7 +225,7 @@
 {
 	struct perf_evsel *evsel;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
 		    perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
 		    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
@@ -259,7 +259,7 @@
 {
 	struct perf_evsel *evsel;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		perf_evsel__reset_stat_priv(evsel);
 		perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
 	}
@@ -326,13 +326,13 @@
 
 	/* Assumes this only called when evsel_list does not change anymore. */
 	if (!array) {
-		list_for_each_entry(ev, &evsel_list->entries, node)
+		evlist__for_each(evsel_list, ev)
 			array_len++;
 		array = malloc(array_len * sizeof(void *));
 		if (!array)
 			exit(ENOMEM);
 		j = 0;
-		list_for_each_entry(ev, &evsel_list->entries, node)
+		evlist__for_each(evsel_list, ev)
 			array[j++] = ev;
 	}
 	if (n < array_len)
@@ -440,13 +440,13 @@
 	char prefix[64];
 
 	if (aggr_mode == AGGR_GLOBAL) {
-		list_for_each_entry(counter, &evsel_list->entries, node) {
+		evlist__for_each(evsel_list, counter) {
 			ps = counter->priv;
 			memset(ps->res_stats, 0, sizeof(ps->res_stats));
 			read_counter_aggr(counter);
 		}
 	} else	{
-		list_for_each_entry(counter, &evsel_list->entries, node) {
+		evlist__for_each(evsel_list, counter) {
 			ps = counter->priv;
 			memset(ps->res_stats, 0, sizeof(ps->res_stats));
 			read_counter(counter);
@@ -483,12 +483,12 @@
 		print_aggr(prefix);
 		break;
 	case AGGR_NONE:
-		list_for_each_entry(counter, &evsel_list->entries, node)
+		evlist__for_each(evsel_list, counter)
 			print_counter(counter, prefix);
 		break;
 	case AGGR_GLOBAL:
 	default:
-		list_for_each_entry(counter, &evsel_list->entries, node)
+		evlist__for_each(evsel_list, counter)
 			print_counter_aggr(counter, prefix);
 	}
 
@@ -504,11 +504,24 @@
 			nthreads = thread_map__nr(evsel_list->threads);
 
 		usleep(initial_delay * 1000);
-		list_for_each_entry(counter, &evsel_list->entries, node)
+		evlist__for_each(evsel_list, counter)
 			perf_evsel__enable(counter, ncpus, nthreads);
 	}
 }
 
+static volatile int workload_exec_errno;
+
+/*
+ * perf_evlist__prepare_workload will send a SIGUSR1
+ * if the fork fails, since we asked by setting its
+ * want_signal to true.
+ */
+static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
+					void *ucontext __maybe_unused)
+{
+	workload_exec_errno = info->si_value.sival_int;
+}
+
 static int __run_perf_stat(int argc, const char **argv)
 {
 	char msg[512];
@@ -528,8 +541,8 @@
 	}
 
 	if (forks) {
-		if (perf_evlist__prepare_workload(evsel_list, &target, argv,
-						  false, false) < 0) {
+		if (perf_evlist__prepare_workload(evsel_list, &target, argv, false,
+						  workload_exec_failed_signal) < 0) {
 			perror("failed to prepare workload");
 			return -1;
 		}
@@ -539,7 +552,7 @@
 	if (group)
 		perf_evlist__set_leader(evsel_list);
 
-	list_for_each_entry(counter, &evsel_list->entries, node) {
+	evlist__for_each(evsel_list, counter) {
 		if (create_perf_stat_counter(counter) < 0) {
 			/*
 			 * PPC returns ENXIO for HW counters until 2.6.37
@@ -594,6 +607,13 @@
 			}
 		}
 		wait(&status);
+
+		if (workload_exec_errno) {
+			const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
+			pr_err("Workload failed: %s\n", emsg);
+			return -1;
+		}
+
 		if (WIFSIGNALED(status))
 			psignal(WTERMSIG(status), argv[0]);
 	} else {
@@ -610,13 +630,13 @@
 	update_stats(&walltime_nsecs_stats, t1 - t0);
 
 	if (aggr_mode == AGGR_GLOBAL) {
-		list_for_each_entry(counter, &evsel_list->entries, node) {
+		evlist__for_each(evsel_list, counter) {
 			read_counter_aggr(counter);
 			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
 					     thread_map__nr(evsel_list->threads));
 		}
 	} else {
-		list_for_each_entry(counter, &evsel_list->entries, node) {
+		evlist__for_each(evsel_list, counter) {
 			read_counter(counter);
 			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
 		}
@@ -625,7 +645,7 @@
 	return WEXITSTATUS(status);
 }
 
-static int run_perf_stat(int argc __maybe_unused, const char **argv)
+static int run_perf_stat(int argc, const char **argv)
 {
 	int ret;
 
@@ -1097,7 +1117,7 @@
 
 	for (s = 0; s < aggr_map->nr; s++) {
 		id = aggr_map->map[s];
-		list_for_each_entry(counter, &evsel_list->entries, node) {
+		evlist__for_each(evsel_list, counter) {
 			val = ena = run = 0;
 			nr = 0;
 			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
@@ -1308,11 +1328,11 @@
 		print_aggr(NULL);
 		break;
 	case AGGR_GLOBAL:
-		list_for_each_entry(counter, &evsel_list->entries, node)
+		evlist__for_each(evsel_list, counter)
 			print_counter_aggr(counter, NULL);
 		break;
 	case AGGR_NONE:
-		list_for_each_entry(counter, &evsel_list->entries, node)
+		evlist__for_each(evsel_list, counter)
 			print_counter(counter, NULL);
 		break;
 	default:
@@ -1762,14 +1782,14 @@
 	if (interval && interval < 100) {
 		pr_err("print interval must be >= 100ms\n");
 		parse_options_usage(stat_usage, options, "I", 1);
-		goto out_free_maps;
+		goto out;
 	}
 
 	if (perf_evlist__alloc_stats(evsel_list, interval))
-		goto out_free_maps;
+		goto out;
 
 	if (perf_stat_init_aggr_mode())
-		goto out_free_maps;
+		goto out;
 
 	/*
 	 * We dont want to block the signals - that would cause
@@ -1801,8 +1821,6 @@
 		print_stat(argc, argv);
 
 	perf_evlist__free_stats(evsel_list);
-out_free_maps:
-	perf_evlist__delete_maps(evsel_list);
 out:
 	perf_evlist__delete(evsel_list);
 	return status;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 172e91a..569dd87 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -482,7 +482,7 @@
 
 				fprintf(stderr, "\nAvailable events:");
 
-				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
+				evlist__for_each(top->evlist, top->sym_evsel)
 					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
 
 				prompt_integer(&counter, "Enter details event counter");
@@ -493,7 +493,7 @@
 					sleep(1);
 					break;
 				}
-				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
+				evlist__for_each(top->evlist, top->sym_evsel)
 					if (top->sym_evsel->idx == counter)
 						break;
 			} else
@@ -575,7 +575,7 @@
 	 * Zooming in/out UIDs. For now juse use whatever the user passed
 	 * via --uid.
 	 */
-	list_for_each_entry(pos, &top->evlist->entries, node)
+	evlist__for_each(top->evlist, pos)
 		pos->hists.uid_filter_str = top->record_opts.target.uid_str;
 
 	perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent,
@@ -858,7 +858,7 @@
 
 	perf_evlist__config(evlist, opts);
 
-	list_for_each_entry(counter, &evlist->entries, node) {
+	evlist__for_each(evlist, counter) {
 try_again:
 		if (perf_evsel__open(counter, top->evlist->cpus,
 				     top->evlist->threads) < 0) {
@@ -1171,7 +1171,7 @@
 	if (!top.evlist->nr_entries &&
 	    perf_evlist__add_default(top.evlist) < 0) {
 		ui__error("Not enough memory for event selector list\n");
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	symbol_conf.nr_events = top.evlist->nr_entries;
@@ -1181,7 +1181,7 @@
 
 	if (record_opts__config(opts)) {
 		status = -EINVAL;
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	top.sym_evsel = perf_evlist__first(top.evlist);
@@ -1206,8 +1206,6 @@
 
 	status = __cmd_top(&top);
 
-out_delete_maps:
-	perf_evlist__delete_maps(top.evlist);
 out_delete_evlist:
 	perf_evlist__delete(top.evlist);
 
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index c5b4bc5..4bd44ab 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1160,26 +1160,27 @@
 	struct record_opts	opts;
 	struct machine		*host;
 	u64			base_time;
-	bool			full_time;
 	FILE			*output;
 	unsigned long		nr_events;
 	struct strlist		*ev_qualifier;
-	bool			not_ev_qualifier;
-	bool			live;
 	const char 		*last_vfs_getname;
 	struct intlist		*tid_list;
 	struct intlist		*pid_list;
+	double			duration_filter;
+	double			runtime_ms;
+	struct {
+		u64		vfs_getname,
+				proc_getname;
+	} stats;
+	bool			not_ev_qualifier;
+	bool			live;
+	bool			full_time;
 	bool			sched;
 	bool			multiple_threads;
 	bool			summary;
 	bool			summary_only;
 	bool			show_comm;
 	bool			show_tool_stats;
-	double			duration_filter;
-	double			runtime_ms;
-	struct {
-		u64		vfs_getname, proc_getname;
-	} stats;
 };
 
 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
@@ -1885,7 +1886,7 @@
 	err = trace__symbols_init(trace, evlist);
 	if (err < 0) {
 		fprintf(trace->output, "Problems initializing symbol libraries!\n");
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	perf_evlist__config(evlist, &trace->opts);
@@ -1895,10 +1896,10 @@
 
 	if (forks) {
 		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
-						    argv, false, false);
+						    argv, false, NULL);
 		if (err < 0) {
 			fprintf(trace->output, "Couldn't run the workload!\n");
-			goto out_delete_maps;
+			goto out_delete_evlist;
 		}
 	}
 
@@ -1909,7 +1910,7 @@
 	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
 	if (err < 0) {
 		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
-		goto out_close_evlist;
+		goto out_delete_evlist;
 	}
 
 	perf_evlist__enable(evlist);
@@ -1993,11 +1994,6 @@
 		}
 	}
 
-	perf_evlist__munmap(evlist);
-out_close_evlist:
-	perf_evlist__close(evlist);
-out_delete_maps:
-	perf_evlist__delete_maps(evlist);
 out_delete_evlist:
 	perf_evlist__delete(evlist);
 out:
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 14faeeb..01dd43d 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -1,28 +1,26 @@
-uname_M := $(shell uname -m 2>/dev/null || echo not)
 
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
-                                  -e s/arm.*/arm/ -e s/sa110/arm/ \
-                                  -e s/s390x/s390/ -e s/parisc64/parisc/ \
-                                  -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
-                                  -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ )
-NO_PERF_REGS := 1
-CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
-
-# Additional ARCH settings for x86
-ifeq ($(ARCH),i386)
-  override ARCH := x86
-  NO_PERF_REGS := 0
-  LIBUNWIND_LIBS = -lunwind -lunwind-x86
+ifeq ($(src-perf),)
+src-perf := $(srctree)/tools/perf
 endif
 
-ifeq ($(ARCH),x86_64)
-  override ARCH := x86
-  IS_X86_64 := 0
-  ifeq (, $(findstring m32,$(CFLAGS)))
-    IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1)
-  endif
+ifeq ($(obj-perf),)
+obj-perf := $(OUTPUT)
+endif
+
+ifneq ($(obj-perf),)
+obj-perf := $(abspath $(obj-perf))/
+endif
+
+LIB_INCLUDE := $(srctree)/tools/lib/
+CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
+
+include $(src-perf)/config/Makefile.arch
+
+NO_PERF_REGS := 1
+
+# Additional ARCH settings for x86
+ifeq ($(ARCH),x86)
   ifeq (${IS_X86_64}, 1)
-    RAW_ARCH := x86_64
     CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
     ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
     LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
@@ -55,29 +53,12 @@
   FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS)
   FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
   FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS)
-  #  and the flags for the test-all case
-  FEATURE_CHECK_CFLAGS-all += $(LIBUNWIND_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-all += $(LIBUNWIND_LDFLAGS)
 endif
 
 ifeq ($(NO_PERF_REGS),0)
   CFLAGS += -DHAVE_PERF_REGS_SUPPORT
 endif
 
-ifeq ($(src-perf),)
-src-perf := $(srctree)/tools/perf
-endif
-
-ifeq ($(obj-perf),)
-obj-perf := $(OUTPUT)
-endif
-
-ifneq ($(obj-perf),)
-obj-perf := $(abspath $(obj-perf))/
-endif
-
-LIB_INCLUDE := $(srctree)/tools/lib/
-
 # include ARCH specific config
 -include $(src-perf)/arch/$(ARCH)/Makefile
 
@@ -168,6 +149,17 @@
 	stackprotector-all		\
 	timerfd
 
+# Set FEATURE_CHECK_(C|LD)FLAGS-all for all CORE_FEATURE_TESTS features.
+# If in the future we need per-feature checks/flags for features not
+# mentioned in this list we need to refactor this ;-).
+set_test_all_flags = $(eval $(set_test_all_flags_code))
+define set_test_all_flags_code
+  FEATURE_CHECK_CFLAGS-all  += $(FEATURE_CHECK_CFLAGS-$(1))
+  FEATURE_CHECK_LDFLAGS-all += $(FEATURE_CHECK_LDFLAGS-$(1))
+endef
+
+$(foreach feat,$(CORE_FEATURE_TESTS),$(call set_test_all_flags,$(feat)))
+
 #
 # So here we detect whether test-all was rebuilt, to be able
 # to skip the print-out of the long features list if the file
@@ -240,6 +232,7 @@
 
 CFLAGS += -I$(src-perf)/util/include
 CFLAGS += -I$(src-perf)/arch/$(ARCH)/include
+CFLAGS += -I$(srctree)/tools/include/
 CFLAGS += -I$(srctree)/arch/$(ARCH)/include/uapi
 CFLAGS += -I$(srctree)/arch/$(ARCH)/include
 CFLAGS += -I$(srctree)/include/uapi
diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch
new file mode 100644
index 0000000..fef8ae9
--- /dev/null
+++ b/tools/perf/config/Makefile.arch
@@ -0,0 +1,22 @@
+
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
+                                  -e s/arm.*/arm/ -e s/sa110/arm/ \
+                                  -e s/s390x/s390/ -e s/parisc64/parisc/ \
+                                  -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
+                                  -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ )
+
+# Additional ARCH settings for x86
+ifeq ($(ARCH),i386)
+  override ARCH := x86
+endif
+
+ifeq ($(ARCH),x86_64)
+  override ARCH := x86
+  IS_X86_64 := 0
+  ifeq (, $(findstring m32,$(CFLAGS)))
+    IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1)
+    RAW_ARCH := x86_64
+  endif
+endif
diff --git a/tools/perf/config/feature-checks/test-volatile-register-var.c b/tools/perf/config/feature-checks/test-volatile-register-var.c
deleted file mode 100644
index c9f398d..0000000
--- a/tools/perf/config/feature-checks/test-volatile-register-var.c
+++ /dev/null
@@ -1,6 +0,0 @@
-#include <stdio.h>
-
-int main(void)
-{
-	return puts("hi");
-}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index b1cc84b..af1ce6e 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -269,6 +269,7 @@
 	u64	     user_interval;
 	u16	     stack_dump_size;
 	bool	     sample_transaction;
+	unsigned     initial_delay;
 };
 
 #endif
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 4248d1e..653a8fe 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -540,14 +540,11 @@
 		err = TEST_CODE_READING_OK;
 out_err:
 	if (evlist) {
-		perf_evlist__munmap(evlist);
-		perf_evlist__close(evlist);
 		perf_evlist__delete(evlist);
-	}
-	if (cpus)
+	} else {
 		cpu_map__delete(cpus);
-	if (threads)
 		thread_map__delete(threads);
+	}
 	machines__destroy_kernel_maps(&machines);
 	machine__delete_threads(machine);
 	machines__exit(&machines);
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index 0197bda..465cdbc 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -79,7 +79,7 @@
 	}
 
 	err = 0;
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) {
 			--err;
 			pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]);
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 173bf42..2b6519e 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -208,7 +208,7 @@
 	 * However the second evsel also has a collapsed entry for
 	 * "bash [libc] malloc" so total 9 entries will be in the tree.
 	 */
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) {
 			const union perf_event event = {
 				.header = {
@@ -466,7 +466,7 @@
 	if (err < 0)
 		goto out;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		hists__collapse_resort(&evsel->hists, NULL);
 
 		if (verbose > 2)
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 27eb751..497957f 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -142,14 +142,11 @@
 out_err:
 	if (evlist) {
 		perf_evlist__disable(evlist);
-		perf_evlist__munmap(evlist);
-		perf_evlist__close(evlist);
 		perf_evlist__delete(evlist);
-	}
-	if (cpus)
+	} else {
 		cpu_map__delete(cpus);
-	if (threads)
 		thread_map__delete(threads);
+	}
 
 	return err;
 }
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index f641c35..00544b8 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -1,6 +1,16 @@
 PERF := .
 MK   := Makefile
 
+include config/Makefile.arch
+
+# FIXME looks like x86 is the only arch running tests ;-)
+# we need some IS_(32/64) flag to make this generic
+ifeq ($(IS_X86_64),1)
+lib = lib64
+else
+lib = lib
+endif
+
 has = $(shell which $1 2>/dev/null)
 
 # standard single make variable specified
@@ -118,16 +128,16 @@
 installed_files_bin += etc/bash_completion.d/perf
 installed_files_bin += libexec/perf-core/perf-archive
 
-installed_files_plugins := lib64/traceevent/plugins/plugin_cfg80211.so
-installed_files_plugins += lib64/traceevent/plugins/plugin_scsi.so
-installed_files_plugins += lib64/traceevent/plugins/plugin_xen.so
-installed_files_plugins += lib64/traceevent/plugins/plugin_function.so
-installed_files_plugins += lib64/traceevent/plugins/plugin_sched_switch.so
-installed_files_plugins += lib64/traceevent/plugins/plugin_mac80211.so
-installed_files_plugins += lib64/traceevent/plugins/plugin_kvm.so
-installed_files_plugins += lib64/traceevent/plugins/plugin_kmem.so
-installed_files_plugins += lib64/traceevent/plugins/plugin_hrtimer.so
-installed_files_plugins += lib64/traceevent/plugins/plugin_jbd2.so
+installed_files_plugins := $(lib)/traceevent/plugins/plugin_cfg80211.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_scsi.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_xen.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_function.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_sched_switch.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_mac80211.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_kvm.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_kmem.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_hrtimer.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_jbd2.so
 
 installed_files_all := $(installed_files_bin)
 installed_files_all += $(installed_files_plugins)
@@ -206,10 +216,16 @@
 	rm -rf $$TMP_O \
 	rm -rf $$TMP_DEST
 
-all: $(run) $(run_O)
+tarpkg:
+	@cmd="$(PERF)/tests/perf-targz-src-pkg $(PERF)"; \
+	echo "- $@: $$cmd" && echo $$cmd > $@ && \
+	( eval $$cmd ) >> $@ 2>&1
+	
+
+all: $(run) $(run_O) tarpkg
 	@echo OK
 
 out: $(run_O)
 	@echo OK
 
-.PHONY: all $(run) $(run_O) clean
+.PHONY: all $(run) $(run_O) tarpkg clean
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index d64ab79..1422634 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -68,7 +68,7 @@
 		evsels[i] = perf_evsel__newtp("syscalls", name);
 		if (evsels[i] == NULL) {
 			pr_debug("perf_evsel__new\n");
-			goto out_free_evlist;
+			goto out_delete_evlist;
 		}
 
 		evsels[i]->attr.wakeup_events = 1;
@@ -80,7 +80,7 @@
 			pr_debug("failed to open counter: %s, "
 				 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
 				 strerror(errno));
-			goto out_close_fd;
+			goto out_delete_evlist;
 		}
 
 		nr_events[i] = 0;
@@ -90,7 +90,7 @@
 	if (perf_evlist__mmap(evlist, 128, true) < 0) {
 		pr_debug("failed to mmap events: %d (%s)\n", errno,
 			 strerror(errno));
-		goto out_close_fd;
+		goto out_delete_evlist;
 	}
 
 	for (i = 0; i < nsyscalls; ++i)
@@ -105,13 +105,13 @@
 		if (event->header.type != PERF_RECORD_SAMPLE) {
 			pr_debug("unexpected %s event\n",
 				 perf_event__name(event->header.type));
-			goto out_munmap;
+			goto out_delete_evlist;
 		}
 
 		err = perf_evlist__parse_sample(evlist, event, &sample);
 		if (err) {
 			pr_err("Can't parse sample, err = %d\n", err);
-			goto out_munmap;
+			goto out_delete_evlist;
 		}
 
 		err = -1;
@@ -119,30 +119,27 @@
 		if (evsel == NULL) {
 			pr_debug("event with id %" PRIu64
 				 " doesn't map to an evsel\n", sample.id);
-			goto out_munmap;
+			goto out_delete_evlist;
 		}
 		nr_events[evsel->idx]++;
 		perf_evlist__mmap_consume(evlist, 0);
 	}
 
 	err = 0;
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
 			pr_debug("expected %d %s events, got %d\n",
 				 expected_nr_events[evsel->idx],
 				 perf_evsel__name(evsel), nr_events[evsel->idx]);
 			err = -1;
-			goto out_munmap;
+			goto out_delete_evlist;
 		}
 	}
 
-out_munmap:
-	perf_evlist__munmap(evlist);
-out_close_fd:
-	for (i = 0; i < nsyscalls; ++i)
-		perf_evsel__close_fd(evsels[i], 1, threads->nr);
-out_free_evlist:
+out_delete_evlist:
 	perf_evlist__delete(evlist);
+	cpus	= NULL;
+	threads = NULL;
 out_free_cpus:
 	cpu_map__delete(cpus);
 out_free_threads:
diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/open-syscall-tp-fields.c
index 774620a..5a016f6 100644
--- a/tools/perf/tests/open-syscall-tp-fields.c
+++ b/tools/perf/tests/open-syscall-tp-fields.c
@@ -48,13 +48,13 @@
 	err = perf_evlist__open(evlist);
 	if (err < 0) {
 		pr_debug("perf_evlist__open: %s\n", strerror(errno));
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	err = perf_evlist__mmap(evlist, UINT_MAX, false);
 	if (err < 0) {
 		pr_debug("perf_evlist__mmap: %s\n", strerror(errno));
-		goto out_close_evlist;
+		goto out_delete_evlist;
 	}
 
 	perf_evlist__enable(evlist);
@@ -85,7 +85,7 @@
 				err = perf_evsel__parse_sample(evsel, event, &sample);
 				if (err) {
 					pr_err("Can't parse sample, err = %d\n", err);
-					goto out_munmap;
+					goto out_delete_evlist;
 				}
 
 				tp_flags = perf_evsel__intval(evsel, &sample, "flags");
@@ -93,7 +93,7 @@
 				if (flags != tp_flags) {
 					pr_debug("%s: Expected flags=%#x, got %#x\n",
 						 __func__, flags, tp_flags);
-					goto out_munmap;
+					goto out_delete_evlist;
 				}
 
 				goto out_ok;
@@ -105,17 +105,11 @@
 
 		if (++nr_polls > 5) {
 			pr_debug("%s: no events!\n", __func__);
-			goto out_munmap;
+			goto out_delete_evlist;
 		}
 	}
 out_ok:
 	err = 0;
-out_munmap:
-	perf_evlist__munmap(evlist);
-out_close_evlist:
-	perf_evlist__close(evlist);
-out_delete_maps:
-	perf_evlist__delete_maps(evlist);
 out_delete_evlist:
 	perf_evlist__delete(evlist);
 out:
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index e4ce8ae..4db0ae6 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -30,7 +30,7 @@
 	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
 	TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		TEST_ASSERT_VAL("wrong type",
 			PERF_TYPE_TRACEPOINT == evsel->attr.type);
 		TEST_ASSERT_VAL("wrong sample_type",
@@ -201,7 +201,7 @@
 
 	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		TEST_ASSERT_VAL("wrong exclude_user",
 				!evsel->attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel",
@@ -1385,10 +1385,10 @@
 	if (ret) {
 		pr_debug("failed to parse event '%s', err %d\n",
 			 e->name, ret);
-		return ret;
+	} else {
+		ret = e->check(evlist);
 	}
-
-	ret = e->check(evlist);
+	
 	perf_evlist__delete(evlist);
 
 	return ret;
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index eeba562..39cc7c3 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -83,11 +83,10 @@
 	 * so that we have time to open the evlist (calling sys_perf_event_open
 	 * on all the fds) and then mmap them.
 	 */
-	err = perf_evlist__prepare_workload(evlist, &opts.target, argv,
-					    false, false);
+	err = perf_evlist__prepare_workload(evlist, &opts.target, argv, false, NULL);
 	if (err < 0) {
 		pr_debug("Couldn't run the workload!\n");
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	/*
@@ -102,7 +101,7 @@
 	err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
 	if (err < 0) {
 		pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno));
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	cpu = err;
@@ -112,7 +111,7 @@
 	 */
 	if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
 		pr_debug("sched_setaffinity: %s\n", strerror(errno));
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	/*
@@ -122,7 +121,7 @@
 	err = perf_evlist__open(evlist);
 	if (err < 0) {
 		pr_debug("perf_evlist__open: %s\n", strerror(errno));
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	/*
@@ -133,7 +132,7 @@
 	err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
 	if (err < 0) {
 		pr_debug("perf_evlist__mmap: %s\n", strerror(errno));
-		goto out_close_evlist;
+		goto out_delete_evlist;
 	}
 
 	/*
@@ -166,7 +165,7 @@
 					if (verbose)
 						perf_event__fprintf(event, stderr);
 					pr_debug("Couldn't parse sample\n");
-					goto out_err;
+					goto out_delete_evlist;
 				}
 
 				if (verbose) {
@@ -303,12 +302,6 @@
 		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "[vdso]");
 		++errs;
 	}
-out_err:
-	perf_evlist__munmap(evlist);
-out_close_evlist:
-	perf_evlist__close(evlist);
-out_delete_maps:
-	perf_evlist__delete_maps(evlist);
 out_delete_evlist:
 	perf_evlist__delete(evlist);
 out:
diff --git a/tools/perf/tests/perf-targz-src-pkg b/tools/perf/tests/perf-targz-src-pkg
new file mode 100755
index 0000000..238aa39
--- /dev/null
+++ b/tools/perf/tests/perf-targz-src-pkg
@@ -0,0 +1,21 @@
+#!/bin/sh
+# Test one of the main kernel Makefile targets to generate a perf sources tarball
+# suitable for building outside the full kernel sources.
+#
+# This is to test that the tools/perf/MANIFEST file lists all the files needed to
+# be in such a tarball, which sometimes gets broken when we move files around,
+# like when we made some files that were in tools/perf/ available to other tools/
+# codebases by moving them to tools/include/, etc.
+
+PERF=$1	# path to the tools/perf directory, as passed by tests/make
+cd "${PERF}/../.." || exit 1	# never run make/rm in the caller's directory
+make perf-targz-src-pkg > /dev/null || exit 1	# don't go on to test a stale tarball
+TARBALL=$(ls -t perf-*.tar.gz | head -n 1)	# newest one, i.e. the one just built
+TMP_DEST=$(mktemp -d)
+tar xf "${TARBALL}" -C "${TMP_DEST}"
+rm -f "${TARBALL}"
+cd - > /dev/null
+make -C "${TMP_DEST}"/perf*/tools/perf > /dev/null 2>&1
+RC=$?	# exit status of the detached build is the test result
+rm -rf "${TMP_DEST}"
+exit $RC
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index c6398b9..47146d3 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -166,14 +166,8 @@
 out_err:
 	if (evlist) {
 		perf_evlist__disable(evlist);
-		perf_evlist__munmap(evlist);
-		perf_evlist__close(evlist);
 		perf_evlist__delete(evlist);
 	}
-	if (cpus)
-		cpu_map__delete(cpus);
-	if (threads)
-		thread_map__delete(threads);
 
 	return err;
 }
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 6664a7c..983d6b8 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -45,7 +45,7 @@
 	evsel = perf_evsel__new(&attr);
 	if (evsel == NULL) {
 		pr_debug("perf_evsel__new\n");
-		goto out_free_evlist;
+		goto out_delete_evlist;
 	}
 	perf_evlist__add(evlist, evsel);
 
@@ -54,7 +54,7 @@
 	if (!evlist->cpus || !evlist->threads) {
 		err = -ENOMEM;
 		pr_debug("Not enough memory to create thread/cpu maps\n");
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	if (perf_evlist__open(evlist)) {
@@ -63,14 +63,14 @@
 		err = -errno;
 		pr_debug("Couldn't open evlist: %s\nHint: check %s, using %" PRIu64 " in this test.\n",
 			 strerror(errno), knob, (u64)attr.sample_freq);
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	err = perf_evlist__mmap(evlist, 128, true);
 	if (err < 0) {
 		pr_debug("failed to mmap event: %d (%s)\n", errno,
 			 strerror(errno));
-		goto out_close_evlist;
+		goto out_delete_evlist;
 	}
 
 	perf_evlist__enable(evlist);
@@ -90,7 +90,7 @@
 		err = perf_evlist__parse_sample(evlist, event, &sample);
 		if (err < 0) {
 			pr_debug("Error during parse sample\n");
-			goto out_unmap_evlist;
+			goto out_delete_evlist;
 		}
 
 		total_periods += sample.period;
@@ -105,13 +105,7 @@
 		err = -1;
 	}
 
-out_unmap_evlist:
-	perf_evlist__munmap(evlist);
-out_close_evlist:
-	perf_evlist__close(evlist);
-out_delete_maps:
-	perf_evlist__delete_maps(evlist);
-out_free_evlist:
+out_delete_evlist:
 	perf_evlist__delete(evlist);
 	return err;
 }
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index d09ab57..5ff3db3 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -9,12 +9,21 @@
 static int exited;
 static int nr_exit;
 
-static void sig_handler(int sig)
+static void sig_handler(int sig __maybe_unused)
 {
 	exited = 1;
+}
 
-	if (sig == SIGUSR1)
-		nr_exit = -1;
+/*
+ * perf_evlist__prepare_workload will send a SIGUSR1 if the exec fails, since
+ * we asked by setting its exec_error to this handler.
+ */
+static void workload_exec_failed_signal(int signo __maybe_unused,
+					siginfo_t *info __maybe_unused,
+					void *ucontext __maybe_unused)
+{
+	exited	= 1;
+	nr_exit = -1;
 }
 
 /*
@@ -35,7 +44,6 @@
 	const char *argv[] = { "true", NULL };
 
 	signal(SIGCHLD, sig_handler);
-	signal(SIGUSR1, sig_handler);
 
 	evlist = perf_evlist__new_default();
 	if (evlist == NULL) {
@@ -54,13 +62,14 @@
 	if (!evlist->cpus || !evlist->threads) {
 		err = -ENOMEM;
 		pr_debug("Not enough memory to create thread/cpu maps\n");
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
-	err = perf_evlist__prepare_workload(evlist, &target, argv, false, true);
+	err = perf_evlist__prepare_workload(evlist, &target, argv, false,
+					    workload_exec_failed_signal);
 	if (err < 0) {
 		pr_debug("Couldn't run the workload!\n");
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	evsel = perf_evlist__first(evlist);
@@ -74,13 +83,13 @@
 	err = perf_evlist__open(evlist);
 	if (err < 0) {
 		pr_debug("Couldn't open the evlist: %s\n", strerror(-err));
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	if (perf_evlist__mmap(evlist, 128, true) < 0) {
 		pr_debug("failed to mmap events: %d (%s)\n", errno,
 			 strerror(errno));
-		goto out_close_evlist;
+		goto out_delete_evlist;
 	}
 
 	perf_evlist__start_workload(evlist);
@@ -103,11 +112,7 @@
 		err = -1;
 	}
 
-	perf_evlist__munmap(evlist);
-out_close_evlist:
-	perf_evlist__close(evlist);
-out_delete_maps:
-	perf_evlist__delete_maps(evlist);
+out_delete_evlist:
 	perf_evlist__delete(evlist);
 	return err;
 }
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index a7045ea..b720b92 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1938,7 +1938,7 @@
 
 	ui_helpline__push("Press ESC to exit");
 
-	list_for_each_entry(pos, &evlist->entries, node) {
+	evlist__for_each(evlist, pos) {
 		const char *ev_name = perf_evsel__name(pos);
 		size_t line_len = strlen(ev_name) + 7;
 
@@ -1970,9 +1970,10 @@
 		struct perf_evsel *pos;
 
 		nr_entries = 0;
-		list_for_each_entry(pos, &evlist->entries, node)
+		evlist__for_each(evlist, pos) {
 			if (perf_evsel__is_group_leader(pos))
 				nr_entries++;
+		}
 
 		if (nr_entries == 1)
 			goto single_entry;
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 2ca66cc..5b95c44 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -375,7 +375,7 @@
 
 	gtk_container_add(GTK_CONTAINER(window), vbox);
 
-	list_for_each_entry(pos, &evlist->entries, node) {
+	evlist__for_each(evlist, pos) {
 		struct hists *hists = &pos->hists;
 		const char *evname = perf_evsel__name(pos);
 		GtkWidget *scrolled_window;
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 0922aa4..88f7be3 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -81,7 +81,7 @@
 	/*
 	 * check if cgrp is already defined, if so we reuse it
 	 */
-	list_for_each_entry(counter, &evlist->entries, node) {
+	evlist__for_each(evlist, counter) {
 		cgrp = counter->cgrp;
 		if (!cgrp)
 			continue;
@@ -110,7 +110,7 @@
 	 * if add cgroup N, then need to find event N
 	 */
 	n = 0;
-	list_for_each_entry(counter, &evlist->entries, node) {
+	evlist__for_each(evlist, counter) {
 		if (n == nr_cgroups)
 			goto found;
 		n++;
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index 66e44a5..87b8672 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c
@@ -1,6 +1,7 @@
 #include <linux/kernel.h>
 #include "cache.h"
 #include "color.h"
+#include <math.h>
 
 int perf_use_color_default = -1;
 
@@ -298,10 +299,10 @@
 	 * entries in green - and keep the low overhead places
 	 * normal:
 	 */
-	if (percent >= MIN_RED)
+	if (fabs(percent) >= MIN_RED)
 		color = PERF_COLOR_RED;
 	else {
-		if (percent > MIN_GREEN)
+		if (fabs(percent) > MIN_GREEN)
 			color = PERF_COLOR_GREEN;
 	}
 	return color;
@@ -318,15 +319,19 @@
 	return r;
 }
 
+int value_color_snprintf(char *bf, size_t size, const char *fmt, double value)
+{
+	const char *color = get_percent_color(value);
+	return color_snprintf(bf, size, color, fmt, value);
+}
+
 int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...)
 {
 	va_list args;
 	double percent;
-	const char *color;
 
 	va_start(args, fmt);
 	percent = va_arg(args, double);
 	va_end(args);
-	color = get_percent_color(percent);
-	return color_snprintf(bf, size, color, fmt, percent);
+	return value_color_snprintf(bf, size, fmt, percent);
 }
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index fced384..7ff30a6 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -39,6 +39,7 @@
 int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...);
 int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...);
 int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
+int value_color_snprintf(char *bf, size_t size, const char *fmt, double value);
 int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...);
 int percent_color_fprintf(FILE *fp, const char *fmt, double percent);
 const char *get_percent_color(double percent);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 45a76c6..1fc1c2f 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -175,12 +175,12 @@
 	return tgid;
 }
 
-static int perf_event__synthesize_mmap_events(struct perf_tool *tool,
-					      union perf_event *event,
-					      pid_t pid, pid_t tgid,
-					      perf_event__handler_t process,
-					      struct machine *machine,
-					      bool mmap_data)
+int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+				       union perf_event *event,
+				       pid_t pid, pid_t tgid,
+				       perf_event__handler_t process,
+				       struct machine *machine,
+				       bool mmap_data)
 {
 	char filename[PATH_MAX];
 	FILE *fp;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 30fec99..faf6e21 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -266,6 +266,13 @@
 				  const struct perf_sample *sample,
 				  bool swapped);
 
+int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+				       union perf_event *event,
+				       pid_t pid, pid_t tgid,
+				       perf_event__handler_t process,
+				       struct machine *machine,
+				       bool mmap_data);
+
 size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index b08a7ec..40bd2c0 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -81,7 +81,7 @@
 {
 	struct perf_evsel *evsel;
 
-	list_for_each_entry(evsel, &evlist->entries, node)
+	evlist__for_each(evlist, evsel)
 		perf_evsel__calc_id_pos(evsel);
 
 	perf_evlist__set_id_pos(evlist);
@@ -91,7 +91,7 @@
 {
 	struct perf_evsel *pos, *n;
 
-	list_for_each_entry_safe(pos, n, &evlist->entries, node) {
+	evlist__for_each_safe(evlist, n, pos) {
 		list_del_init(&pos->node);
 		perf_evsel__delete(pos);
 	}
@@ -107,6 +107,12 @@
 
 void perf_evlist__delete(struct perf_evlist *evlist)
 {
+	perf_evlist__munmap(evlist);
+	perf_evlist__close(evlist);
+	cpu_map__delete(evlist->cpus);
+	thread_map__delete(evlist->threads);
+	evlist->cpus = NULL;
+	evlist->threads = NULL;
 	perf_evlist__purge(evlist);
 	perf_evlist__exit(evlist);
 	free(evlist);
@@ -142,7 +148,7 @@
 
 	leader->nr_members = evsel->idx - leader->idx + 1;
 
-	list_for_each_entry(evsel, list, node) {
+	__evlist__for_each(list, evsel) {
 		evsel->leader = leader;
 	}
 }
@@ -201,7 +207,7 @@
 	return 0;
 
 out_delete_partial_list:
-	list_for_each_entry_safe(evsel, n, &head, node)
+	__evlist__for_each_safe(&head, n, evsel)
 		perf_evsel__delete(evsel);
 	return -1;
 }
@@ -222,7 +228,7 @@
 {
 	struct perf_evsel *evsel;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		if (evsel->attr.type   == PERF_TYPE_TRACEPOINT &&
 		    (int)evsel->attr.config == id)
 			return evsel;
@@ -237,7 +243,7 @@
 {
 	struct perf_evsel *evsel;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
 		    (strcmp(evsel->name, name) == 0))
 			return evsel;
@@ -267,7 +273,7 @@
 	int nr_threads = thread_map__nr(evlist->threads);
 
 	for (cpu = 0; cpu < nr_cpus; cpu++) {
-		list_for_each_entry(pos, &evlist->entries, node) {
+		evlist__for_each(evlist, pos) {
 			if (!perf_evsel__is_group_leader(pos) || !pos->fd)
 				continue;
 			for (thread = 0; thread < nr_threads; thread++)
@@ -285,7 +291,7 @@
 	int nr_threads = thread_map__nr(evlist->threads);
 
 	for (cpu = 0; cpu < nr_cpus; cpu++) {
-		list_for_each_entry(pos, &evlist->entries, node) {
+		evlist__for_each(evlist, pos) {
 			if (!perf_evsel__is_group_leader(pos) || !pos->fd)
 				continue;
 			for (thread = 0; thread < nr_threads; thread++)
@@ -582,6 +588,9 @@
 {
 	int i;
 
+	if (evlist->mmap == NULL)
+		return;
+
 	for (i = 0; i < evlist->nr_mmaps; i++)
 		__perf_evlist__munmap(evlist, i);
 
@@ -621,7 +630,7 @@
 {
 	struct perf_evsel *evsel;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		int fd = FD(evsel, cpu, thread);
 
 		if (*output == -1) {
@@ -797,7 +806,7 @@
 	pr_debug("mmap size %zuB\n", evlist->mmap_len);
 	mask = evlist->mmap_len - page_size - 1;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
 		    evsel->sample_id == NULL &&
 		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
@@ -833,14 +842,6 @@
 	return -1;
 }
 
-void perf_evlist__delete_maps(struct perf_evlist *evlist)
-{
-	cpu_map__delete(evlist->cpus);
-	thread_map__delete(evlist->threads);
-	evlist->cpus	= NULL;
-	evlist->threads = NULL;
-}
-
 int perf_evlist__apply_filters(struct perf_evlist *evlist)
 {
 	struct perf_evsel *evsel;
@@ -848,7 +849,7 @@
 	const int ncpus = cpu_map__nr(evlist->cpus),
 		  nthreads = thread_map__nr(evlist->threads);
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		if (evsel->filter == NULL)
 			continue;
 
@@ -867,7 +868,7 @@
 	const int ncpus = cpu_map__nr(evlist->cpus),
 		  nthreads = thread_map__nr(evlist->threads);
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		err = perf_evsel__set_filter(evsel, ncpus, nthreads, filter);
 		if (err)
 			break;
@@ -886,7 +887,7 @@
 	if (evlist->id_pos < 0 || evlist->is_pos < 0)
 		return false;
 
-	list_for_each_entry(pos, &evlist->entries, node) {
+	evlist__for_each(evlist, pos) {
 		if (pos->id_pos != evlist->id_pos ||
 		    pos->is_pos != evlist->is_pos)
 			return false;
@@ -902,7 +903,7 @@
 	if (evlist->combined_sample_type)
 		return evlist->combined_sample_type;
 
-	list_for_each_entry(evsel, &evlist->entries, node)
+	evlist__for_each(evlist, evsel)
 		evlist->combined_sample_type |= evsel->attr.sample_type;
 
 	return evlist->combined_sample_type;
@@ -920,7 +921,7 @@
 	u64 read_format = first->attr.read_format;
 	u64 sample_type = first->attr.sample_type;
 
-	list_for_each_entry_continue(pos, &evlist->entries, node) {
+	evlist__for_each_continue(evlist, pos) {
 		if (read_format != pos->attr.read_format)
 			return false;
 	}
@@ -977,7 +978,7 @@
 {
 	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
 
-	list_for_each_entry_continue(pos, &evlist->entries, node) {
+	evlist__for_each_continue(evlist, pos) {
 		if (first->attr.sample_id_all != pos->attr.sample_id_all)
 			return false;
 	}
@@ -1003,7 +1004,7 @@
 	int ncpus = cpu_map__nr(evlist->cpus);
 	int nthreads = thread_map__nr(evlist->threads);
 
-	list_for_each_entry_reverse(evsel, &evlist->entries, node)
+	evlist__for_each_reverse(evlist, evsel)
 		perf_evsel__close(evsel, ncpus, nthreads);
 }
 
@@ -1014,7 +1015,7 @@
 
 	perf_evlist__update_id_pos(evlist);
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		err = perf_evsel__open(evsel, evlist->cpus, evlist->threads);
 		if (err < 0)
 			goto out_err;
@@ -1029,7 +1030,7 @@
 
 int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
 				  const char *argv[], bool pipe_output,
-				  bool want_signal)
+				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
 {
 	int child_ready_pipe[2], go_pipe[2];
 	char bf;
@@ -1073,12 +1074,25 @@
 
 		execvp(argv[0], (char **)argv);
 
-		perror(argv[0]);
-		if (want_signal)
-			kill(getppid(), SIGUSR1);
+		if (exec_error) {
+			union sigval val = { .sival_int = errno };
+
+			if (sigqueue(getppid(), SIGUSR1, val) == 0)
+				exit(-1);	/* errno queued to the parent */
+			errno = val.sival_int;	/* sigqueue() clobbered it */
+		}
+		perror(argv[0]);
 		exit(-1);
 	}
 
+	if (exec_error) {
+		struct sigaction act = {
+			.sa_flags     = SA_SIGINFO,
+			.sa_sigaction = exec_error,
+		};
+		sigaction(SIGUSR1, &act, NULL);
+	}
+
 	if (target__none(target))
 		evlist->threads->map[0] = evlist->workload.pid;
 
@@ -1140,7 +1154,7 @@
 	struct perf_evsel *evsel;
 	size_t printed = 0;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
 				   perf_evsel__name(evsel));
 	}
@@ -1219,7 +1233,7 @@
 	if (move_evsel == perf_evlist__first(evlist))
 		return;
 
-	list_for_each_entry_safe(evsel, n, &evlist->entries, node) {
+	evlist__for_each_safe(evlist, n, evsel) {
 		if (evsel->leader == move_evsel->leader)
 			list_move_tail(&evsel->node, &move);
 	}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 2fe5195..f5173cd 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -103,7 +103,8 @@
 int perf_evlist__prepare_workload(struct perf_evlist *evlist,
 				  struct target *target,
 				  const char *argv[], bool pipe_output,
-				  bool want_signal);
+				  void (*exec_error)(int signo, siginfo_t *info,
+						     void *ucontext));
 int perf_evlist__start_workload(struct perf_evlist *evlist);
 
 int perf_evlist__parse_mmap_pages(const struct option *opt,
@@ -134,7 +135,6 @@
 }
 
 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target);
-void perf_evlist__delete_maps(struct perf_evlist *evlist);
 int perf_evlist__apply_filters(struct perf_evlist *evlist);
 
 void __perf_evlist__set_leader(struct list_head *list);
@@ -196,5 +196,70 @@
 void perf_evlist__to_front(struct perf_evlist *evlist,
 			   struct perf_evsel *move_evsel);
 
+/**
+ * __evlist__for_each - iterate through all the evsels linked on a list_head
+ * @list: list_head instance to iterate (entries are linked via ->node)
+ * @evsel: struct perf_evsel iterator
+ */
+#define __evlist__for_each(list, evsel) \
+        list_for_each_entry(evsel, list, node)
+
+/**
+ * evlist__for_each - iterate through all the evsels in evlist->entries
+ * @evlist: evlist instance to iterate
+ * @evsel: struct perf_evsel iterator
+ */
+#define evlist__for_each(evlist, evsel) \
+	__evlist__for_each(&(evlist)->entries, evsel)
+
+/**
+ * __evlist__for_each_continue - continue iteration through the evsels
+ * @list: list_head instance to iterate (entries are linked via ->node)
+ * @evsel: struct perf_evsel iterator, already set; iteration resumes after it
+ */
+#define __evlist__for_each_continue(list, evsel) \
+        list_for_each_entry_continue(evsel, list, node)
+
+/**
+ * evlist__for_each_continue - continue iteration through evlist->entries
+ * @evlist: evlist instance to iterate
+ * @evsel: struct perf_evsel iterator, already set; iteration resumes after it
+ */
+#define evlist__for_each_continue(evlist, evsel) \
+	__evlist__for_each_continue(&(evlist)->entries, evsel)
+
+/**
+ * __evlist__for_each_reverse - iterate through all the evsels in reverse order
+ * @list: list_head instance to iterate (entries are linked via ->node)
+ * @evsel: struct perf_evsel iterator
+ */
+#define __evlist__for_each_reverse(list, evsel) \
+        list_for_each_entry_reverse(evsel, list, node)
+
+/**
+ * evlist__for_each_reverse - iterate through evlist->entries in reverse order
+ * @evlist: evlist instance to iterate
+ * @evsel: struct perf_evsel iterator
+ */
+#define evlist__for_each_reverse(evlist, evsel) \
+	__evlist__for_each_reverse(&(evlist)->entries, evsel)
+
+/**
+ * __evlist__for_each_safe - iterate through the evsels, safe against removal
+ * @list: list_head instance to iterate (entries are linked via ->node)
+ * @tmp: struct perf_evsel temp iterator (scratch storage for the walk)
+ * @evsel: struct perf_evsel iterator; may be unlinked inside the loop body
+ */
+#define __evlist__for_each_safe(list, tmp, evsel) \
+        list_for_each_entry_safe(evsel, tmp, list, node)
+
+/**
+ * evlist__for_each_safe - iterate through evlist->entries, safe against removal
+ * @evlist: evlist instance to iterate
+ * @tmp: struct perf_evsel temp iterator (scratch storage for the walk)
+ * @evsel: struct perf_evsel iterator; may be unlinked inside the loop body
+ */
+#define evlist__for_each_safe(evlist, tmp, evsel) \
+	__evlist__for_each_safe(&(evlist)->entries, tmp, evsel)
 
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ade8d9c..cd4630a 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -658,7 +658,8 @@
 	 * Setting enable_on_exec for independent events and
 	 * group leaders for traced executed by perf.
 	 */
-	if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel))
+	if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) &&
+		!opts->initial_delay)
 		attr->enable_on_exec = 1;
 }
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a4a60b7..bb3e0ed 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -643,8 +643,7 @@
 	if (ret < 0)
 		return ret;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
-
+	evlist__for_each(evlist, evsel) {
 		ret = do_write(fd, &evsel->attr, sz);
 		if (ret < 0)
 			return ret;
@@ -1092,7 +1091,7 @@
 	if (ret < 0)
 		return ret;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		if (perf_evsel__is_group_leader(evsel) &&
 		    evsel->nr_members > 1) {
 			const char *name = evsel->group_name ?: "{anon_group}";
@@ -1487,7 +1486,7 @@
 
 	session = container_of(ph, struct perf_session, header);
 
-	list_for_each_entry(evsel, &session->evlist->entries, node) {
+	evlist__for_each(session->evlist, evsel) {
 		if (perf_evsel__is_group_leader(evsel) &&
 		    evsel->nr_members > 1) {
 			fprintf(fp, "# group: %s{%s", evsel->group_name ?: "",
@@ -1768,7 +1767,7 @@
 {
 	struct perf_evsel *evsel;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		if (evsel->idx == idx)
 			return evsel;
 	}
@@ -2071,7 +2070,7 @@
 	session->evlist->nr_groups = nr_groups;
 
 	i = nr = 0;
-	list_for_each_entry(evsel, &session->evlist->entries, node) {
+	evlist__for_each(session->evlist, evsel) {
 		if (evsel->idx == (int) desc[i].leader_idx) {
 			evsel->leader = evsel;
 			/* {anon_group} is a dummy name */
@@ -2298,7 +2297,7 @@
 
 	lseek(fd, sizeof(f_header), SEEK_SET);
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		evsel->id_offset = lseek(fd, 0, SEEK_CUR);
 		err = do_write(fd, evsel->id, evsel->ids * sizeof(u64));
 		if (err < 0) {
@@ -2309,7 +2308,7 @@
 
 	attr_offset = lseek(fd, 0, SEEK_CUR);
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	evlist__for_each(evlist, evsel) {
 		f_attr = (struct perf_file_attr){
 			.attr = evsel->attr,
 			.ids  = {
@@ -2742,7 +2741,7 @@
 {
 	struct perf_evsel *pos;
 
-	list_for_each_entry(pos, &evlist->entries, node) {
+	evlist__for_each(evlist, pos) {
 		if (pos->attr.type == PERF_TYPE_TRACEPOINT &&
 		    perf_evsel__prepare_tracepoint_event(pos, pevent))
 			return -1;
@@ -2890,7 +2889,7 @@
 	struct perf_evsel *evsel;
 	int err = 0;
 
-	list_for_each_entry(evsel, &session->evlist->entries, node) {
+	evlist__for_each(session->evlist, evsel) {
 		err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids,
 						  evsel->id, process);
 		if (err) {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 307c9ae..a2d047b 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -77,16 +77,16 @@
 	unsigned long long	total_mem;
 
 	int			nr_cmdline;
-	char			*cmdline;
 	int			nr_sibling_cores;
-	char			*sibling_cores;
 	int			nr_sibling_threads;
-	char			*sibling_threads;
 	int			nr_numa_nodes;
-	char			*numa_nodes;
 	int			nr_pmu_mappings;
-	char			*pmu_mappings;
 	int			nr_groups;
+	char			*cmdline;
+	char			*sibling_cores;
+	char			*sibling_threads;
+	char			*numa_nodes;
+	char			*pmu_mappings;
 };
 
 struct perf_header {
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index a98538d..0130279a 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -27,6 +27,7 @@
 	machine->pid = pid;
 
 	machine->symbol_filter = NULL;
+	machine->id_hdr_size = 0;
 
 	machine->root_dir = strdup(root_dir);
 	if (machine->root_dir == NULL)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 0153435..a7f1b6a 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -820,8 +820,7 @@
 	if (!add && get_event_modifier(&mod, str, NULL))
 		return -EINVAL;
 
-	list_for_each_entry(evsel, list, node) {
-
+	__evlist__for_each(list, evsel) {
 		if (add && get_event_modifier(&mod, str, evsel))
 			return -EINVAL;
 
@@ -845,7 +844,7 @@
 {
 	struct perf_evsel *evsel;
 
-	list_for_each_entry(evsel, list, node) {
+	__evlist__for_each(list, evsel) {
 		if (!evsel->name)
 			evsel->name = strdup(name);
 	}
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 0934d64..d9cab4d 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -505,7 +505,7 @@
 
 /*
  * Setup one of config[12] attr members based on the
- * user input data - temr parameter.
+ * user input data - term parameter.
  */
 static int pmu_config_term(struct list_head *formats,
 			   struct perf_event_attr *attr,
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 86ed858..a4ee6b4 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -172,6 +172,7 @@
 	return (dso) ? dso->long_name : NULL;
 }
 
+#ifdef HAVE_DWARF_SUPPORT
 /* Copied from unwind.c */
 static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
 				    GElf_Shdr *shp, const char *name)
@@ -217,6 +218,7 @@
 	elf_end(elf);
 	return ret;
 }
+#endif
 
 static int init_user_exec(void)
 {
@@ -750,7 +752,8 @@
 
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 				struct probe_trace_event **tevs __maybe_unused,
-				int max_tevs __maybe_unused, const char *target)
+				int max_tevs __maybe_unused,
+				const char *target __maybe_unused)
 {
 	if (perf_probe_event_need_dwarf(pev)) {
 		pr_warning("Debuginfo-analysis is not supported.\n");
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 4bf8ace..122669c 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -908,9 +908,10 @@
 	if (i >= pevlist->evlist.nr_entries)
 		return NULL;
 
-	list_for_each_entry(pos, &pevlist->evlist.entries, node)
+	evlist__for_each(&pevlist->evlist, pos) {
 		if (i-- == 0)
 			break;
+	}
 
 	return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel));
 }
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 104a475..3737625 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -89,19 +89,19 @@
 	if (evlist->cpus->map[0] < 0)
 		opts->no_inherit = true;
 
-	list_for_each_entry(evsel, &evlist->entries, node)
+	evlist__for_each(evlist, evsel)
 		perf_evsel__config(evsel, opts);
 
 	if (evlist->nr_entries > 1) {
 		struct perf_evsel *first = perf_evlist__first(evlist);
 
-		list_for_each_entry(evsel, &evlist->entries, node) {
+		evlist__for_each(evlist, evsel) {
 			if (evsel->attr.sample_type == first->attr.sample_type)
 				continue;
 			use_sample_identifier = perf_can_sample_identifier();
 			break;
 		}
-		list_for_each_entry(evsel, &evlist->entries, node)
+		evlist__for_each(evlist, evsel)
 			perf_evsel__set_sample_id(evsel, use_sample_identifier);
 	}
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 8ffe29c..7acc03e 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1384,7 +1384,7 @@
 {
 	struct perf_evsel *evsel;
 
-	list_for_each_entry(evsel, &session->evlist->entries, node) {
+	evlist__for_each(session->evlist, evsel) {
 		if (evsel->attr.type == PERF_TYPE_TRACEPOINT)
 			return true;
 	}
@@ -1442,7 +1442,7 @@
 
 	ret += events_stats__fprintf(&session->stats, fp);
 
-	list_for_each_entry(pos, &session->evlist->entries, node) {
+	evlist__for_each(session->evlist, pos) {
 		ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos));
 		ret += events_stats__fprintf(&pos->hists.stats, fp);
 	}
@@ -1464,7 +1464,7 @@
 {
 	struct perf_evsel *pos;
 
-	list_for_each_entry(pos, &session->evlist->entries, node) {
+	evlist__for_each(session->evlist, pos) {
 		if (pos->attr.type == type)
 			return pos;
 	}
diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c
index 0efd539..416f22b 100644
--- a/tools/perf/util/unwind.c
+++ b/tools/perf/util/unwind.c
@@ -340,10 +340,10 @@
 	/* Check the .debug_frame section for unwinding info */
 	if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
 		memset(&di, 0, sizeof(di));
-		dwarf_find_debug_frame(0, &di, ip, 0, map->dso->name,
-				       map->start, map->end);
-		return dwarf_search_unwind_table(as, ip, &di, pi,
-						 need_unwind_info, arg);
+		if (dwarf_find_debug_frame(0, &di, ip, 0, map->dso->name,
+					   map->start, map->end))
+			return dwarf_search_unwind_table(as, ip, &di, pi,
+							 need_unwind_info, arg);
 	}
 #endif