perf tools: Enable close-on-exec flag on perf file descriptor

In commit a21b0b354d4a ('perf: Introduce a flag to enable
close-on-exec in perf_event_open()'), flag PERF_FLAG_FD_CLOEXEC
was added to perf_event_open(2) syscall to allows userspace
to atomically enable close-on-exec behavor when creating
the file descriptor.

This patch makes perf tools use the new flag if supported
by the kernel, so that the event file descriptors got
automatically closed if perf tool exec a sub-command.

Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/1404160127-7475-1-git-send-email-ydroneaud@opteya.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
new file mode 100644
index 0000000..c5d05ec
--- /dev/null
+++ b/tools/perf/util/cloexec.c
@@ -0,0 +1,57 @@
+#include "util.h"
+#include "../perf.h"
+#include "cloexec.h"
+#include "asm/bug.h"
+
+static unsigned long flag = PERF_FLAG_FD_CLOEXEC;
+
+static int perf_flag_probe(void)
+{
+	/* use 'safest' configuration as used in perf_evsel__fallback() */
+	struct perf_event_attr attr = {
+		.type = PERF_COUNT_SW_CPU_CLOCK,
+		.config = PERF_COUNT_SW_CPU_CLOCK,
+	};
+	int fd;
+	int err;
+
+	/* check cloexec flag */
+	fd = sys_perf_event_open(&attr, 0, -1, -1,
+				 PERF_FLAG_FD_CLOEXEC);
+	err = errno;
+
+	if (fd >= 0) {
+		close(fd);
+		return 1;
+	}
+
+	WARN_ONCE(err != EINVAL,
+		  "perf_event_open(..., PERF_FLAG_FD_CLOEXEC) failed with unexpected error %d (%s)\n",
+		  err, strerror(err));
+
+	/* not supported, confirm error related to PERF_FLAG_FD_CLOEXEC */
+	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	err = errno;
+
+	if (WARN_ONCE(fd < 0,
+		      "perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n",
+		      err, strerror(err)))
+		return -1;
+
+	close(fd);
+
+	return 0;
+}
+
+unsigned long perf_event_open_cloexec_flag(void)
+{
+	static bool probed;
+
+	if (!probed) {
+		if (perf_flag_probe() <= 0)
+			flag = 0;
+		probed = true;
+	}
+
+	return flag;
+}
diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h
new file mode 100644
index 0000000..94a5a7d
--- /dev/null
+++ b/tools/perf/util/cloexec.h
@@ -0,0 +1,6 @@
+#ifndef __PERF_CLOEXEC_H
+#define __PERF_CLOEXEC_H
+
+unsigned long perf_event_open_cloexec_flag(void);
+
+#endif /* __PERF_CLOEXEC_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 90f58cd..21a373e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -29,6 +29,7 @@
 	bool sample_id_all;
 	bool exclude_guest;
 	bool mmap2;
+	bool cloexec;
 } perf_missing_features;
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
@@ -994,7 +995,7 @@
 			      struct thread_map *threads)
 {
 	int cpu, thread;
-	unsigned long flags = 0;
+	unsigned long flags = PERF_FLAG_FD_CLOEXEC;
 	int pid = -1, err;
 	enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;
 
@@ -1003,11 +1004,13 @@
 		return -ENOMEM;
 
 	if (evsel->cgrp) {
-		flags = PERF_FLAG_PID_CGROUP;
+		flags |= PERF_FLAG_PID_CGROUP;
 		pid = evsel->cgrp->fd;
 	}
 
 fallback_missing_features:
+	if (perf_missing_features.cloexec)
+		flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
 	if (perf_missing_features.mmap2)
 		evsel->attr.mmap2 = 0;
 	if (perf_missing_features.exclude_guest)
@@ -1076,7 +1079,10 @@
 	if (err != -EINVAL || cpu > 0 || thread > 0)
 		goto out_close;
 
-	if (!perf_missing_features.mmap2 && evsel->attr.mmap2) {
+	if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
+		perf_missing_features.cloexec = true;
+		goto fallback_missing_features;
+	} else if (!perf_missing_features.mmap2 && evsel->attr.mmap2) {
 		perf_missing_features.mmap2 = true;
 		goto fallback_missing_features;
 	} else if (!perf_missing_features.exclude_guest &&
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 1657231..fe8079e 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -4,6 +4,7 @@
 #include "parse-events.h"
 #include <api/fs/fs.h>
 #include "util.h"
+#include "cloexec.h"
 
 typedef void (*setup_probe_fn_t)(struct perf_evsel *evsel);
 
@@ -11,6 +12,7 @@
 {
 	struct perf_evlist *evlist;
 	struct perf_evsel *evsel;
+	unsigned long flags = perf_event_open_cloexec_flag();
 	int err = -EAGAIN, fd;
 
 	evlist = perf_evlist__new();
@@ -22,14 +24,14 @@
 
 	evsel = perf_evlist__first(evlist);
 
-	fd = sys_perf_event_open(&evsel->attr, -1, cpu, -1, 0);
+	fd = sys_perf_event_open(&evsel->attr, -1, cpu, -1, flags);
 	if (fd < 0)
 		goto out_delete;
 	close(fd);
 
 	fn(evsel);
 
-	fd = sys_perf_event_open(&evsel->attr, -1, cpu, -1, 0);
+	fd = sys_perf_event_open(&evsel->attr, -1, cpu, -1, flags);
 	if (fd < 0) {
 		if (errno == EINVAL)
 			err = -EINVAL;
@@ -219,7 +221,8 @@
 		cpu = evlist->cpus->map[0];
 	}
 
-	fd = sys_perf_event_open(&evsel->attr, -1, cpu, -1, 0);
+	fd = sys_perf_event_open(&evsel->attr, -1, cpu, -1,
+				 perf_event_open_cloexec_flag());
 	if (fd >= 0) {
 		close(fd);
 		ret = true;