perf record: Fix per-thread option

Per-thread mode doesn't have specific CPUs for events, add checks for
this case.

Minor fix to a pr_debug by Ian Rogers <irogers@google.com> to avoid an
out of bound array access.

Fixes: 7954f71689f90cb2 ("perf record: Introduce thread affinity and mmap masks")
Reported-by: Ian Rogers <irogers@google.com>
Signed-off-by: Alexey Bayduraev <alexey.bayduraev@gmail.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Riccardo Mancini <rickyman7@gmail.com>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20220414014642.3308206-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ba74fab..069825c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -989,8 +989,11 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
 	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
 	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
 
-	thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
-					      thread_data->mask->maps.nbits);
+	if (cpu_map__is_dummy(cpus))
+		thread_data->nr_mmaps = nr_mmaps;
+	else
+		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
+						      thread_data->mask->maps.nbits);
 	if (mmap) {
 		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
 		if (!thread_data->maps)
@@ -1007,16 +1010,17 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
 		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
 
 	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
-		if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
+		if (cpu_map__is_dummy(cpus) ||
+		    test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
 			if (thread_data->maps) {
 				thread_data->maps[tm] = &mmap[m];
 				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
-					  thread_data, cpus->map[m].cpu, tm, m);
+					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
 			}
 			if (thread_data->overwrite_maps) {
 				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
 				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
-					  thread_data, cpus->map[m].cpu, tm, m);
+					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
 			}
 			tm++;
 		}
@@ -3329,6 +3333,9 @@ static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_c
 {
 	int c;
 
+	if (cpu_map__is_dummy(cpus))
+		return;
+
 	for (c = 0; c < cpus->nr; c++)
 		set_bit(cpus->map[c].cpu, mask->bits);
 }
@@ -3680,6 +3687,11 @@ static int record__init_thread_masks(struct record *rec)
 	if (!record__threads_enabled(rec))
 		return record__init_thread_default_masks(rec, cpus);
 
+	if (cpu_map__is_dummy(cpus)) {
+		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
+		return -EINVAL;
+	}
+
 	switch (rec->opts.threads_spec) {
 	case THREAD_SPEC__CPU:
 		ret = record__init_thread_cpu_masks(rec, cpus);