Revert "sched: Improve scalability via 'CPU buddies', which withstand random perturbations" This reverts commit 970e178985cadbca660feb02f4d2ee3a09f7fdda. Nikolay Ulyanitsky reported that the 3.6-rc5 kernel has a 15-20% performance drop on PostgreSQL 9.2 on his machine (running "pgbench"). Borislav Petkov was able to reproduce this, and bisected it to this commit 970e178985ca ("sched: Improve scalability via 'CPU buddies' ...") apparently because the new single-idle-buddy model simply doesn't find idle CPU's to reschedule on aggressively enough. Mike Galbraith suspects that it is likely due to the user-mode spinlocks in PostgreSQL not reacting well to preemption, but we don't really know the details - I'll just revert the commit for now. There are hopefully other approaches to improve scheduler scalability without it causing these kinds of downsides. Reported-by: Nikolay Ulyanitsky <lystor@gmail.com> Bisected-by: Borislav Petkov <bp@alien8.de> Acked-by: Mike Galbraith <efault@gmx.de> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@kernel.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

commit: 37407ea7f93864c2cfc03edf8f37872ec539ea2b [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Sun Sep 16 12:29:43 2012 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> Sun Sep 16 12:29:43 2012 -0700
tree: 7c07e7adadd40fc94cebfe816f1c65a4a630b147
parent: 3f0c3c8fe30c725c1264fb6db8cc4b69db3a658a [diff] [blame]
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 42d9df6..96e2b18 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c

@@ -2637,6 +2637,8 @@
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
+	struct sched_group *sg;
+	int i;
 
 	/*
 	 * If the task is going to be woken-up on this cpu and if it is
@@ -2653,17 +2655,29 @@
 		return prev_cpu;
 
 	/*
-	 * Otherwise, check assigned siblings to find an elegible idle cpu.
+	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
 	sd = rcu_dereference(per_cpu(sd_llc, target));
-
 	for_each_lower_domain(sd) {
-		if (!cpumask_test_cpu(sd->idle_buddy, tsk_cpus_allowed(p)))
-			continue;
-		if (idle_cpu(sd->idle_buddy))
-			return sd->idle_buddy;
-	}
+		sg = sd->groups;
+		do {
+			if (!cpumask_intersects(sched_group_cpus(sg),
+						tsk_cpus_allowed(p)))
+				goto next;
 
+			for_each_cpu(i, sched_group_cpus(sg)) {
+				if (!idle_cpu(i))
+					goto next;
+			}
+
+			target = cpumask_first_and(sched_group_cpus(sg),
+					tsk_cpus_allowed(p));
+			goto done;
+next:
+			sg = sg->next;
+		} while (sg != sd->groups);
+	}
+done:
 	return target;
 }
commit	37407ea7f93864c2cfc03edf8f37872ec539ea2b	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Sun Sep 16 12:29:43 2012 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	Sun Sep 16 12:29:43 2012 -0700
tree	7c07e7adadd40fc94cebfe816f1c65a4a630b147
parent	3f0c3c8fe30c725c1264fb6db8cc4b69db3a658a [diff] [blame]