Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:
"Misc fixes: various scheduler metrics corner case fixes, a
sched_features deadlock fix, and a topology fix for certain NUMA
systems"
* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/fair: Fix kernel-doc notation warning
sched/fair: Fix load_balance redo for !imbalance
sched/fair: Fix scale_rt_capacity() for SMT
sched/fair: Fix vruntime_normalized() for remote non-migration wakeup
sched/pelt: Fix update_blocked_averages() for RT and DL classes
sched/topology: Set correct NUMA topology type
sched/debug: Fix potential deadlock when writing to sched_features
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 60caf1f..6383aa6 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -89,12 +89,12 @@
static void sched_feat_disable(int i)
{
- static_key_disable(&sched_feat_keys[i]);
+ static_key_disable_cpuslocked(&sched_feat_keys[i]);
}
static void sched_feat_enable(int i)
{
- static_key_enable(&sched_feat_keys[i]);
+ static_key_enable_cpuslocked(&sched_feat_keys[i]);
}
#else
static void sched_feat_disable(int i) { };
@@ -146,9 +146,11 @@
/* Ensure the static_key remains in a consistent state */
inode = file_inode(filp);
+ cpus_read_lock();
inode_lock(inode);
ret = sched_feat_set(cmp);
inode_unlock(inode);
+ cpus_read_unlock();
if (ret < 0)
return ret;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b39fb59..f808ddf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3362,6 +3362,7 @@
* attach_entity_load_avg - attach this entity to its cfs_rq load avg
* @cfs_rq: cfs_rq to attach to
* @se: sched_entity to attach
+ * @flags: migration hints
*
* Must call update_cfs_rq_load_avg() before this, since we rely on
* cfs_rq->avg.last_update_time being current.
@@ -7263,6 +7264,7 @@
{
struct rq *rq = cpu_rq(cpu);
struct cfs_rq *cfs_rq, *pos;
+ const struct sched_class *curr_class;
struct rq_flags rf;
bool done = true;
@@ -7299,8 +7301,10 @@
if (cfs_rq_has_blocked(cfs_rq))
done = false;
}
- update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
- update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
+
+ curr_class = rq->curr->sched_class;
+ update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
+ update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
update_irq_load_avg(rq, 0);
/* Don't need periodic decay once load/util_avg are null */
if (others_have_blocked(rq))
@@ -7365,13 +7369,16 @@
{
struct rq *rq = cpu_rq(cpu);
struct cfs_rq *cfs_rq = &rq->cfs;
+ const struct sched_class *curr_class;
struct rq_flags rf;
rq_lock_irqsave(rq, &rf);
update_rq_clock(rq);
update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
- update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
- update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
+
+ curr_class = rq->curr->sched_class;
+ update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
+ update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
update_irq_load_avg(rq, 0);
#ifdef CONFIG_NO_HZ_COMMON
rq->last_blocked_load_update_tick = jiffies;
@@ -7482,10 +7489,10 @@
return load_idx;
}
-static unsigned long scale_rt_capacity(int cpu)
+static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
{
struct rq *rq = cpu_rq(cpu);
- unsigned long max = arch_scale_cpu_capacity(NULL, cpu);
+ unsigned long max = arch_scale_cpu_capacity(sd, cpu);
unsigned long used, free;
unsigned long irq;
@@ -7507,7 +7514,7 @@
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
{
- unsigned long capacity = scale_rt_capacity(cpu);
+ unsigned long capacity = scale_rt_capacity(sd, cpu);
struct sched_group *sdg = sd->groups;
cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(sd, cpu);
@@ -8269,7 +8276,7 @@
force_balance:
/* Looks like there is an imbalance. Compute it */
calculate_imbalance(env, &sds);
- return sds.busiest;
+ return env->imbalance ? sds.busiest : NULL;
out_balanced:
env->imbalance = 0;
@@ -9638,7 +9645,8 @@
* - A task which has been woken up by try_to_wake_up() and
* waiting for actually being woken up by sched_ttwu_pending().
*/
- if (!se->sum_exec_runtime || p->state == TASK_WAKING)
+ if (!se->sum_exec_runtime ||
+ (p->state == TASK_WAKING && p->sched_remote_wakeup))
return true;
return false;
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 56a0fed..505a41c 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1295,7 +1295,7 @@
n = sched_max_numa_distance;
- if (sched_domains_numa_levels <= 1) {
+ if (sched_domains_numa_levels <= 2) {
sched_numa_topology_type = NUMA_DIRECT;
return;
}
@@ -1380,9 +1380,6 @@
break;
}
- if (!level)
- return;
-
/*
* 'level' contains the number of unique distances
*