// SPDX-License-Identifier: GPL-2.0
/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

int sched_rr_timeslice = RR_TIMESLICE;
/* More than 4 hours if BW_SHIFT equals 20. */
static const u64 max_rt_runtime = MAX_BW;

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);

struct rt_bandwidth def_rt_bandwidth;

/*
 * period over which we measure -rt task CPU usage in us.
 * default: 1s
 */
unsigned int sysctl_sched_rt_period = 1000000;

/*
 * part of the period that we allow rt tasks to run in us.
 * default: 0.95s
 */
int sysctl_sched_rt_runtime = 950000;

#ifdef CONFIG_SYSCTL
static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ;
static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos);
static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos);
static struct ctl_table sched_rt_sysctls[] = {
	{
		.procname       = "sched_rt_period_us",
		.data           = &sysctl_sched_rt_period,
		.maxlen         = sizeof(unsigned int),
		.mode           = 0644,
		.proc_handler   = sched_rt_handler,
	},
	{
		.procname       = "sched_rt_runtime_us",
		.data           = &sysctl_sched_rt_runtime,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = sched_rt_handler,
	},
	{
		.procname       = "sched_rr_timeslice_ms",
		.data           = &sysctl_sched_rr_timeslice,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = sched_rr_handler,
	},
	{}
};

static int __init sched_rt_sysctl_init(void)
{
	register_sysctl_init("kernel", sched_rt_sysctls);
	return 0;
}
late_initcall(sched_rt_sysctl_init);
#endif

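/*
 * Added note (illustrative, not in the original source): with the defaults
 * above (sysctl_sched_rt_period = 1000000us, sysctl_sched_rt_runtime =
 * 950000us), the RT classes may consume at most 0.95s of CPU time per 1s
 * period on a runqueue, leaving 5% for other classes. The values are exposed
 * as /proc/sys/kernel/sched_rt_period_us, sched_rt_runtime_us and
 * sched_rr_timeslice_ms; writing -1 to sched_rt_runtime_us disables the
 * throttling (RUNTIME_INF).
 */
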
static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
	struct rt_bandwidth *rt_b =
		container_of(timer, struct rt_bandwidth, rt_period_timer);
	int idle = 0;
	int overrun;

	raw_spin_lock(&rt_b->rt_runtime_lock);
	for (;;) {
		overrun = hrtimer_forward_now(timer, rt_b->rt_period);
		if (!overrun)
			break;

		raw_spin_unlock(&rt_b->rt_runtime_lock);
		idle = do_sched_rt_period_timer(rt_b, overrun);
		raw_spin_lock(&rt_b->rt_runtime_lock);
	}
	if (idle)
		rt_b->rt_period_active = 0;
	raw_spin_unlock(&rt_b->rt_runtime_lock);

	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}

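/*
 * Added note (illustrative, not in the original source): hrtimer_forward_now()
 * returns how many whole periods the timer has overrun, so the loop above
 * keeps calling do_sched_rt_period_timer() until the timer expiry lies in the
 * future again; the timer is only left disarmed (HRTIMER_NORESTART) once the
 * last replenishment pass reports that all serviced runqueues have gone idle.
 */
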
void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
{
	rt_b->rt_period = ns_to_ktime(period);
	rt_b->rt_runtime = runtime;

	raw_spin_lock_init(&rt_b->rt_runtime_lock);

	hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_REL_HARD);
	rt_b->rt_period_timer.function = sched_rt_period_timer;
}

static inline void do_start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	raw_spin_lock(&rt_b->rt_runtime_lock);
	if (!rt_b->rt_period_active) {
		rt_b->rt_period_active = 1;
		/*
		 * SCHED_DEADLINE updates the bandwidth, as a runaway
		 * RT task alongside a DL task could hog a CPU. But DL does
		 * not reset the period. If a deadline task was running
		 * without an RT task running, it can cause RT tasks to
		 * throttle when they start up. Kick the timer right away
		 * to update the period.
		 */
		hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
		hrtimer_start_expires(&rt_b->rt_period_timer,
				      HRTIMER_MODE_ABS_PINNED_HARD);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);
}

static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
		return;

	do_start_rt_bandwidth(rt_b);
}

void init_rt_rq(struct rt_rq *rt_rq)
{
	struct rt_prio_array *array;
	int i;

	array = &rt_rq->active;
	for (i = 0; i < MAX_RT_PRIO; i++) {
		INIT_LIST_HEAD(array->queue + i);
		__clear_bit(i, array->bitmap);
	}
	/* delimiter for bitsearch: */
	__set_bit(MAX_RT_PRIO, array->bitmap);

#if defined CONFIG_SMP
	rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
	rt_rq->highest_prio.next = MAX_RT_PRIO-1;
	rt_rq->rt_nr_migratory = 0;
	rt_rq->overloaded = 0;
	plist_head_init(&rt_rq->pushable_tasks);
#endif /* CONFIG_SMP */
	/* We start in dequeued state, because no RT tasks are queued */
	rt_rq->rt_queued = 0;

	rt_rq->rt_time = 0;
	rt_rq->rt_throttled = 0;
	rt_rq->rt_runtime = 0;
	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
}

#ifdef CONFIG_RT_GROUP_SCHED
static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	hrtimer_cancel(&rt_b->rt_period_timer);
}

#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_SCHED_DEBUG
	WARN_ON_ONCE(!rt_entity_is_task(rt_se));
#endif
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return rt_rq->rq;
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	return rt_se->rt_rq;
}

static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = rt_se->rt_rq;

	return rt_rq->rq;
}

void unregister_rt_sched_group(struct task_group *tg)
{
	if (tg->rt_se)
		destroy_rt_bandwidth(&tg->rt_bandwidth);

}

void free_rt_sched_group(struct task_group *tg)
{
	int i;

	for_each_possible_cpu(i) {
		if (tg->rt_rq)
			kfree(tg->rt_rq[i]);
		if (tg->rt_se)
			kfree(tg->rt_se[i]);
	}

	kfree(tg->rt_rq);
	kfree(tg->rt_se);
}

void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
		struct sched_rt_entity *rt_se, int cpu,
		struct sched_rt_entity *parent)
{
	struct rq *rq = cpu_rq(cpu);

	rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
	rt_rq->rt_nr_boosted = 0;
	rt_rq->rq = rq;
	rt_rq->tg = tg;

	tg->rt_rq[cpu] = rt_rq;
	tg->rt_se[cpu] = rt_se;

	if (!rt_se)
		return;

	if (!parent)
		rt_se->rt_rq = &rq->rt;
	else
		rt_se->rt_rq = parent->my_q;

	rt_se->my_q = rt_rq;
	rt_se->parent = parent;
	INIT_LIST_HEAD(&rt_se->run_list);
}

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	struct rt_rq *rt_rq;
	struct sched_rt_entity *rt_se;
	int i;

	tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
	if (!tg->rt_rq)
		goto err;
	tg->rt_se = kcalloc(nr_cpu_ids, sizeof(rt_se), GFP_KERNEL);
	if (!tg->rt_se)
		goto err;

	init_rt_bandwidth(&tg->rt_bandwidth,
			ktime_to_ns(def_rt_bandwidth.rt_period), 0);

	for_each_possible_cpu(i) {
		rt_rq = kzalloc_node(sizeof(struct rt_rq),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_rq)
			goto err;

		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_se)
			goto err_free_rq;

		init_rt_rq(rt_rq);
		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
	}

	return 1;

err_free_rq:
	kfree(rt_rq);
err:
	return 0;
}

#else /* CONFIG_RT_GROUP_SCHED */

#define rt_entity_is_task(rt_se) (1)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return container_of(rt_rq, struct rq, rt);
}

static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	struct task_struct *p = rt_task_of(rt_se);

	return task_rq(p);
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	return &rq->rt;
}

void unregister_rt_sched_group(struct task_group *tg) { }

void free_rt_sched_group(struct task_group *tg) { }

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	return 1;
}
#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
	/* Try to pull RT tasks here if we lower this rq's prio */
	return rq->online && rq->rt.highest_prio.curr > prev->prio;
}

static inline int rt_overloaded(struct rq *rq)
{
	return atomic_read(&rq->rd->rto_count);
}

static inline void rt_set_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
	/*
	 * Make sure the mask is visible before we set
	 * the overload count. That is checked to determine
	 * if we should look at the mask. It would be a shame
	 * if we looked at the mask, but the mask was not
	 * updated yet.
	 *
	 * Matched by the barrier in pull_rt_task().
	 */
	smp_wmb();
	atomic_inc(&rq->rd->rto_count);
}

static inline void rt_clear_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	/* the order here really doesn't matter */
	atomic_dec(&rq->rd->rto_count);
	cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}

static void update_rt_migration(struct rt_rq *rt_rq)
{
	if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
		if (!rt_rq->overloaded) {
			rt_set_overload(rq_of_rt_rq(rt_rq));
			rt_rq->overloaded = 1;
		}
	} else if (rt_rq->overloaded) {
		rt_clear_overload(rq_of_rt_rq(rt_rq));
		rt_rq->overloaded = 0;
	}
}

static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct task_struct *p;

	if (!rt_entity_is_task(rt_se))
		return;

	p = rt_task_of(rt_se);
	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total++;
	if (p->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory++;

	update_rt_migration(rt_rq);
}

static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct task_struct *p;

	if (!rt_entity_is_task(rt_se))
		return;

	p = rt_task_of(rt_se);
	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total--;
	if (p->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory--;

	update_rt_migration(rt_rq);
}

static inline int has_pushable_tasks(struct rq *rq)
{
	return !plist_head_empty(&rq->rt.pushable_tasks);
}

static DEFINE_PER_CPU(struct balance_callback, rt_push_head);
static DEFINE_PER_CPU(struct balance_callback, rt_pull_head);

static void push_rt_tasks(struct rq *);
static void pull_rt_task(struct rq *);

static inline void rt_queue_push_tasks(struct rq *rq)
{
	if (!has_pushable_tasks(rq))
		return;

	queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
}

static inline void rt_queue_pull_task(struct rq *rq)
{
	queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
}

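/*
 * Added note (illustrative, not in the original source): rt_queue_push_tasks()
 * and rt_queue_pull_task() do not push or pull directly; they queue a per-CPU
 * balance_callback so the actual rebalancing work runs when the rq lock is
 * about to be released, rather than from deep inside a scheduler-class
 * callback that already holds it.
 */
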
static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
	plist_node_init(&p->pushable_tasks, p->prio);
	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);

	/* Update the highest prio pushable task */
	if (p->prio < rq->rt.highest_prio.next)
		rq->rt.highest_prio.next = p->prio;
}

static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);

	/* Update the new highest prio pushable task */
	if (has_pushable_tasks(rq)) {
		p = plist_first_entry(&rq->rt.pushable_tasks,
				      struct task_struct, pushable_tasks);
		rq->rt.highest_prio.next = p->prio;
	} else {
		rq->rt.highest_prio.next = MAX_RT_PRIO-1;
	}
}

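/*
 * Added note (illustrative, not in the original source): pushable_tasks is a
 * priority-ordered plist, so plist_first_entry() above always yields the
 * highest-priority remaining pushable task; that priority is what
 * highest_prio.next caches for the push/pull logic.
 */
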
#else

static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline
void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline
void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline void rt_queue_push_tasks(struct rq *rq)
{
}
#endif /* CONFIG_SMP */

static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
	return rt_se->on_rq;
}

#ifdef CONFIG_UCLAMP_TASK
/*
 * Verify the fitness of task @p to run on @cpu taking into account the uclamp
 * settings.
 *
 * This check is only important for heterogeneous systems where the uclamp_min
 * value is higher than the capacity of a @cpu. For a non-heterogeneous system
 * this function will always return true.
 *
 * The function will return true if the capacity of the @cpu is >= the
 * uclamp_min and false otherwise.
 *
 * Note that uclamp_min will be clamped to uclamp_max if uclamp_min
 * > uclamp_max.
 */
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	unsigned int min_cap;
	unsigned int max_cap;
	unsigned int cpu_cap;

	/* Only heterogeneous systems can benefit from this check */
	if (!sched_asym_cpucap_active())
		return true;

	min_cap = uclamp_eff_value(p, UCLAMP_MIN);
	max_cap = uclamp_eff_value(p, UCLAMP_MAX);

	cpu_cap = capacity_orig_of(cpu);

	return cpu_cap >= min(min_cap, max_cap);
}
#else
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	return true;
}
#endif

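/*
 * Illustrative example (not part of the original source): on an asymmetric
 * system with capacity_orig_of() == 512 for a little CPU and 1024 for a big
 * CPU, a task with uclamp_min == 700 and uclamp_max == 1024 only fits the big
 * CPU (1024 >= min(700, 1024), while 512 < 700). If uclamp_max were lowered
 * to 300, min(min_cap, max_cap) == 300 and the little CPU would fit as well.
 */
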
#ifdef CONFIG_RT_GROUP_SCHED

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	if (!rt_rq->tg)
		return RUNTIME_INF;

	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}

typedef struct task_group *rt_rq_iter_t;

static inline struct task_group *next_task_group(struct task_group *tg)
{
	do {
		tg = list_entry_rcu(tg->list.next,
			typeof(struct task_group), list);
	} while (&tg->list != &task_groups && task_group_is_autogroup(tg));

	if (&tg->list == &task_groups)
		tg = NULL;

	return tg;
}

#define for_each_rt_rq(rt_rq, iter, rq)					\
	for (iter = container_of(&task_groups, typeof(*iter), list);	\
		(iter = next_task_group(iter)) &&			\
		(rt_rq = iter->rt_rq[cpu_of(rq)]);)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = rt_se->parent)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return rt_se->my_q;
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);

static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
	struct rq *rq = rq_of_rt_rq(rt_rq);
	struct sched_rt_entity *rt_se;

	int cpu = cpu_of(rq);

	rt_se = rt_rq->tg->rt_se[cpu];

	if (rt_rq->rt_nr_running) {
		if (!rt_se)
			enqueue_top_rt_rq(rt_rq);
		else if (!on_rt_rq(rt_se))
			enqueue_rt_entity(rt_se, 0);

		if (rt_rq->highest_prio.curr < curr->prio)
			resched_curr(rq);
	}
}

static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	struct sched_rt_entity *rt_se;
	int cpu = cpu_of(rq_of_rt_rq(rt_rq));

	rt_se = rt_rq->tg->rt_se[cpu];

	if (!rt_se) {
		dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
		/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
		cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
	}
	else if (on_rt_rq(rt_se))
		dequeue_rt_entity(rt_se, 0);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
}

static int rt_se_boosted(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = group_rt_rq(rt_se);
	struct task_struct *p;

	if (rt_rq)
		return !!rt_rq->rt_nr_boosted;

	p = rt_task_of(rt_se);
	return p->prio != p->normal_prio;
}

#ifdef CONFIG_SMP
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return this_rq()->rd->span;
}
#else
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}
#endif

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &rt_rq->tg->rt_bandwidth;
}

#else /* !CONFIG_RT_GROUP_SCHED */

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(def_rt_bandwidth.rt_period);
}

typedef struct rt_rq *rt_rq_iter_t;

#define for_each_rt_rq(rt_rq, iter, rq) \
	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = NULL)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return NULL;
}

static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	if (!rt_rq->rt_nr_running)
		return;

	enqueue_top_rt_rq(rt_rq);
	resched_curr(rq);
}

static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled;
}

static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return &cpu_rq(cpu)->rt;
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &def_rt_bandwidth;
}

#endif /* CONFIG_RT_GROUP_SCHED */

bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
{
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

	return (hrtimer_active(&rt_b->rt_period_timer) ||
		rt_rq->rt_time < rt_b->rt_runtime);
}

#ifdef CONFIG_SMP
/*
 * We ran out of runtime, see if we can borrow some from our neighbours.
 */
static void do_balance_runtime(struct rt_rq *rt_rq)
{
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
	struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
	int i, weight;
	u64 rt_period;

	weight = cpumask_weight(rd->span);

	raw_spin_lock(&rt_b->rt_runtime_lock);
	rt_period = ktime_to_ns(rt_b->rt_period);
	for_each_cpu(i, rd->span) {
		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
		s64 diff;

		if (iter == rt_rq)
			continue;

		raw_spin_lock(&iter->rt_runtime_lock);
		/*
		 * Either all rqs have inf runtime and there's nothing to steal
		 * or __disable_runtime() below sets a specific rq to inf to
		 * indicate it's been disabled and disallow stealing.
		 */
		if (iter->rt_runtime == RUNTIME_INF)
			goto next;

		/*
		 * From runqueues with spare time, take 1/n part of their
		 * spare time, but no more than our period.
		 */
		diff = iter->rt_runtime - iter->rt_time;
		if (diff > 0) {
			diff = div_u64((u64)diff, weight);
			if (rt_rq->rt_runtime + diff > rt_period)
				diff = rt_period - rt_rq->rt_runtime;
			iter->rt_runtime -= diff;
			rt_rq->rt_runtime += diff;
			if (rt_rq->rt_runtime == rt_period) {
				raw_spin_unlock(&iter->rt_runtime_lock);
				break;
			}
		}
next:
		raw_spin_unlock(&iter->rt_runtime_lock);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);
}

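/*
 * Worked example (illustrative, not part of the original source): with a 1s
 * period on a 4-CPU root domain (weight == 4), if a neighbouring rt_rq has
 * 0.2s of unused runtime (rt_runtime - rt_time), do_balance_runtime() above
 * transfers diff = 0.2s / 4 = 0.05s of it to the starved rt_rq, and the
 * transfer is clamped so the local rt_runtime never exceeds the period.
 */
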
/*
 * Ensure this RQ takes back all the runtime it lent to its neighbours.
 */
static void __disable_runtime(struct rq *rq)
{
	struct root_domain *rd = rq->rd;
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	for_each_rt_rq(rt_rq, iter, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
		s64 want;
		int i;

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * Either we're all inf and nobody needs to borrow, or we're
		 * already disabled and thus have nothing to do, or we have
		 * exactly the right amount of runtime to take out.
		 */
		if (rt_rq->rt_runtime == RUNTIME_INF ||
				rt_rq->rt_runtime == rt_b->rt_runtime)
			goto balanced;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);

		/*
		 * Calculate the difference between what we started out with
		 * and what we currently have; that's the amount of runtime
		 * we lent out and now have to reclaim.
		 */
		want = rt_b->rt_runtime - rt_rq->rt_runtime;

		/*
		 * Greedy reclaim, take back as much as we can.
		 */
		for_each_cpu(i, rd->span) {
			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
			s64 diff;

			/*
			 * Can't reclaim from ourselves or disabled runqueues.
			 */
			if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
				continue;

			raw_spin_lock(&iter->rt_runtime_lock);
			if (want > 0) {
				diff = min_t(s64, iter->rt_runtime, want);
				iter->rt_runtime -= diff;
				want -= diff;
			} else {
				iter->rt_runtime -= want;
				want -= want;
			}
			raw_spin_unlock(&iter->rt_runtime_lock);

			if (!want)
				break;
		}

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * We cannot be left wanting - that would mean some runtime
		 * leaked out of the system.
		 */
		WARN_ON_ONCE(want);
balanced:
		/*
		 * Disable all the borrow logic by pretending we have inf
		 * runtime - in which case borrowing doesn't make sense.
		 */
		rt_rq->rt_runtime = RUNTIME_INF;
		rt_rq->rt_throttled = 0;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);

		/* Make rt_rq available for pick_next_task() */
		sched_rt_rq_enqueue(rt_rq);
	}
}

static void __enable_runtime(struct rq *rq)
{
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	/*
	 * Reset each runqueue's bandwidth settings
	 */
	for_each_rt_rq(rt_rq, iter, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = rt_b->rt_runtime;
		rt_rq->rt_time = 0;
		rt_rq->rt_throttled = 0;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);
	}
}

static void balance_runtime(struct rt_rq *rt_rq)
{
	if (!sched_feat(RT_RUNTIME_SHARE))
		return;

	if (rt_rq->rt_time > rt_rq->rt_runtime) {
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		do_balance_runtime(rt_rq);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
	}
}
#else /* !CONFIG_SMP */
static inline void balance_runtime(struct rt_rq *rt_rq) {}
#endif /* CONFIG_SMP */

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
	int i, idle = 1, throttled = 0;
	const struct cpumask *span;

	span = sched_rt_period_mask();
#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * FIXME: isolated CPUs should really leave the root task group,
	 * whether they are isolcpus or were isolated via cpusets, lest
	 * the timer run on a CPU which does not service all runqueues,
	 * potentially leaving other CPUs indefinitely throttled. If
	 * isolation is really required, the user will turn the throttle
	 * off to kill the perturbations it causes anyway. Meanwhile,
	 * this maintains functionality for boot and/or troubleshooting.
	 */
	if (rt_b == &root_task_group.rt_bandwidth)
		span = cpu_online_mask;
#endif
	for_each_cpu(i, span) {
		int enqueue = 0;
		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
		struct rq *rq = rq_of_rt_rq(rt_rq);
		struct rq_flags rf;
		int skip;

		/*
		 * When span == cpu_online_mask, taking each rq->lock
		 * can be time-consuming. Try to avoid it when possible.
		 */
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
			rt_rq->rt_runtime = rt_b->rt_runtime;
		skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		if (skip)
			continue;

		rq_lock(rq, &rf);
		update_rq_clock(rq);

		if (rt_rq->rt_time) {
			u64 runtime;

			raw_spin_lock(&rt_rq->rt_runtime_lock);
			if (rt_rq->rt_throttled)
				balance_runtime(rt_rq);
			runtime = rt_rq->rt_runtime;
			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
				rt_rq->rt_throttled = 0;
				enqueue = 1;

				/*
				 * When we're idle and a woken (rt) task is
				 * throttled check_preempt_curr() will set
				 * skip_update and the time between the wakeup
				 * and this unthrottle will get accounted as
				 * 'runtime'.
				 */
				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
					rq_clock_cancel_skipupdate(rq);
			}
			if (rt_rq->rt_time || rt_rq->rt_nr_running)
				idle = 0;
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		} else if (rt_rq->rt_nr_running) {
			idle = 0;
			if (!rt_rq_throttled(rt_rq))
				enqueue = 1;
		}
		if (rt_rq->rt_throttled)
			throttled = 1;

		if (enqueue)
			sched_rt_rq_enqueue(rt_rq);
		rq_unlock(rq, &rf);
	}

	if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
		return 1;

	return idle;
}

static inline int rt_se_prio(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_RT_GROUP_SCHED
	struct rt_rq *rt_rq = group_rt_rq(rt_se);

	if (rt_rq)
		return rt_rq->highest_prio.curr;
#endif

	return rt_task_of(rt_se)->prio;
}

static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
	u64 runtime = sched_rt_runtime(rt_rq);

	if (rt_rq->rt_throttled)
		return rt_rq_throttled(rt_rq);

	if (runtime >= sched_rt_period(rt_rq))
		return 0;

	balance_runtime(rt_rq);
	runtime = sched_rt_runtime(rt_rq);
	if (runtime == RUNTIME_INF)
		return 0;

	if (rt_rq->rt_time > runtime) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

		/*
		 * Don't actually throttle groups that have no runtime assigned
		 * but accrue some time due to boosting.
		 */
		if (likely(rt_b->rt_runtime)) {
			rt_rq->rt_throttled = 1;
			printk_deferred_once("sched: RT throttling activated\n");
		} else {
			/*
			 * In case we did anyway, make it go away,
			 * replenishment is a joke, since it will replenish us
			 * with exactly 0 ns.
			 */
			rt_rq->rt_time = 0;
		}

		if (rt_rq_throttled(rt_rq)) {
			sched_rt_rq_dequeue(rt_rq);
			return 1;
		}
	}

	return 0;
}

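/*
 * Added note (illustrative, not in the original source): with the default 95%
 * bandwidth, once rt_rq->rt_time (accumulated by update_curr_rt() below)
 * exceeds the 0.95s runtime inside a 1s period, the rt_rq is marked throttled
 * and dequeued here; do_sched_rt_period_timer() clears the throttle again
 * when the period timer replenishes the runtime.
 */
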
/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	struct sched_rt_entity *rt_se = &curr->rt;
	u64 delta_exec;
	u64 now;

	if (curr->sched_class != &rt_sched_class)
		return;

	now = rq_clock_task(rq);
	delta_exec = now - curr->se.exec_start;
	if (unlikely((s64)delta_exec <= 0))
		return;

	schedstat_set(curr->stats.exec_max,
		      max(curr->stats.exec_max, delta_exec));

	trace_sched_stat_runtime(curr, delta_exec, 0);

	update_current_exec_runtime(curr, now, delta_exec);

	if (!rt_bandwidth_enabled())
		return;

	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
		int exceeded;

		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
			raw_spin_lock(&rt_rq->rt_runtime_lock);
			rt_rq->rt_time += delta_exec;
			exceeded = sched_rt_runtime_exceeded(rt_rq);
			if (exceeded)
				resched_curr(rq);
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
			if (exceeded)
				do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
		}
	}
}

static void
dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	BUG_ON(&rq->rt != rt_rq);

	if (!rt_rq->rt_queued)
		return;

	BUG_ON(!rq->nr_running);

	sub_nr_running(rq, count);
	rt_rq->rt_queued = 0;

}

static void
enqueue_top_rt_rq(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	BUG_ON(&rq->rt != rt_rq);

	if (rt_rq->rt_queued)
		return;

	if (rt_rq_throttled(rt_rq))
		return;

	if (rt_rq->rt_nr_running) {
		add_nr_running(rq, rt_rq->rt_nr_running);
		rt_rq->rt_queued = 1;
	}

	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
	cpufreq_update_util(rq, 0);
}

Gregory Haskins398a1532009-01-14 09:10:04 -05001126#if defined CONFIG_SMP
Gregory Haskinse864c492008-12-29 09:39:49 -05001127
Gregory Haskins398a1532009-01-14 09:10:04 -05001128static void
1129inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
Steven Rostedt63489e42008-01-25 21:08:03 +01001130{
Gregory Haskins4d984272008-12-29 09:39:49 -05001131 struct rq *rq = rq_of_rt_rq(rt_rq);
Gregory Haskins4d984272008-12-29 09:39:49 -05001132
Kirill Tkhai757dfca2013-11-27 19:59:13 +04001133#ifdef CONFIG_RT_GROUP_SCHED
1134 /*
1135 * Change rq's cpupri only if rt_rq is the top queue.
1136 */
1137 if (&rq->rt != rt_rq)
1138 return;
1139#endif
Steven Rostedt5181f4a42011-06-16 21:55:23 -04001140 if (rq->online && prio < prev_prio)
1141 cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
Steven Rostedt63489e42008-01-25 21:08:03 +01001142}
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001143
Gregory Haskins398a1532009-01-14 09:10:04 -05001144static void
1145dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
Steven Rostedt63489e42008-01-25 21:08:03 +01001146{
Gregory Haskins4d984272008-12-29 09:39:49 -05001147 struct rq *rq = rq_of_rt_rq(rt_rq);
Gregory Haskins6e0534f2008-05-12 21:21:01 +02001148
Kirill Tkhai757dfca2013-11-27 19:59:13 +04001149#ifdef CONFIG_RT_GROUP_SCHED
1150 /*
1151 * Change rq's cpupri only if rt_rq is the top queue.
1152 */
1153 if (&rq->rt != rt_rq)
1154 return;
1155#endif
Gregory Haskins398a1532009-01-14 09:10:04 -05001156 if (rq->online && rt_rq->highest_prio.curr != prev_prio)
1157 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
1158}
1159
1160#else /* CONFIG_SMP */
1161
1162static inline
1163void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
1164static inline
1165void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
1166
1167#endif /* CONFIG_SMP */
1168
Steven Rostedt63489e42008-01-25 21:08:03 +01001169#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
Gregory Haskins398a1532009-01-14 09:10:04 -05001170static void
1171inc_rt_prio(struct rt_rq *rt_rq, int prio)
1172{
1173 int prev_prio = rt_rq->highest_prio.curr;
Steven Rostedt63489e42008-01-25 21:08:03 +01001174
Gregory Haskins398a1532009-01-14 09:10:04 -05001175 if (prio < prev_prio)
1176 rt_rq->highest_prio.curr = prio;
1177
1178 inc_rt_prio_smp(rt_rq, prio, prev_prio);
1179}
1180
1181static void
1182dec_rt_prio(struct rt_rq *rt_rq, int prio)
1183{
1184 int prev_prio = rt_rq->highest_prio.curr;
1185
1186 if (rt_rq->rt_nr_running) {
1187
1188 WARN_ON(prio < prev_prio);
Gregory Haskinse864c492008-12-29 09:39:49 -05001189
1190 /*
Gregory Haskins398a1532009-01-14 09:10:04 -05001191 * This may have been our highest task, and therefore
1192 * we may have some recomputation to do
Gregory Haskinse864c492008-12-29 09:39:49 -05001193 */
Gregory Haskins398a1532009-01-14 09:10:04 -05001194 if (prio == prev_prio) {
Gregory Haskinse864c492008-12-29 09:39:49 -05001195 struct rt_prio_array *array = &rt_rq->active;
1196
1197 rt_rq->highest_prio.curr =
Steven Rostedt764a9d62008-01-25 21:08:04 +01001198 sched_find_first_bit(array->bitmap);
Gregory Haskinse864c492008-12-29 09:39:49 -05001199 }
1200
Peter Zijlstra934fc332020-10-14 21:06:49 +02001201 } else {
1202 rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
1203 }
Gregory Haskins73fe6aae2008-01-25 21:08:07 +01001204
Gregory Haskins398a1532009-01-14 09:10:04 -05001205 dec_rt_prio_smp(rt_rq, prio, prev_prio);
1206}
Gregory Haskins1f11eb6a2008-06-04 15:04:05 -04001207
Gregory Haskins398a1532009-01-14 09:10:04 -05001208#else
1209
1210static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
1211static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
1212
1213#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
1214
Gregory Haskins73fe6aae2008-01-25 21:08:07 +01001215#ifdef CONFIG_RT_GROUP_SCHED
Gregory Haskins398a1532009-01-14 09:10:04 -05001216
1217static void
1218inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1219{
Gregory Haskins73fe6aae2008-01-25 21:08:07 +01001220 if (rt_se_boosted(rt_se))
Steven Rostedt764a9d62008-01-25 21:08:04 +01001221 rt_rq->rt_nr_boosted++;
Peter Zijlstra052f1dc2008-02-13 15:45:40 +01001222
Peter Zijlstra23b0fdf2008-02-13 15:45:39 +01001223 if (rt_rq->tg)
1224 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
Gregory Haskins398a1532009-01-14 09:10:04 -05001225}
1226
1227static void
1228dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1229{
Peter Zijlstra23b0fdf2008-02-13 15:45:39 +01001230 if (rt_se_boosted(rt_se))
1231 rt_rq->rt_nr_boosted--;
1232
1233 WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
Gregory Haskins398a1532009-01-14 09:10:04 -05001234}
1235
1236#else /* CONFIG_RT_GROUP_SCHED */
1237
1238static void
1239inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1240{
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001241 start_rt_bandwidth(&def_rt_bandwidth);
Gregory Haskins398a1532009-01-14 09:10:04 -05001242}
1243
1244static inline
1245void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
1246
1247#endif /* CONFIG_RT_GROUP_SCHED */
1248
1249static inline
Kirill Tkhai22abdef2014-03-15 02:14:49 +04001250unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
1251{
1252 struct rt_rq *group_rq = group_rt_rq(rt_se);
1253
1254 if (group_rq)
1255 return group_rq->rt_nr_running;
1256 else
1257 return 1;
1258}
1259
1260static inline
Frederic Weisbecker01d36d02015-11-04 18:17:10 +01001261unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
1262{
1263 struct rt_rq *group_rq = group_rt_rq(rt_se);
1264 struct task_struct *tsk;
1265
1266 if (group_rq)
1267 return group_rq->rr_nr_running;
1268
1269 tsk = rt_task_of(rt_se);
1270
1271 return (tsk->policy == SCHED_RR) ? 1 : 0;
1272}
1273
1274static inline
Gregory Haskins398a1532009-01-14 09:10:04 -05001275void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1276{
1277 int prio = rt_se_prio(rt_se);
1278
1279 WARN_ON(!rt_prio(prio));
Kirill Tkhai22abdef2014-03-15 02:14:49 +04001280 rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
Frederic Weisbecker01d36d02015-11-04 18:17:10 +01001281 rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
Gregory Haskins398a1532009-01-14 09:10:04 -05001282
1283 inc_rt_prio(rt_rq, prio);
1284 inc_rt_migration(rt_se, rt_rq);
1285 inc_rt_group(rt_se, rt_rq);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001286}
1287
Peter Zijlstra23b0fdf2008-02-13 15:45:39 +01001288static inline
1289void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1290{
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001291 WARN_ON(!rt_prio(rt_se_prio(rt_se)));
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001292 WARN_ON(!rt_rq->rt_nr_running);
Kirill Tkhai22abdef2014-03-15 02:14:49 +04001293 rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
Frederic Weisbecker01d36d02015-11-04 18:17:10 +01001294 rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001295
Gregory Haskins398a1532009-01-14 09:10:04 -05001296 dec_rt_prio(rt_rq, rt_se_prio(rt_se));
1297 dec_rt_migration(rt_se, rt_rq);
1298 dec_rt_group(rt_se, rt_rq);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001299}
1300
Peter Zijlstraff77e462016-01-18 15:27:07 +01001301/*
1302 * Change rt_se->run_list location unless SAVE && !MOVE
1303 *
1304 * assumes ENQUEUE/DEQUEUE flags match
1305 */
1306static inline bool move_entity(unsigned int flags)
1307{
1308 if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
1309 return false;
1310
1311 return true;
1312}
1313
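/*
 * Remove an entity from its priority queue and clear the corresponding
 * bit in the bitmap once the queue for that priority becomes empty.
 */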
1314static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
1315{
1316 list_del_init(&rt_se->run_list);
1317
1318 if (list_empty(array->queue + rt_se_prio(rt_se)))
1319 __clear_bit(rt_se_prio(rt_se), array->bitmap);
1320
1321 rt_se->on_list = 0;
1322}
1323
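/*
 * Schedstat helpers for the RT class: the update_stats_*_rt() functions
 * below are no-ops unless schedstat_enabled(), and group entities are
 * skipped since these statistics are only kept per task.
 */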
Yafang Shao57a5c2d2021-09-05 14:35:45 +00001324static inline struct sched_statistics *
1325__schedstats_from_rt_se(struct sched_rt_entity *rt_se)
1326{
1327#ifdef CONFIG_RT_GROUP_SCHED
1328 /* schedstats is not supported for rt group. */
1329 if (!rt_entity_is_task(rt_se))
1330 return NULL;
1331#endif
1332
1333 return &rt_task_of(rt_se)->stats;
1334}
1335
1336static inline void
1337update_stats_wait_start_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
1338{
1339 struct sched_statistics *stats;
1340 struct task_struct *p = NULL;
1341
1342 if (!schedstat_enabled())
1343 return;
1344
1345 if (rt_entity_is_task(rt_se))
1346 p = rt_task_of(rt_se);
1347
1348 stats = __schedstats_from_rt_se(rt_se);
1349 if (!stats)
1350 return;
1351
1352 __update_stats_wait_start(rq_of_rt_rq(rt_rq), p, stats);
1353}
1354
1355static inline void
1356update_stats_enqueue_sleeper_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
1357{
1358 struct sched_statistics *stats;
1359 struct task_struct *p = NULL;
1360
1361 if (!schedstat_enabled())
1362 return;
1363
1364 if (rt_entity_is_task(rt_se))
1365 p = rt_task_of(rt_se);
1366
1367 stats = __schedstats_from_rt_se(rt_se);
1368 if (!stats)
1369 return;
1370
1371 __update_stats_enqueue_sleeper(rq_of_rt_rq(rt_rq), p, stats);
1372}
1373
1374static inline void
1375update_stats_enqueue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
1376 int flags)
1377{
1378 if (!schedstat_enabled())
1379 return;
1380
1381 if (flags & ENQUEUE_WAKEUP)
1382 update_stats_enqueue_sleeper_rt(rt_rq, rt_se);
1383}
1384
1385static inline void
1386update_stats_wait_end_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
1387{
1388 struct sched_statistics *stats;
1389 struct task_struct *p = NULL;
1390
1391 if (!schedstat_enabled())
1392 return;
1393
1394 if (rt_entity_is_task(rt_se))
1395 p = rt_task_of(rt_se);
1396
1397 stats = __schedstats_from_rt_se(rt_se);
1398 if (!stats)
1399 return;
1400
1401 __update_stats_wait_end(rq_of_rt_rq(rt_rq), p, stats);
1402}
1403
1404static inline void
1405update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
1406 int flags)
1407{
1408 struct task_struct *p = NULL;
1409
1410 if (!schedstat_enabled())
1411 return;
1412
1413 if (rt_entity_is_task(rt_se))
1414 p = rt_task_of(rt_se);
1415
1416 if ((flags & DEQUEUE_SLEEP) && p) {
1417 unsigned int state;
1418
1419 state = READ_ONCE(p->__state);
1420 if (state & TASK_INTERRUPTIBLE)
1421 __schedstat_set(p->stats.sleep_start,
1422 rq_clock(rq_of_rt_rq(rt_rq)));
1423
1424 if (state & TASK_UNINTERRUPTIBLE)
1425 __schedstat_set(p->stats.block_start,
1426 rq_clock(rq_of_rt_rq(rt_rq)));
1427 }
1428}
1429
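/*
 * Enqueue/dequeue a single entity on its rt_rq's priority array. These
 * operate on one level of the hierarchy only; the callers below walk
 * the whole sched_rt_entity stack.
 */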
Peter Zijlstraff77e462016-01-18 15:27:07 +01001430static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001431{
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001432 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1433 struct rt_prio_array *array = &rt_rq->active;
1434 struct rt_rq *group_rq = group_rt_rq(rt_se);
Dmitry Adamushko20b63312008-06-11 00:58:30 +02001435 struct list_head *queue = array->queue + rt_se_prio(rt_se);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001436
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001437 /*
1438 * Don't enqueue the group if it's throttled, or when empty.
1439 * The latter is a consequence of the former when a child group
1440 * gets throttled and the current group doesn't have any other
1441 * active members.
1442 */
Peter Zijlstraff77e462016-01-18 15:27:07 +01001443 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
1444 if (rt_se->on_list)
1445 __delist_rt_entity(rt_se, array);
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001446 return;
Peter Zijlstraff77e462016-01-18 15:27:07 +01001447 }
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001448
Peter Zijlstraff77e462016-01-18 15:27:07 +01001449 if (move_entity(flags)) {
1450 WARN_ON_ONCE(rt_se->on_list);
1451 if (flags & ENQUEUE_HEAD)
1452 list_add(&rt_se->run_list, queue);
1453 else
1454 list_add_tail(&rt_se->run_list, queue);
1455
1456 __set_bit(rt_se_prio(rt_se), array->bitmap);
1457 rt_se->on_list = 1;
1458 }
1459 rt_se->on_rq = 1;
Peter Zijlstra78f2c7d2008-01-25 21:08:27 +01001460
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001461 inc_rt_tasks(rt_se, rt_rq);
1462}
1463
Peter Zijlstraff77e462016-01-18 15:27:07 +01001464static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001465{
1466 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1467 struct rt_prio_array *array = &rt_rq->active;
1468
Peter Zijlstraff77e462016-01-18 15:27:07 +01001469 if (move_entity(flags)) {
1470 WARN_ON_ONCE(!rt_se->on_list);
1471 __delist_rt_entity(rt_se, array);
1472 }
1473 rt_se->on_rq = 0;
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001474
1475 dec_rt_tasks(rt_se, rt_rq);
1476}
1477
1478/*
1479 * Because the prio of an upper entry depends on the lower
1480 * entries, we must remove entries top-down.
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001481 */
Peter Zijlstraff77e462016-01-18 15:27:07 +01001482static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001483{
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001484 struct sched_rt_entity *back = NULL;
Nicolas Saenz Julienne5c66d1b2022-06-28 11:22:59 +02001485 unsigned int rt_nr_running;
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001486
Peter Zijlstra58d6c2d2008-04-19 19:45:00 +02001487 for_each_sched_rt_entity(rt_se) {
1488 rt_se->back = back;
1489 back = rt_se;
1490 }
1491
Nicolas Saenz Julienne5c66d1b2022-06-28 11:22:59 +02001492 rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001493
Peter Zijlstra58d6c2d2008-04-19 19:45:00 +02001494 for (rt_se = back; rt_se; rt_se = rt_se->back) {
1495 if (on_rt_rq(rt_se))
Peter Zijlstraff77e462016-01-18 15:27:07 +01001496 __dequeue_rt_entity(rt_se, flags);
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001497 }
Nicolas Saenz Julienne5c66d1b2022-06-28 11:22:59 +02001498
1499 dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001500}
1501
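/*
 * Because the priority of an upper entry depends on its lower entries,
 * the entity stack is dequeued first and then re-enqueued from @rt_se
 * upwards.
 */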
Peter Zijlstraff77e462016-01-18 15:27:07 +01001502static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001503{
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001504 struct rq *rq = rq_of_rt_se(rt_se);
1505
Yafang Shao57a5c2d2021-09-05 14:35:45 +00001506 update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags);
1507
Peter Zijlstraff77e462016-01-18 15:27:07 +01001508 dequeue_rt_stack(rt_se, flags);
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001509 for_each_sched_rt_entity(rt_se)
Peter Zijlstraff77e462016-01-18 15:27:07 +01001510 __enqueue_rt_entity(rt_se, flags);
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001511 enqueue_top_rt_rq(&rq->rt);
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001512}
1513
Peter Zijlstraff77e462016-01-18 15:27:07 +01001514static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001515{
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001516 struct rq *rq = rq_of_rt_se(rt_se);
1517
Yafang Shao57a5c2d2021-09-05 14:35:45 +00001518 update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags);
1519
Peter Zijlstraff77e462016-01-18 15:27:07 +01001520 dequeue_rt_stack(rt_se, flags);
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001521
1522 for_each_sched_rt_entity(rt_se) {
1523 struct rt_rq *rt_rq = group_rt_rq(rt_se);
1524
1525 if (rt_rq && rt_rq->rt_nr_running)
Peter Zijlstraff77e462016-01-18 15:27:07 +01001526 __enqueue_rt_entity(rt_se, flags);
Peter Zijlstra58d6c2d2008-04-19 19:45:00 +02001527 }
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001528 enqueue_top_rt_rq(&rq->rt);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001529}
1530
1531/*
1532 * Adding/removing a task to/from a priority array:
1533 */
Thomas Gleixnerea87bb72010-01-20 20:58:57 +00001534static void
Peter Zijlstra371fd7e2010-03-24 16:38:48 +01001535enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001536{
1537 struct sched_rt_entity *rt_se = &p->rt;
1538
Peter Zijlstra371fd7e2010-03-24 16:38:48 +01001539 if (flags & ENQUEUE_WAKEUP)
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001540 rt_se->timeout = 0;
1541
Yafang Shao57a5c2d2021-09-05 14:35:45 +00001542 check_schedstat_required();
1543 update_stats_wait_start_rt(rt_rq_of_se(rt_se), rt_se);
1544
Peter Zijlstraff77e462016-01-18 15:27:07 +01001545 enqueue_rt_entity(rt_se, flags);
Peter Zijlstrac09595f2008-06-27 13:41:14 +02001546
Ingo Molnar4b53a342017-02-05 15:41:03 +01001547 if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
Gregory Haskins917b6272008-12-29 09:39:53 -05001548 enqueue_pushable_task(rq, p);
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001549}
1550
Peter Zijlstra371fd7e2010-03-24 16:38:48 +01001551static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001552{
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001553 struct sched_rt_entity *rt_se = &p->rt;
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001554
1555 update_curr_rt(rq);
Peter Zijlstraff77e462016-01-18 15:27:07 +01001556 dequeue_rt_entity(rt_se, flags);
Peter Zijlstrac09595f2008-06-27 13:41:14 +02001557
Gregory Haskins917b6272008-12-29 09:39:53 -05001558 dequeue_pushable_task(rq, p);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001559}
1560
1561/*
Richard Weinberger60686312011-11-12 18:07:57 +01001562 * Put a task at the head or the end of the run list without the overhead of
1563 * dequeue followed by enqueue.
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001564 */
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001565static void
1566requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001567{
Ingo Molnar1cdad712008-06-19 09:09:15 +02001568 if (on_rt_rq(rt_se)) {
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001569 struct rt_prio_array *array = &rt_rq->active;
1570 struct list_head *queue = array->queue + rt_se_prio(rt_se);
1571
1572 if (head)
1573 list_move(&rt_se->run_list, queue);
1574 else
1575 list_move_tail(&rt_se->run_list, queue);
Ingo Molnar1cdad712008-06-19 09:09:15 +02001576 }
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001577}
1578
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001579static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001580{
1581 struct sched_rt_entity *rt_se = &p->rt;
1582 struct rt_rq *rt_rq;
1583
1584 for_each_sched_rt_entity(rt_se) {
1585 rt_rq = rt_rq_of_se(rt_se);
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001586 requeue_rt_entity(rt_rq, rt_se, head);
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001587 }
1588}
1589
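/*
 * sched_yield() for the RT class: requeue current at the tail of its
 * priority queue so that other tasks of the same priority get to run.
 */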
1590static void yield_task_rt(struct rq *rq)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001591{
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001592 requeue_task_rt(rq, rq->curr, 0);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001593}
1594
Gregory Haskinse7693a32008-01-25 21:08:09 +01001595#ifdef CONFIG_SMP
Gregory Haskins318e0892008-01-25 21:08:10 +01001596static int find_lowest_rq(struct task_struct *task);
1597
Peter Zijlstra0017d732010-03-24 18:34:10 +01001598static int
Valentin Schneider3aef15512020-11-02 18:45:13 +00001599select_task_rq_rt(struct task_struct *p, int cpu, int flags)
Gregory Haskinse7693a32008-01-25 21:08:09 +01001600{
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001601 struct task_struct *curr;
1602 struct rq *rq;
Qais Yousef804d4022019-10-09 11:46:11 +01001603 bool test;
Steven Rostedtc37495f2011-06-16 21:55:22 -04001604
1605 /* For anything but wake ups, just return the task_cpu */
Valentin Schneider3aef15512020-11-02 18:45:13 +00001606 if (!(flags & (WF_TTWU | WF_FORK)))
Steven Rostedtc37495f2011-06-16 21:55:22 -04001607 goto out;
1608
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001609 rq = cpu_rq(cpu);
1610
1611 rcu_read_lock();
Jason Low316c1608d2015-04-28 13:00:20 -07001612 curr = READ_ONCE(rq->curr); /* unlocked access */
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001613
Gregory Haskins318e0892008-01-25 21:08:10 +01001614 /*
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001615 * If the current task on @p's runqueue is an RT task, then
Steven Rostedte1f47d82008-01-25 21:08:12 +01001616 * try to see if we can wake this RT task up on another
1617 * runqueue. Otherwise simply start this RT task
1618 * on its current runqueue.
1619 *
Steven Rostedt43fa5462010-09-20 22:40:03 -04001620 * We want to avoid overloading runqueues. If the woken
1621 * task is a higher priority, then it will stay on this CPU
1622 * and the lower prio task should be moved to another CPU.
1623 * Even though this will probably make the lower prio task
1624 * lose its cache, we do not want to bounce a higher task
1625 * around just because it gave up its CPU, perhaps for a
1626 * lock?
1627 *
1628 * For equal prio tasks, we just let the scheduler sort it out.
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001629 *
Gregory Haskins318e0892008-01-25 21:08:10 +01001630 * Otherwise, just let it ride on the affined RQ and the
1631 * post-schedule router will push the preempted task away
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001632 *
1633 * This test is optimistic, if we get it wrong the load-balancer
1634 * will have to sort it out.
Qais Yousef804d4022019-10-09 11:46:11 +01001635 *
1636 * We take into account the capacity of the CPU to ensure it fits the
1637 * requirement of the task - which is only important on heterogeneous
1638 * systems like big.LITTLE.
Gregory Haskins318e0892008-01-25 21:08:10 +01001639 */
Qais Yousef804d4022019-10-09 11:46:11 +01001640 test = curr &&
1641 unlikely(rt_task(curr)) &&
1642 (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
1643
1644 if (test || !rt_task_fits_capacity(p, cpu)) {
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001645 int target = find_lowest_rq(p);
1646
Tim Chen80e3d872014-12-12 15:38:12 -08001647 /*
Qais Yousefb28bc1e2020-03-02 13:27:17 +00001648 * Bail out if we were forcing a migration to find a better
1649 * fitting CPU but our search failed.
1650 */
1651 if (!test && target != -1 && !rt_task_fits_capacity(p, target))
1652 goto out_unlock;
1653
1654 /*
Tim Chen80e3d872014-12-12 15:38:12 -08001655 * Don't bother moving it if the destination CPU is
1656 * not running a lower priority task.
1657 */
1658 if (target != -1 &&
1659 p->prio < cpu_rq(target)->rt.highest_prio.curr)
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001660 cpu = target;
1661 }
Qais Yousefb28bc1e2020-03-02 13:27:17 +00001662
1663out_unlock:
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001664 rcu_read_unlock();
1665
Steven Rostedtc37495f2011-06-16 21:55:22 -04001666out:
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001667 return cpu;
Gregory Haskinse7693a32008-01-25 21:08:09 +01001668}
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001669
1670static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1671{
Wanpeng Li308a6232014-10-31 06:39:31 +08001672 /*
1673	 * Current can't be migrated, so it is useless to reschedule;
1674 * let's hope p can move out.
1675 */
Ingo Molnar4b53a342017-02-05 15:41:03 +01001676 if (rq->curr->nr_cpus_allowed == 1 ||
Qais Yousefa1bd02e2020-03-02 13:27:18 +00001677 !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001678 return;
1679
Wanpeng Li308a6232014-10-31 06:39:31 +08001680 /*
1681 * p is migratable, so let's not schedule it and
1682 * see if it is pushed or pulled somewhere else.
1683 */
Qais Yousef804d4022019-10-09 11:46:11 +01001684 if (p->nr_cpus_allowed != 1 &&
Qais Yousefa1bd02e2020-03-02 13:27:18 +00001685 cpupri_find(&rq->rd->cpupri, p, NULL))
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001686 return;
1687
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001688 /*
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001689 * There appear to be other CPUs that can accept
1690	 * the current task but none can run 'p', so let's reschedule
1691 * to try and push the current task away:
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001692 */
1693 requeue_task_rt(rq, p, 1);
Kirill Tkhai88751252014-06-29 00:03:57 +04001694 resched_curr(rq);
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001695}
1696
Peter Zijlstra6e2df052019-11-08 11:11:52 +01001697static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1698{
1699 if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
1700 /*
1701 * This is OK, because current is on_cpu, which avoids it being
1702 * picked for load-balance and preemption/IRQs are still
1703 * disabled avoiding further scheduler activity on it and we've
1704 * not yet started the picking loop.
1705 */
1706 rq_unpin_lock(rq, rf);
1707 pull_rt_task(rq);
1708 rq_repin_lock(rq, rf);
1709 }
1710
1711 return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
1712}
Gregory Haskinse7693a32008-01-25 21:08:09 +01001713#endif /* CONFIG_SMP */
1714
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001715/*
1716 * Preempt the current task with a newly woken task if needed:
1717 */
Peter Zijlstra7d478722009-09-14 19:55:44 +02001718static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001719{
Gregory Haskins45c01e82008-05-12 21:20:41 +02001720 if (p->prio < rq->curr->prio) {
Kirill Tkhai88751252014-06-29 00:03:57 +04001721 resched_curr(rq);
Gregory Haskins45c01e82008-05-12 21:20:41 +02001722 return;
1723 }
1724
1725#ifdef CONFIG_SMP
1726 /*
1727 * If:
1728 *
1729 * - the newly woken task is of equal priority to the current task
1730 * - the newly woken task is non-migratable while current is migratable
1731 * - current will be preempted on the next reschedule
1732 *
1733 * we should check to see if current can readily move to a different
1734	 * CPU. If so, we will reschedule to allow the push logic to try
1735 * to move current somewhere else, making room for our non-migratable
1736 * task.
1737 */
Hillf Danton8dd0de8b2011-06-14 18:36:24 -04001738 if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001739 check_preempt_equal_prio(rq, p);
Gregory Haskins45c01e82008-05-12 21:20:41 +02001740#endif
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001741}
1742
Peter Zijlstraa0e813f2019-11-08 14:16:00 +01001743static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
Muchun Songff1cdc92018-10-26 21:17:43 +08001744{
Yafang Shao57a5c2d2021-09-05 14:35:45 +00001745 struct sched_rt_entity *rt_se = &p->rt;
1746 struct rt_rq *rt_rq = &rq->rt;
1747
Muchun Songff1cdc92018-10-26 21:17:43 +08001748 p->se.exec_start = rq_clock_task(rq);
Yafang Shao57a5c2d2021-09-05 14:35:45 +00001749 if (on_rt_rq(&p->rt))
1750 update_stats_wait_end_rt(rt_rq, rt_se);
Muchun Songff1cdc92018-10-26 21:17:43 +08001751
1752 /* The running task is never eligible for pushing */
1753 dequeue_pushable_task(rq, p);
Peter Zijlstraf95d4ea2019-05-29 20:36:40 +00001754
Peter Zijlstraa0e813f2019-11-08 14:16:00 +01001755 if (!first)
1756 return;
1757
Peter Zijlstraf95d4ea2019-05-29 20:36:40 +00001758 /*
1759 * If prev task was rt, put_prev_task() has already updated the
1760 * utilization. We only care of the case where we start to schedule a
1761 * rt task
1762 */
1763 if (rq->curr->sched_class != &rt_sched_class)
1764 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
1765
1766 rt_queue_push_tasks(rq);
Muchun Songff1cdc92018-10-26 21:17:43 +08001767}
1768
Dietmar Eggemann821aecd2022-03-02 19:34:33 +01001769static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001770{
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001771 struct rt_prio_array *array = &rt_rq->active;
1772 struct sched_rt_entity *next = NULL;
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001773 struct list_head *queue;
1774 int idx;
1775
1776 idx = sched_find_first_bit(array->bitmap);
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001777 BUG_ON(idx >= MAX_RT_PRIO);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001778
1779 queue = array->queue + idx;
Pietro Borrello7c4a5b82023-02-06 22:33:54 +00001780 if (SCHED_WARN_ON(list_empty(queue)))
1781 return NULL;
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001782 next = list_entry(queue->next, struct sched_rt_entity, run_list);
Dmitry Adamushko326587b2008-01-25 21:08:34 +01001783
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001784 return next;
1785}
1786
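/*
 * Walk down the group hierarchy, picking the first entity on the
 * highest populated priority queue at each level, until a task is
 * reached.
 */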
Gregory Haskins917b6272008-12-29 09:39:53 -05001787static struct task_struct *_pick_next_task_rt(struct rq *rq)
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001788{
1789 struct sched_rt_entity *rt_se;
Peter Zijlstra606dba22012-02-11 06:05:00 +01001790 struct rt_rq *rt_rq = &rq->rt;
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001791
1792 do {
Dietmar Eggemann821aecd2022-03-02 19:34:33 +01001793 rt_se = pick_next_rt_entity(rt_rq);
Pietro Borrello7c4a5b82023-02-06 22:33:54 +00001794 if (unlikely(!rt_se))
1795 return NULL;
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001796 rt_rq = group_rt_rq(rt_se);
1797 } while (rt_rq);
1798
Muchun Songff1cdc92018-10-26 21:17:43 +08001799 return rt_task_of(rt_se);
Gregory Haskins917b6272008-12-29 09:39:53 -05001800}
1801
Peter Zijlstra21f56ffe2020-11-17 18:19:32 -05001802static struct task_struct *pick_task_rt(struct rq *rq)
Gregory Haskins917b6272008-12-29 09:39:53 -05001803{
Peter Zijlstra606dba22012-02-11 06:05:00 +01001804 struct task_struct *p;
Peter Zijlstra606dba22012-02-11 06:05:00 +01001805
Peter Zijlstra6e2df052019-11-08 11:11:52 +01001806 if (!sched_rt_runnable(rq))
Peter Zijlstra606dba22012-02-11 06:05:00 +01001807 return NULL;
1808
Peter Zijlstra606dba22012-02-11 06:05:00 +01001809 p = _pick_next_task_rt(rq);
Peter Zijlstra21f56ffe2020-11-17 18:19:32 -05001810
1811 return p;
1812}
1813
1814static struct task_struct *pick_next_task_rt(struct rq *rq)
1815{
1816 struct task_struct *p = pick_task_rt(rq);
1817
1818 if (p)
1819 set_next_task_rt(rq, p, true);
1820
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001821 return p;
1822}
1823
Peter Zijlstra6e2df052019-11-08 11:11:52 +01001824static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001825{
Yafang Shao57a5c2d2021-09-05 14:35:45 +00001826 struct sched_rt_entity *rt_se = &p->rt;
1827 struct rt_rq *rt_rq = &rq->rt;
1828
1829 if (on_rt_rq(&p->rt))
1830 update_stats_wait_start_rt(rt_rq, rt_se);
1831
Ingo Molnarf1e14ef2007-08-09 11:16:48 +02001832 update_curr_rt(rq);
Gregory Haskins917b6272008-12-29 09:39:53 -05001833
Vincent Guittot23127292019-01-23 16:26:53 +01001834 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
Vincent Guittot371bf422018-06-28 17:45:05 +02001835
Gregory Haskins917b6272008-12-29 09:39:53 -05001836 /*
1837 * The previous task needs to be made eligible for pushing
1838 * if it is still active
1839 */
Ingo Molnar4b53a342017-02-05 15:41:03 +01001840 if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
Gregory Haskins917b6272008-12-29 09:39:53 -05001841 enqueue_pushable_task(rq, p);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001842}
1843
Peter Williams681f3e62007-10-24 18:23:51 +02001844#ifdef CONFIG_SMP
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001845
Steven Rostedte8fa1362008-01-25 21:08:05 +01001846/* Only try algorithms three times */
1847#define RT_MAX_TRIES 3
1848
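/*
 * @p can be migrated to @cpu: it is not currently running and @cpu is
 * in its affinity mask.
 */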
Steven Rostedtf65eda42008-01-25 21:08:07 +01001849static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1850{
Peter Zijlstra0b9d46f2022-09-06 12:33:04 +02001851 if (!task_on_cpu(rq, p) &&
Peter Zijlstra95158a82020-10-01 16:05:39 +02001852 cpumask_test_cpu(cpu, &p->cpus_mask))
Steven Rostedtf65eda42008-01-25 21:08:07 +01001853 return 1;
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001854
Steven Rostedtf65eda42008-01-25 21:08:07 +01001855 return 0;
1856}
1857
Kirill Tkhaie23ee742013-06-07 15:37:43 -04001858/*
1859 * Return the highest-priority pushable task of @rq which is suitable to be executed
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001860 * on @cpu, or NULL otherwise
Kirill Tkhaie23ee742013-06-07 15:37:43 -04001861 */
1862static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
Steven Rostedte8fa1362008-01-25 21:08:05 +01001863{
Kirill Tkhaie23ee742013-06-07 15:37:43 -04001864 struct plist_head *head = &rq->rt.pushable_tasks;
1865 struct task_struct *p;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001866
Kirill Tkhaie23ee742013-06-07 15:37:43 -04001867 if (!has_pushable_tasks(rq))
1868 return NULL;
Peter Zijlstra3d074672010-03-10 17:07:24 +01001869
Kirill Tkhaie23ee742013-06-07 15:37:43 -04001870 plist_for_each_entry(p, head, pushable_tasks) {
1871 if (pick_rt_task(rq, p, cpu))
1872 return p;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001873 }
1874
Kirill Tkhaie23ee742013-06-07 15:37:43 -04001875 return NULL;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001876}
1877
Rusty Russell0e3900e2008-11-25 02:35:13 +10301878static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
Steven Rostedte8fa1362008-01-25 21:08:05 +01001879
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001880static int find_lowest_rq(struct task_struct *task)
1881{
1882 struct sched_domain *sd;
Christoph Lameter4ba29682014-08-26 19:12:21 -05001883 struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001884 int this_cpu = smp_processor_id();
1885 int cpu = task_cpu(task);
Qais Yousefa1bd02e2020-03-02 13:27:18 +00001886 int ret;
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001887
Steven Rostedt0da938c2011-06-14 18:36:25 -04001888 /* Make sure the mask is initialized first */
1889 if (unlikely(!lowest_mask))
1890 return -1;
1891
Ingo Molnar4b53a342017-02-05 15:41:03 +01001892 if (task->nr_cpus_allowed == 1)
Gregory Haskins6e0534f2008-05-12 21:21:01 +02001893 return -1; /* No other targets possible */
1894
Qais Yousefa1bd02e2020-03-02 13:27:18 +00001895 /*
1896 * If we're on asym system ensure we consider the different capacities
1897 * of the CPUs when searching for the lowest_mask.
1898 */
Dietmar Eggemann740cf8a2022-07-29 13:13:03 +02001899 if (sched_asym_cpucap_active()) {
Qais Yousefa1bd02e2020-03-02 13:27:18 +00001900
1901 ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
1902 task, lowest_mask,
1903 rt_task_fits_capacity);
1904 } else {
1905
1906 ret = cpupri_find(&task_rq(task)->rd->cpupri,
1907 task, lowest_mask);
1908 }
1909
1910 if (!ret)
Gregory Haskins06f90db2008-01-25 21:08:13 +01001911 return -1; /* No targets found */
1912
1913 /*
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001914 * At this point we have built a mask of CPUs representing the
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001915 * lowest priority tasks in the system. Now we want to elect
1916 * the best one based on our affinity and topology.
1917 *
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001918 * We prioritize the last CPU that the task executed on since
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001919 * it is most likely cache-hot in that location.
1920 */
Rusty Russell96f874e22008-11-25 02:35:14 +10301921 if (cpumask_test_cpu(cpu, lowest_mask))
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001922 return cpu;
1923
1924 /*
1925 * Otherwise, we consult the sched_domains span maps to figure
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001926 * out which CPU is logically closest to our hot cache data.
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001927 */
Rusty Russelle2c88062009-11-03 14:53:15 +10301928 if (!cpumask_test_cpu(this_cpu, lowest_mask))
1929 this_cpu = -1; /* Skip this_cpu opt if not among lowest */
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001930
Xiaotian Fengcd4ae6a2011-04-22 18:53:54 +08001931 rcu_read_lock();
Rusty Russelle2c88062009-11-03 14:53:15 +10301932 for_each_domain(cpu, sd) {
1933 if (sd->flags & SD_WAKE_AFFINE) {
1934 int best_cpu;
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001935
Rusty Russelle2c88062009-11-03 14:53:15 +10301936 /*
1937 * "this_cpu" is cheaper to preempt than a
1938 * remote processor.
1939 */
1940 if (this_cpu != -1 &&
Xiaotian Fengcd4ae6a2011-04-22 18:53:54 +08001941 cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
1942 rcu_read_unlock();
Rusty Russelle2c88062009-11-03 14:53:15 +10301943 return this_cpu;
Xiaotian Fengcd4ae6a2011-04-22 18:53:54 +08001944 }
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001945
Peter Zijlstra14e292f82020-10-01 15:54:14 +02001946 best_cpu = cpumask_any_and_distribute(lowest_mask,
1947 sched_domain_span(sd));
Xiaotian Fengcd4ae6a2011-04-22 18:53:54 +08001948 if (best_cpu < nr_cpu_ids) {
1949 rcu_read_unlock();
Rusty Russelle2c88062009-11-03 14:53:15 +10301950 return best_cpu;
Xiaotian Fengcd4ae6a2011-04-22 18:53:54 +08001951 }
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001952 }
1953 }
Xiaotian Fengcd4ae6a2011-04-22 18:53:54 +08001954 rcu_read_unlock();
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001955
1956 /*
1957 * And finally, if there were no matches within the domains
1958 * just give the caller *something* to work with from the compatible
1959 * locations.
1960 */
Rusty Russelle2c88062009-11-03 14:53:15 +10301961 if (this_cpu != -1)
1962 return this_cpu;
1963
Peter Zijlstra14e292f82020-10-01 15:54:14 +02001964 cpu = cpumask_any_distribute(lowest_mask);
Rusty Russelle2c88062009-11-03 14:53:15 +10301965 if (cpu < nr_cpu_ids)
1966 return cpu;
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001967
Rusty Russelle2c88062009-11-03 14:53:15 +10301968 return -1;
Gregory Haskins07b40322008-01-25 21:08:10 +01001969}
1970
Steven Rostedte8fa1362008-01-25 21:08:05 +01001971/* Will lock the rq it finds */
Ingo Molnar4df64c02008-01-25 21:08:15 +01001972static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
Steven Rostedte8fa1362008-01-25 21:08:05 +01001973{
1974 struct rq *lowest_rq = NULL;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001975 int tries;
Ingo Molnar4df64c02008-01-25 21:08:15 +01001976 int cpu;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001977
1978 for (tries = 0; tries < RT_MAX_TRIES; tries++) {
Gregory Haskins07b40322008-01-25 21:08:10 +01001979 cpu = find_lowest_rq(task);
Steven Rostedte8fa1362008-01-25 21:08:05 +01001980
Gregory Haskins2de0b462008-01-25 21:08:10 +01001981 if ((cpu == -1) || (cpu == rq->cpu))
Steven Rostedte8fa1362008-01-25 21:08:05 +01001982 break;
1983
Gregory Haskins07b40322008-01-25 21:08:10 +01001984 lowest_rq = cpu_rq(cpu);
1985
Tim Chen80e3d872014-12-12 15:38:12 -08001986 if (lowest_rq->rt.highest_prio.curr <= task->prio) {
1987 /*
1988 * Target rq has tasks of equal or higher priority,
1989 * retrying does not release any lock and is unlikely
1990 * to yield a different result.
1991 */
1992 lowest_rq = NULL;
1993 break;
1994 }
1995
Steven Rostedte8fa1362008-01-25 21:08:05 +01001996 /* if the prio of this runqueue changed, try again */
Gregory Haskins07b40322008-01-25 21:08:10 +01001997 if (double_lock_balance(rq, lowest_rq)) {
Steven Rostedte8fa1362008-01-25 21:08:05 +01001998 /*
1999 * We had to unlock the run queue. In
2000 * the meantime, the task could have
2001 * migrated already or had its affinity changed.
2002 * Also make sure that it wasn't scheduled on its rq.
Schspa Shifeffe5b2022-08-29 01:03:02 +08002003 * It is possible the task was scheduled, set
2004 * "migrate_disabled" and then got preempted, so we must
2005 * check the task migration disable flag here too.
Steven Rostedte8fa1362008-01-25 21:08:05 +01002006 */
Gregory Haskins07b40322008-01-25 21:08:10 +01002007 if (unlikely(task_rq(task) != rq ||
Peter Zijlstra95158a82020-10-01 16:05:39 +02002008 !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
Peter Zijlstra0b9d46f2022-09-06 12:33:04 +02002009 task_on_cpu(rq, task) ||
Xunlei Pang13b5ab02016-05-09 12:11:31 +08002010 !rt_task(task) ||
Schspa Shifeffe5b2022-08-29 01:03:02 +08002011 is_migration_disabled(task) ||
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04002012 !task_on_rq_queued(task))) {
Ingo Molnar4df64c02008-01-25 21:08:15 +01002013
Peter Zijlstra7f1b4392012-05-17 21:19:46 +02002014 double_unlock_balance(rq, lowest_rq);
Steven Rostedte8fa1362008-01-25 21:08:05 +01002015 lowest_rq = NULL;
2016 break;
2017 }
2018 }
2019
2020 /* If this rq is still suitable use it. */
Gregory Haskinse864c492008-12-29 09:39:49 -05002021 if (lowest_rq->rt.highest_prio.curr > task->prio)
Steven Rostedte8fa1362008-01-25 21:08:05 +01002022 break;
2023
2024 /* try again */
Peter Zijlstra1b12bbc2008-08-11 09:30:22 +02002025 double_unlock_balance(rq, lowest_rq);
Steven Rostedte8fa1362008-01-25 21:08:05 +01002026 lowest_rq = NULL;
2027 }
2028
2029 return lowest_rq;
2030}
2031
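/*
 * The pushable list is a plist ordered by priority, so the first entry
 * is the highest-priority task that is a candidate to be pushed away.
 */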
Gregory Haskins917b6272008-12-29 09:39:53 -05002032static struct task_struct *pick_next_pushable_task(struct rq *rq)
2033{
2034 struct task_struct *p;
2035
2036 if (!has_pushable_tasks(rq))
2037 return NULL;
2038
2039 p = plist_first_entry(&rq->rt.pushable_tasks,
2040 struct task_struct, pushable_tasks);
2041
2042 BUG_ON(rq->cpu != task_cpu(p));
2043 BUG_ON(task_current(rq, p));
Ingo Molnar4b53a342017-02-05 15:41:03 +01002044 BUG_ON(p->nr_cpus_allowed <= 1);
Gregory Haskins917b6272008-12-29 09:39:53 -05002045
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04002046 BUG_ON(!task_on_rq_queued(p));
Gregory Haskins917b6272008-12-29 09:39:53 -05002047 BUG_ON(!rt_task(p));
2048
2049 return p;
2050}
2051
Steven Rostedte8fa1362008-01-25 21:08:05 +01002052/*
2053 * If the current CPU has more than one RT task, see if the non-running
2054 * task can migrate over to a CPU that is running a task
2055 * of lesser priority.
2056 */
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002057static int push_rt_task(struct rq *rq, bool pull)
Steven Rostedte8fa1362008-01-25 21:08:05 +01002058{
2059 struct task_struct *next_task;
2060 struct rq *lowest_rq;
Hillf Danton311e8002011-06-16 21:55:20 -04002061 int ret = 0;
Steven Rostedte8fa1362008-01-25 21:08:05 +01002062
Gregory Haskinsa22d7fc2008-01-25 21:08:12 +01002063 if (!rq->rt.overloaded)
2064 return 0;
2065
Gregory Haskins917b6272008-12-29 09:39:53 -05002066 next_task = pick_next_pushable_task(rq);
Steven Rostedte8fa1362008-01-25 21:08:05 +01002067 if (!next_task)
2068 return 0;
2069
Peter Zijlstra49246272010-10-17 21:46:10 +02002070retry:
Valentin Schneider49bef332022-01-27 15:40:59 +00002071 /*
2072	 * It's possible that the next_task slipped in and is of
2073	 * higher priority than current. If that's the case,
2074 * just reschedule current.
2075 */
2076 if (unlikely(next_task->prio < rq->curr->prio)) {
2077 resched_curr(rq);
2078 return 0;
2079 }
2080
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002081 if (is_migration_disabled(next_task)) {
2082 struct task_struct *push_task = NULL;
2083 int cpu;
2084
2085 if (!pull || rq->push_busy)
2086 return 0;
2087
Valentin Schneider49bef332022-01-27 15:40:59 +00002088 /*
2089 * Invoking find_lowest_rq() on anything but an RT task doesn't
2090 * make sense. Per the above priority check, curr has to
2091 * be of higher priority than next_task, so no need to
2092 * reschedule when bailing out.
2093 *
2094 * Note that the stoppers are masqueraded as SCHED_FIFO
2095 * (cf. sched_set_stop_task()), so we can't rely on rt_task().
2096 */
2097 if (rq->curr->sched_class != &rt_sched_class)
2098 return 0;
2099
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002100 cpu = find_lowest_rq(rq->curr);
2101 if (cpu == -1 || cpu == rq->cpu)
2102 return 0;
2103
2104 /*
2105 * Given we found a CPU with lower priority than @next_task,
2106	 * it should be running. However, we cannot migrate it
2107	 * to this other CPU; instead, attempt to push the current
2108 * running task on this CPU away.
2109 */
2110 push_task = get_push_task(rq);
2111 if (push_task) {
Peter Zijlstra5cb9eaa2020-11-17 18:19:31 -05002112 raw_spin_rq_unlock(rq);
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002113 stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
2114 push_task, &rq->push_work);
Peter Zijlstra5cb9eaa2020-11-17 18:19:31 -05002115 raw_spin_rq_lock(rq);
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002116 }
2117
2118 return 0;
2119 }
2120
Yangtao Li9ebc6052018-11-03 13:26:02 -04002121 if (WARN_ON(next_task == rq->curr))
Steven Rostedte8fa1362008-01-25 21:08:05 +01002122 return 0;
2123
Gregory Haskins697f0a42008-01-25 21:08:09 +01002124 /* We might release rq lock */
Steven Rostedte8fa1362008-01-25 21:08:05 +01002125 get_task_struct(next_task);
2126
2127 /* find_lock_lowest_rq locks the rq if found */
Gregory Haskins697f0a42008-01-25 21:08:09 +01002128 lowest_rq = find_lock_lowest_rq(next_task, rq);
Steven Rostedte8fa1362008-01-25 21:08:05 +01002129 if (!lowest_rq) {
2130 struct task_struct *task;
2131 /*
Hillf Danton311e8002011-06-16 21:55:20 -04002132 * find_lock_lowest_rq releases rq->lock
Gregory Haskins15635132008-12-29 09:39:53 -05002133 * so it is possible that next_task has migrated.
2134 *
2135 * We need to make sure that the task is still on the same
2136 * run-queue and is also still the next task eligible for
2137 * pushing.
Steven Rostedte8fa1362008-01-25 21:08:05 +01002138 */
Gregory Haskins917b6272008-12-29 09:39:53 -05002139 task = pick_next_pushable_task(rq);
Byungchul Parkde16b912017-05-12 10:05:43 +09002140 if (task == next_task) {
Gregory Haskins15635132008-12-29 09:39:53 -05002141 /*
Hillf Danton311e8002011-06-16 21:55:20 -04002142 * The task hasn't migrated, and is still the next
2143 * eligible task, but we failed to find a run-queue
2144 * to push it to. Do not retry in this case, since
Ingo Molnar97fb7a02018-03-03 14:01:12 +01002145 * other CPUs will pull from us when ready.
Gregory Haskins15635132008-12-29 09:39:53 -05002146 */
Gregory Haskins15635132008-12-29 09:39:53 -05002147 goto out;
Steven Rostedte8fa1362008-01-25 21:08:05 +01002148 }
Gregory Haskins917b6272008-12-29 09:39:53 -05002149
Gregory Haskins15635132008-12-29 09:39:53 -05002150 if (!task)
2151 /* No more tasks, just exit */
2152 goto out;
2153
Gregory Haskins917b6272008-12-29 09:39:53 -05002154 /*
Gregory Haskins15635132008-12-29 09:39:53 -05002155 * Something has shifted, try again.
Gregory Haskins917b6272008-12-29 09:39:53 -05002156 */
Gregory Haskins15635132008-12-29 09:39:53 -05002157 put_task_struct(next_task);
2158 next_task = task;
2159 goto retry;
Steven Rostedte8fa1362008-01-25 21:08:05 +01002160 }
2161
Gregory Haskins697f0a42008-01-25 21:08:09 +01002162 deactivate_task(rq, next_task, 0);
Steven Rostedte8fa1362008-01-25 21:08:05 +01002163 set_task_cpu(next_task, lowest_rq->cpu);
2164 activate_task(lowest_rq, next_task, 0);
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002165 resched_curr(lowest_rq);
Hillf Danton311e8002011-06-16 21:55:20 -04002166 ret = 1;
Steven Rostedte8fa1362008-01-25 21:08:05 +01002167
Peter Zijlstra1b12bbc2008-08-11 09:30:22 +02002168 double_unlock_balance(rq, lowest_rq);
Steven Rostedte8fa1362008-01-25 21:08:05 +01002169out:
2170 put_task_struct(next_task);
2171
Hillf Danton311e8002011-06-16 21:55:20 -04002172 return ret;
Steven Rostedte8fa1362008-01-25 21:08:05 +01002173}
2174
Steven Rostedte8fa1362008-01-25 21:08:05 +01002175static void push_rt_tasks(struct rq *rq)
2176{
2177 /* push_rt_task will return true if it moved an RT */
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002178 while (push_rt_task(rq, false))
Steven Rostedte8fa1362008-01-25 21:08:05 +01002179 ;
2180}
2181
Steven Rostedtb6366f02015-03-18 14:49:46 -04002182#ifdef HAVE_RT_PUSH_IPI
Steven Rostedtb6366f02015-03-18 14:49:46 -04002183
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05002184/*
2185 * When a high priority task schedules out from a CPU and a lower priority
2186 * task is scheduled in, a check is made to see if there's any RT tasks
2187 * on other CPUs that are waiting to run because a higher priority RT task
2188 * is currently running on its CPU. In this case, the CPU with multiple RT
2189 * tasks queued on it (overloaded) needs to be notified that a CPU has opened
2190 * up that may be able to run one of its non-running queued RT tasks.
2191 *
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002192 * All CPUs with overloaded RT tasks need to be notified as there is currently
2193 * no way to know which of these CPUs has the highest priority task waiting
2194 * to run. Instead of trying to take a spinlock on each of these CPUs,
2195 * which has been shown to cause large latency when done on machines with many
2196 * CPUs, an IPI is sent to the CPUs to have them push off the overloaded
2197 * RT tasks waiting to run.
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05002198 *
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002199 * Just sending an IPI to each of the CPUs is also an issue, as on large
2200 * CPU-count machines, this can cause an IPI storm on a CPU, especially
2201 * if it's the only CPU with multiple RT tasks queued, and a large number
2202 * of CPUs scheduling a lower priority task at the same time.
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05002203 *
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002204 * Each root domain has its own irq work function that can iterate over
2205 * all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
Ingo Molnar3b037062021-03-18 13:38:50 +01002206 * tasks must be checked when one or more CPUs are lowering
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002207 * their priority, there's a single irq work iterator that will try to
2208 * push off RT tasks that are waiting to run.
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05002209 *
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002210 * When a CPU schedules a lower priority task, it will kick off the
2211 * irq work iterator that will jump to each CPU with overloaded RT tasks.
2212 * As it only takes the first CPU that schedules a lower priority task
2213 * to start the process, the rto_start variable is incremented and if
2214 * the atomic result is one, then that CPU will try to take the rto_lock.
2215 * This prevents high contention on the lock as the process handles all
2216 * CPUs scheduling lower priority tasks.
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05002217 *
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002218 * All CPUs that are scheduling a lower priority task will increment the
2219 * rto_loop_next variable. This will make sure that the irq work iterator
2220 * checks all RT overloaded CPUs whenever a CPU schedules a new lower
2221 * priority task, even if the iterator is in the middle of a scan. Incrementing
2222 * the rto_loop_next will cause the iterator to perform another scan.
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05002223 *
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05002224 */
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002225static int rto_next_cpu(struct root_domain *rd)
Steven Rostedtb6366f02015-03-18 14:49:46 -04002226{
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002227 int next;
Steven Rostedtb6366f02015-03-18 14:49:46 -04002228 int cpu;
2229
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002230 /*
2231 * When starting the IPI RT pushing, the rto_cpu is set to -1,
2232 * rt_next_cpu() will simply return the first CPU found in
2233 * the rto_mask.
2234 *
Ingo Molnar97fb7a02018-03-03 14:01:12 +01002235 * If rto_next_cpu() is called with rto_cpu is a valid CPU, it
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002236 * will return the next CPU found in the rto_mask.
2237 *
2238 * If there are no more CPUs left in the rto_mask, then a check is made
2239 * against rto_loop and rto_loop_next. rto_loop is only updated with
2240 * the rto_lock held, but any CPU may increment the rto_loop_next
2241 * without any locking.
2242 */
2243 for (;;) {
2244
2245 /* When rto_cpu is -1 this acts like cpumask_first() */
2246 cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
2247
2248 rd->rto_cpu = cpu;
2249
2250 if (cpu < nr_cpu_ids)
2251 return cpu;
2252
2253 rd->rto_cpu = -1;
2254
2255 /*
2256 * ACQUIRE ensures we see the @rto_mask changes
2257 * made prior to the @next value observed.
2258 *
2259 * Matches WMB in rt_set_overload().
2260 */
2261 next = atomic_read_acquire(&rd->rto_loop_next);
2262
2263 if (rd->rto_loop == next)
2264 break;
2265
2266 rd->rto_loop = next;
Steven Rostedtb6366f02015-03-18 14:49:46 -04002267 }
2268
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002269 return -1;
2270}
Steven Rostedtb6366f02015-03-18 14:49:46 -04002271
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002272static inline bool rto_start_trylock(atomic_t *v)
2273{
2274 return !atomic_cmpxchg_acquire(v, 0, 1);
2275}
2276
2277static inline void rto_start_unlock(atomic_t *v)
2278{
2279 atomic_set_release(v, 0);
2280}
2281
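/*
 * Kick off (or extend) the IPI push loop: bump rto_loop_next so that an
 * already running iterator rescans, and if no iterator is active, queue
 * the rto_push_work irq work on the first RT overloaded CPU.
 */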
2282static void tell_cpu_to_push(struct rq *rq)
2283{
2284 int cpu = -1;
2285
2286 /* Keep the loop going if the IPI is currently active */
2287 atomic_inc(&rq->rd->rto_loop_next);
2288
2289 /* Only one CPU can initiate a loop at a time */
2290 if (!rto_start_trylock(&rq->rd->rto_loop_start))
Steven Rostedtb6366f02015-03-18 14:49:46 -04002291 return;
2292
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002293 raw_spin_lock(&rq->rd->rto_lock);
Steven Rostedtb6366f02015-03-18 14:49:46 -04002294
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002295 /*
Ingo Molnar97fb7a02018-03-03 14:01:12 +01002296 * The rto_cpu is updated under the lock; if it has a valid CPU
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002297 * then the IPI is still running and will continue due to the
2298 * update to loop_next, and nothing needs to be done here.
2299 * Otherwise it is finishing up and an IPI needs to be sent.
2300 */
2301 if (rq->rd->rto_cpu < 0)
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002302 cpu = rto_next_cpu(rq->rd);
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002303
2304 raw_spin_unlock(&rq->rd->rto_lock);
2305
2306 rto_start_unlock(&rq->rd->rto_loop_start);
2307
Steven Rostedt (VMware)364f5662018-01-23 20:45:38 -05002308 if (cpu >= 0) {
2309 /* Make sure the rd does not get freed while pushing */
2310 sched_get_rd(rq->rd);
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002311 irq_work_queue_on(&rq->rd->rto_push_work, cpu);
Steven Rostedt (VMware)364f5662018-01-23 20:45:38 -05002312 }
Steven Rostedtb6366f02015-03-18 14:49:46 -04002313}
2314
2315/* Called from hardirq context */
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002316void rto_push_irq_work_func(struct irq_work *work)
Steven Rostedtb6366f02015-03-18 14:49:46 -04002317{
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002318 struct root_domain *rd =
2319 container_of(work, struct root_domain, rto_push_work);
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002320 struct rq *rq;
Steven Rostedtb6366f02015-03-18 14:49:46 -04002321 int cpu;
2322
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002323 rq = this_rq();
Steven Rostedtb6366f02015-03-18 14:49:46 -04002324
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002325 /*
2326 * We do not need to grab the lock to check for has_pushable_tasks.
2327 * When it gets updated, a check is made if a push is possible.
2328 */
Steven Rostedtb6366f02015-03-18 14:49:46 -04002329 if (has_pushable_tasks(rq)) {
Peter Zijlstra5cb9eaa2020-11-17 18:19:31 -05002330 raw_spin_rq_lock(rq);
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002331 while (push_rt_task(rq, true))
2332 ;
Peter Zijlstra5cb9eaa2020-11-17 18:19:31 -05002333 raw_spin_rq_unlock(rq);
Steven Rostedtb6366f02015-03-18 14:49:46 -04002334 }
2335
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002336 raw_spin_lock(&rd->rto_lock);
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002337
Steven Rostedtb6366f02015-03-18 14:49:46 -04002338 /* Pass the IPI to the next rt overloaded queue */
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002339 cpu = rto_next_cpu(rd);
Steven Rostedtb6366f02015-03-18 14:49:46 -04002340
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002341 raw_spin_unlock(&rd->rto_lock);
Steven Rostedtb6366f02015-03-18 14:49:46 -04002342
Steven Rostedt (VMware)364f5662018-01-23 20:45:38 -05002343 if (cpu < 0) {
2344 sched_put_rd(rd);
Steven Rostedtb6366f02015-03-18 14:49:46 -04002345 return;
Steven Rostedt (VMware)364f5662018-01-23 20:45:38 -05002346 }
Steven Rostedtb6366f02015-03-18 14:49:46 -04002347
Steven Rostedtb6366f02015-03-18 14:49:46 -04002348 /* Try the next RT overloaded CPU */
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002349 irq_work_queue_on(&rd->rto_push_work, cpu);
Steven Rostedtb6366f02015-03-18 14:49:46 -04002350}
2351#endif /* HAVE_RT_PUSH_IPI */
2352
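/*
 * Look at the RT overloaded CPUs in the root domain and pull over a
 * queued task that has a higher priority than what this runqueue is
 * running (or, with RT_PUSH_IPI, tell those CPUs to push instead).
 */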
Peter Zijlstra8046d682015-06-11 14:46:40 +02002353static void pull_rt_task(struct rq *this_rq)
Steven Rostedtf65eda42008-01-25 21:08:07 +01002354{
Peter Zijlstra8046d682015-06-11 14:46:40 +02002355 int this_cpu = this_rq->cpu, cpu;
2356 bool resched = false;
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002357 struct task_struct *p, *push_task;
Steven Rostedtf65eda42008-01-25 21:08:07 +01002358 struct rq *src_rq;
Steven Rostedtf73c52a2017-12-02 13:04:54 -05002359 int rt_overload_count = rt_overloaded(this_rq);
Steven Rostedtf65eda42008-01-25 21:08:07 +01002360
Steven Rostedtf73c52a2017-12-02 13:04:54 -05002361 if (likely(!rt_overload_count))
Peter Zijlstra8046d682015-06-11 14:46:40 +02002362 return;
Steven Rostedtf65eda42008-01-25 21:08:07 +01002363
Peter Zijlstra7c3f2ab2013-10-15 12:35:07 +02002364 /*
2365 * Match the barrier from rt_set_overload(); this guarantees that if we
2366 * see overloaded we must also see the rto_mask bit.
2367 */
2368 smp_rmb();
2369
Steven Rostedtf73c52a2017-12-02 13:04:54 -05002370 /* If we are the only overloaded CPU do nothing */
2371 if (rt_overload_count == 1 &&
2372 cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
2373 return;
2374
Steven Rostedtb6366f02015-03-18 14:49:46 -04002375#ifdef HAVE_RT_PUSH_IPI
2376 if (sched_feat(RT_PUSH_IPI)) {
2377 tell_cpu_to_push(this_rq);
Peter Zijlstra8046d682015-06-11 14:46:40 +02002378 return;
Steven Rostedtb6366f02015-03-18 14:49:46 -04002379 }
2380#endif
2381
Rusty Russellc6c49272008-11-25 02:35:05 +10302382 for_each_cpu(cpu, this_rq->rd->rto_mask) {
Steven Rostedtf65eda42008-01-25 21:08:07 +01002383 if (this_cpu == cpu)
2384 continue;
2385
2386 src_rq = cpu_rq(cpu);
Gregory Haskins74ab8e42008-12-29 09:39:50 -05002387
2388 /*
2389 * Don't bother taking the src_rq->lock if the next highest
2390 * task is known to be lower-priority than our current task.
2391 * This may look racy, but if this value is about to go
2392 * logically higher, the src_rq will push this task away.
2393 * And if it's going logically lower, we do not care.
2394 */
2395 if (src_rq->rt.highest_prio.next >=
2396 this_rq->rt.highest_prio.curr)
2397 continue;
2398
Steven Rostedtf65eda42008-01-25 21:08:07 +01002399 /*
2400 * We can potentially drop this_rq's lock in
2401 * double_lock_balance, and another CPU could
Gregory Haskinsa8728942008-12-29 09:39:49 -05002402 * alter this_rq
Steven Rostedtf65eda42008-01-25 21:08:07 +01002403 */
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002404 push_task = NULL;
Gregory Haskinsa8728942008-12-29 09:39:49 -05002405 double_lock_balance(this_rq, src_rq);
Steven Rostedtf65eda42008-01-25 21:08:07 +01002406
2407 /*
Kirill Tkhaie23ee742013-06-07 15:37:43 -04002408 * We can only pull a task that is pushable
2409 * on its rq, and no others.
Steven Rostedtf65eda42008-01-25 21:08:07 +01002410 */
Kirill Tkhaie23ee742013-06-07 15:37:43 -04002411 p = pick_highest_pushable_task(src_rq, this_cpu);
Steven Rostedtf65eda42008-01-25 21:08:07 +01002412
2413 /*
2414 * Do we have an RT task that preempts
2415 * the to-be-scheduled task?
2416 */
Gregory Haskinsa8728942008-12-29 09:39:49 -05002417 if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
Steven Rostedtf65eda42008-01-25 21:08:07 +01002418 WARN_ON(p == src_rq->curr);
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04002419 WARN_ON(!task_on_rq_queued(p));
Steven Rostedtf65eda42008-01-25 21:08:07 +01002420
2421 /*
2422 * There's a chance that p is higher in priority
Ingo Molnar97fb7a02018-03-03 14:01:12 +01002423 * than what's currently running on its CPU.
Ingo Molnar3b037062021-03-18 13:38:50 +01002424 * This is just that p is waking up and hasn't
Steven Rostedtf65eda42008-01-25 21:08:07 +01002425 * had a chance to schedule. We only pull
2426 * p if it is lower in priority than the
Gregory Haskinsa8728942008-12-29 09:39:49 -05002427 * current task on the run queue
Steven Rostedtf65eda42008-01-25 21:08:07 +01002428 */
Gregory Haskinsa8728942008-12-29 09:39:49 -05002429 if (p->prio < src_rq->curr->prio)
Mike Galbraith614ee1f2008-01-25 21:08:30 +01002430 goto skip;
Steven Rostedtf65eda42008-01-25 21:08:07 +01002431
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002432 if (is_migration_disabled(p)) {
2433 push_task = get_push_task(src_rq);
2434 } else {
2435 deactivate_task(src_rq, p, 0);
2436 set_task_cpu(p, this_cpu);
2437 activate_task(this_rq, p, 0);
2438 resched = true;
2439 }
Steven Rostedtf65eda42008-01-25 21:08:07 +01002440 /*
2441 * We continue with the search, just in
2442 * case there's an even higher prio task
Lucas De Marchi25985ed2011-03-30 22:57:33 -03002443 * in another runqueue. (low likelihood
Steven Rostedtf65eda42008-01-25 21:08:07 +01002444 * but possible)
Steven Rostedtf65eda42008-01-25 21:08:07 +01002445 */
Steven Rostedtf65eda42008-01-25 21:08:07 +01002446 }
Peter Zijlstra49246272010-10-17 21:46:10 +02002447skip:
Peter Zijlstra1b12bbc2008-08-11 09:30:22 +02002448 double_unlock_balance(this_rq, src_rq);
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002449
2450 if (push_task) {
Peter Zijlstra5cb9eaa2020-11-17 18:19:31 -05002451 raw_spin_rq_unlock(this_rq);
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002452 stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
2453 push_task, &src_rq->push_work);
Peter Zijlstra5cb9eaa2020-11-17 18:19:31 -05002454 raw_spin_rq_lock(this_rq);
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002455 }
Steven Rostedtf65eda42008-01-25 21:08:07 +01002456 }
2457
Peter Zijlstra8046d682015-06-11 14:46:40 +02002458 if (resched)
2459 resched_curr(this_rq);
Steven Rostedtf65eda42008-01-25 21:08:07 +01002460}
2461
Gregory Haskins8ae121a2008-04-23 07:13:29 -04002462/*
2463 * If we are not running and we are not going to reschedule soon, we should
2464 * try to push tasks away now
2465 */
Peter Zijlstraefbbd052009-12-16 18:04:40 +01002466static void task_woken_rt(struct rq *rq, struct task_struct *p)
Steven Rostedt4642daf2008-01-25 21:08:07 +01002467{
Peter Zijlstra0b9d46f2022-09-06 12:33:04 +02002468 bool need_to_push = !task_on_cpu(rq, p) &&
Qais Yousef804d4022019-10-09 11:46:11 +01002469 !test_tsk_need_resched(rq->curr) &&
2470 p->nr_cpus_allowed > 1 &&
2471 (dl_task(rq->curr) || rt_task(rq->curr)) &&
2472 (rq->curr->nr_cpus_allowed < 2 ||
2473 rq->curr->prio <= p->prio);
2474
Qais Yousefd94a9df2020-03-02 13:27:20 +00002475 if (need_to_push)
Steven Rostedt4642daf2008-01-25 21:08:07 +01002476 push_rt_tasks(rq);
2477}
2478
Ingo Molnarbdd7c812008-01-25 21:08:18 +01002479/* Assumes rq->lock is held */
Gregory Haskins1f11eb6a2008-06-04 15:04:05 -04002480static void rq_online_rt(struct rq *rq)
Ingo Molnarbdd7c812008-01-25 21:08:18 +01002481{
2482 if (rq->rt.overloaded)
2483 rt_set_overload(rq);
Gregory Haskins6e0534f2008-05-12 21:21:01 +02002484
Peter Zijlstra7def2be2008-06-05 14:49:58 +02002485 __enable_runtime(rq);
2486
Gregory Haskinse864c492008-12-29 09:39:49 -05002487 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
Ingo Molnarbdd7c812008-01-25 21:08:18 +01002488}
2489
2490/* Assumes rq->lock is held */
Gregory Haskins1f11eb6a2008-06-04 15:04:05 -04002491static void rq_offline_rt(struct rq *rq)
Ingo Molnarbdd7c812008-01-25 21:08:18 +01002492{
2493 if (rq->rt.overloaded)
2494 rt_clear_overload(rq);
Gregory Haskins6e0534f2008-05-12 21:21:01 +02002495
Peter Zijlstra7def2be2008-06-05 14:49:58 +02002496 __disable_runtime(rq);
2497
Gregory Haskins6e0534f2008-05-12 21:21:01 +02002498 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
Ingo Molnarbdd7c812008-01-25 21:08:18 +01002499}
Steven Rostedtcb469842008-01-25 21:08:22 +01002500
2501/*
2502 * When switching from the rt queue, we bring ourselves to a position
2503 * where we might want to pull RT tasks from other runqueues.
2504 */
Peter Zijlstrada7a7352011-01-17 17:03:27 +01002505static void switched_from_rt(struct rq *rq, struct task_struct *p)
Steven Rostedtcb469842008-01-25 21:08:22 +01002506{
2507 /*
2508 * If there are other RT tasks then we will reschedule
2509 * and the scheduling of the other RT tasks will handle
2510 * the balancing. But if we are the last RT task
2511 * we may need to handle the pulling of RT tasks
2512 * now.
2513 */
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04002514 if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
Kirill Tkhai1158ddb2012-11-23 00:02:15 +04002515 return;
2516
Ingo Molnar02d8ec92018-03-03 16:27:54 +01002517 rt_queue_pull_task(rq);
Steven Rostedtcb469842008-01-25 21:08:22 +01002518}
Rusty Russell3d8cbdf2008-11-25 09:58:41 +10302519
Li Zefan11c785b2014-02-08 14:17:45 +08002520void __init init_sched_rt_class(void)
Rusty Russell3d8cbdf2008-11-25 09:58:41 +10302521{
2522 unsigned int i;
2523
Peter Zijlstra029632f2011-10-25 10:00:11 +02002524 for_each_possible_cpu(i) {
Yinghai Lueaa95842009-06-06 14:51:36 -07002525 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
Mike Travis6ca09df2008-12-31 18:08:45 -08002526 GFP_KERNEL, cpu_to_node(i));
Peter Zijlstra029632f2011-10-25 10:00:11 +02002527 }
Rusty Russell3d8cbdf2008-11-25 09:58:41 +10302528}
Steven Rostedte8fa1362008-01-25 21:08:05 +01002529#endif /* CONFIG_SMP */
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002530
Steven Rostedtcb469842008-01-25 21:08:22 +01002531/*
2532 * When switching a task to RT, we may overload the runqueue
2533 * with RT tasks. In this case we try to push them off to
2534 * other runqueues.
2535 */
Peter Zijlstrada7a7352011-01-17 17:03:27 +01002536static void switched_to_rt(struct rq *rq, struct task_struct *p)
Steven Rostedtcb469842008-01-25 21:08:22 +01002537{
Steven Rostedtcb469842008-01-25 21:08:22 +01002538 /*
Vincent Donnefortfecfcbc2021-06-21 11:37:51 +01002539 * If we are running, update the avg_rt tracking, as the running time
2540 * will from now on be accounted into the latter.
2541 */
2542 if (task_current(rq, p)) {
2543 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
2544 return;
2545 }
2546
2547 /*
2548 * If we are not running, we may need to preempt the current
2549 * running task. If that running task is also an RT task,
Steven Rostedtcb469842008-01-25 21:08:22 +01002550 * then see if we can move to another run queue.
2551 */
Vincent Donnefortfecfcbc2021-06-21 11:37:51 +01002552 if (task_on_rq_queued(p)) {
Steven Rostedtcb469842008-01-25 21:08:22 +01002553#ifdef CONFIG_SMP
Qais Yousefd94a9df2020-03-02 13:27:20 +00002554 if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
Ingo Molnar02d8ec92018-03-03 16:27:54 +01002555 rt_queue_push_tasks(rq);
Sebastian Andrzej Siewior619bd4a2017-01-24 15:40:06 +01002556#endif /* CONFIG_SMP */
Paul E. McKenney2fe25822017-10-13 17:00:18 -07002557 if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
Kirill Tkhai88751252014-06-29 00:03:57 +04002558 resched_curr(rq);
Steven Rostedtcb469842008-01-25 21:08:22 +01002559 }
2560}
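
/*
 * Editor's illustrative sketch (user-space, not part of this file): one
 * common way a task ends up in switched_to_rt() is a policy change from
 * user space, e.g. via sched_setscheduler().  The priority value 10 is an
 * arbitrary example.
 */
#include <sched.h>
#include <stdio.h>

static int make_self_rt(void)
{
	struct sched_param sp = { .sched_priority = 10 };

	/* pid 0 == the calling process; typically needs CAP_SYS_NICE or RLIMIT_RTPRIO */
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		perror("sched_setscheduler");
		return -1;
	}
	return 0;
}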
2561
2562/*
2563 * Priority of the task has changed. This may cause
2564 * us to initiate a push or pull.
2565 */
Peter Zijlstrada7a7352011-01-17 17:03:27 +01002566static void
2567prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
Steven Rostedtcb469842008-01-25 21:08:22 +01002568{
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04002569 if (!task_on_rq_queued(p))
Peter Zijlstrada7a7352011-01-17 17:03:27 +01002570 return;
2571
Hui Su65bcf072020-10-31 01:32:23 +08002572 if (task_current(rq, p)) {
Steven Rostedtcb469842008-01-25 21:08:22 +01002573#ifdef CONFIG_SMP
2574 /*
2575 * If our priority decreases while running, we
2576 * may need to pull tasks to this runqueue.
2577 */
2578 if (oldprio < p->prio)
Ingo Molnar02d8ec92018-03-03 16:27:54 +01002579 rt_queue_pull_task(rq);
Peter Zijlstrafd7a4be2015-06-11 14:46:41 +02002580
Steven Rostedtcb469842008-01-25 21:08:22 +01002581 /*
2582 * If there's a higher priority task waiting to run
Peter Zijlstrafd7a4be2015-06-11 14:46:41 +02002583 * then reschedule.
Steven Rostedtcb469842008-01-25 21:08:22 +01002584 */
Peter Zijlstrafd7a4be2015-06-11 14:46:41 +02002585 if (p->prio > rq->rt.highest_prio.curr)
Kirill Tkhai88751252014-06-29 00:03:57 +04002586 resched_curr(rq);
Steven Rostedtcb469842008-01-25 21:08:22 +01002587#else
2588 /* For UP simply resched on drop of prio */
2589 if (oldprio < p->prio)
Kirill Tkhai88751252014-06-29 00:03:57 +04002590 resched_curr(rq);
Steven Rostedtcb469842008-01-25 21:08:22 +01002591#endif /* CONFIG_SMP */
2592 } else {
2593 /*
2594 * This task is not running, but if its priority is
2595 * higher than that of the current running task,
2596 * then reschedule.
2597 */
2598 if (p->prio < rq->curr->prio)
Kirill Tkhai88751252014-06-29 00:03:57 +04002599 resched_curr(rq);
Steven Rostedtcb469842008-01-25 21:08:22 +01002600 }
2601}
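
/*
 * Editor's illustrative sketch (user-space, not part of this file): one
 * path into prio_changed_rt() is an RT task having its priority changed
 * while keeping an RT policy, e.g. via sched_setparam().  Priority 20 is
 * an arbitrary example value.
 */
#include <sched.h>
#include <stdio.h>
#include <sys/types.h>

static int bump_rt_priority(pid_t pid)
{
	struct sched_param sp = { .sched_priority = 20 };

	if (sched_setparam(pid, &sp) == -1) {	/* pid 0 == calling process */
		perror("sched_setparam");
		return -1;
	}
	return 0;
}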
2602
Nicolas Pitreb18b6a92017-01-21 00:09:08 -05002603#ifdef CONFIG_POSIX_TIMERS
Peter Zijlstra78f2c7d2008-01-25 21:08:27 +01002604static void watchdog(struct rq *rq, struct task_struct *p)
2605{
2606 unsigned long soft, hard;
2607
Jiri Slaby78d7d402010-03-05 13:42:54 -08002608 /* max may change after cur was read; this will be fixed on the next tick */
2609 soft = task_rlimit(p, RLIMIT_RTTIME);
2610 hard = task_rlimit_max(p, RLIMIT_RTTIME);
Peter Zijlstra78f2c7d2008-01-25 21:08:27 +01002611
2612 if (soft != RLIM_INFINITY) {
2613 unsigned long next;
2614
Ying Xue57d2aa02012-07-17 15:03:43 +08002615 if (p->rt.watchdog_stamp != jiffies) {
2616 p->rt.timeout++;
2617 p->rt.watchdog_stamp = jiffies;
2618 }
2619
Peter Zijlstra78f2c7d2008-01-25 21:08:27 +01002620 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
Thomas Gleixner3a245c02019-08-21 21:09:06 +02002621 if (p->rt.timeout > next) {
2622 posix_cputimers_rt_watchdog(&p->posix_cputimers,
2623 p->se.sum_exec_runtime);
2624 }
Peter Zijlstra78f2c7d2008-01-25 21:08:27 +01002625 }
2626}
Nicolas Pitreb18b6a92017-01-21 00:09:08 -05002627#else
2628static inline void watchdog(struct rq *rq, struct task_struct *p) { }
2629#endif
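
/*
 * Editor's illustrative sketch (user-space, not part of this file): the
 * watchdog above enforces RLIMIT_RTTIME, which a process can set on itself
 * with setrlimit().  RLIMIT_RTTIME is in microseconds; the 0.5s/1s limits
 * below are arbitrary example values.
 */
#include <stdio.h>
#include <sys/resource.h>

static int limit_own_rt_cputime(void)
{
	struct rlimit rl = {
		.rlim_cur = 500000,	/* soft limit: SIGXCPU after ~0.5s of RT CPU time */
		.rlim_max = 1000000,	/* hard limit: SIGKILL after ~1s */
	};

	if (setrlimit(RLIMIT_RTTIME, &rl) == -1) {
		perror("setrlimit(RLIMIT_RTTIME)");
		return -1;
	}
	return 0;
}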
Steven Rostedtcb469842008-01-25 21:08:22 +01002630
Frederic Weisbeckerd84b3132018-02-21 05:17:27 +01002631/*
2632 * scheduler tick hitting a task of our scheduling class.
2633 *
2634 * NOTE: This function can be called remotely by the tick offload that
2635 * goes along full dynticks. Therefore no local assumption can be made
2636 * and everything must be accessed through the @rq and @curr passed in
2637 * parameters.
2638 */
Peter Zijlstra8f4d37e2008-01-25 21:08:29 +01002639static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002640{
Colin Cross454c7992012-05-16 21:34:23 -07002641 struct sched_rt_entity *rt_se = &p->rt;
2642
Peter Zijlstra67e2be02007-12-20 15:01:17 +01002643 update_curr_rt(rq);
Vincent Guittot23127292019-01-23 16:26:53 +01002644 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
Peter Zijlstra67e2be02007-12-20 15:01:17 +01002645
Peter Zijlstra78f2c7d2008-01-25 21:08:27 +01002646 watchdog(rq, p);
2647
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002648 /*
2649 * RR tasks need a special form of timeslice management.
2650 * FIFO tasks have no timeslices.
2651 */
2652 if (p->policy != SCHED_RR)
2653 return;
2654
Peter Zijlstrafa717062008-01-25 21:08:27 +01002655 if (--p->rt.time_slice)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002656 return;
2657
Clark Williamsce0dbbb2013-02-07 09:47:04 -06002658 p->rt.time_slice = sched_rr_timeslice;
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002659
Dmitry Adamushko98fbc792007-08-24 20:39:10 +02002660 /*
Li Bine9aa39b2013-10-21 20:15:43 +08002661 * Requeue to the end of the queue if we (and all of our ancestors) are not
2662 * the only element on the queue.
Dmitry Adamushko98fbc792007-08-24 20:39:10 +02002663 */
Colin Cross454c7992012-05-16 21:34:23 -07002664 for_each_sched_rt_entity(rt_se) {
2665 if (rt_se->run_list.prev != rt_se->run_list.next) {
2666 requeue_task_rt(rq, p, 0);
Kirill Tkhai8aa6f0e2014-09-22 22:36:43 +04002667 resched_curr(rq);
Colin Cross454c7992012-05-16 21:34:23 -07002668 return;
2669 }
Dmitry Adamushko98fbc792007-08-24 20:39:10 +02002670 }
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002671}
2672
H Hartley Sweeten6d686f42010-01-13 20:21:52 -07002673static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
Peter Williams0d721ce2009-09-21 01:31:53 +00002674{
2675 /*
2676 * Time slice is 0 for SCHED_FIFO tasks
2677 */
2678 if (task->policy == SCHED_RR)
Clark Williamsce0dbbb2013-02-07 09:47:04 -06002679 return sched_rr_timeslice;
Peter Williams0d721ce2009-09-21 01:31:53 +00002680 else
2681 return 0;
2682}
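
/*
 * Editor's illustrative sketch (user-space, not part of this file): the
 * sched_rr_get_interval() syscall is backed by the get_rr_interval hook
 * above, with the result converted to a struct timespec.
 */
#include <sched.h>
#include <stdio.h>
#include <time.h>

static void print_rr_timeslice(void)
{
	struct timespec ts;

	if (sched_rr_get_interval(0, &ts) == 0)	/* pid 0 == calling thread */
		printf("RR timeslice: %ld.%09ld s\n",
		       (long)ts.tv_sec, ts.tv_nsec);
}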
2683
Hao Jia530bfad2023-03-16 16:18:06 +08002684#ifdef CONFIG_SCHED_CORE
2685static int task_is_throttled_rt(struct task_struct *p, int cpu)
2686{
2687 struct rt_rq *rt_rq;
2688
2689#ifdef CONFIG_RT_GROUP_SCHED
2690 rt_rq = task_group(p)->rt_rq[cpu];
2691#else
2692 rt_rq = &cpu_rq(cpu)->rt;
2693#endif
2694
2695 return rt_rq_throttled(rt_rq);
2696}
2697#endif
2698
Peter Zijlstra43c31ac2020-10-21 15:45:33 +02002699DEFINE_SCHED_CLASS(rt) = {
2700
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002701 .enqueue_task = enqueue_task_rt,
2702 .dequeue_task = dequeue_task_rt,
2703 .yield_task = yield_task_rt,
2704
2705 .check_preempt_curr = check_preempt_curr_rt,
2706
2707 .pick_next_task = pick_next_task_rt,
2708 .put_prev_task = put_prev_task_rt,
Peter Zijlstra03b7fad2019-05-29 20:36:41 +00002709 .set_next_task = set_next_task_rt,
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002710
Peter Williams681f3e62007-10-24 18:23:51 +02002711#ifdef CONFIG_SMP
Peter Zijlstra6e2df052019-11-08 11:11:52 +01002712 .balance = balance_rt,
Peter Zijlstra21f56ffe2020-11-17 18:19:32 -05002713 .pick_task = pick_task_rt,
Li Zefan4ce72a22008-10-22 15:25:26 +08002714 .select_task_rq = select_task_rq_rt,
Peter Zijlstra6c370672015-05-15 17:43:36 +02002715 .set_cpus_allowed = set_cpus_allowed_common,
Gregory Haskins1f11eb6a2008-06-04 15:04:05 -04002716 .rq_online = rq_online_rt,
2717 .rq_offline = rq_offline_rt,
Peter Zijlstraefbbd052009-12-16 18:04:40 +01002718 .task_woken = task_woken_rt,
Steven Rostedtcb469842008-01-25 21:08:22 +01002719 .switched_from = switched_from_rt,
Peter Zijlstraa7c81552020-09-28 17:06:07 +02002720 .find_lock_rq = find_lock_lowest_rq,
Peter Williams681f3e62007-10-24 18:23:51 +02002721#endif
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002722
2723 .task_tick = task_tick_rt,
Steven Rostedtcb469842008-01-25 21:08:22 +01002724
Peter Williams0d721ce2009-09-21 01:31:53 +00002725 .get_rr_interval = get_rr_interval_rt,
2726
Steven Rostedtcb469842008-01-25 21:08:22 +01002727 .prio_changed = prio_changed_rt,
2728 .switched_to = switched_to_rt,
Stanislaw Gruszka6e998912014-11-12 16:58:44 +01002729
2730 .update_curr = update_curr_rt,
Patrick Bellasi982d9cd2019-06-21 09:42:10 +01002731
Hao Jia530bfad2023-03-16 16:18:06 +08002732#ifdef CONFIG_SCHED_CORE
2733 .task_is_throttled = task_is_throttled_rt,
2734#endif
2735
Patrick Bellasi982d9cd2019-06-21 09:42:10 +01002736#ifdef CONFIG_UCLAMP_TASK
2737 .uclamp_enabled = 1,
2738#endif
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002739};
Peter Zijlstraada18de2008-06-19 14:22:24 +02002740
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002741#ifdef CONFIG_RT_GROUP_SCHED
2742/*
2743 * Ensure that the real time constraints are schedulable.
2744 */
2745static DEFINE_MUTEX(rt_constraints_mutex);
2746
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002747static inline int tg_has_rt_tasks(struct task_group *tg)
2748{
Konstantin Khlebnikovb4fb0152020-01-25 17:50:38 +03002749 struct task_struct *task;
2750 struct css_task_iter it;
2751 int ret = 0;
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002752
2753 /*
2754 * Autogroups do not have RT tasks; see autogroup_create().
2755 */
2756 if (task_group_is_autogroup(tg))
2757 return 0;
2758
Konstantin Khlebnikovb4fb0152020-01-25 17:50:38 +03002759 css_task_iter_start(&tg->css, 0, &it);
2760 while (!ret && (task = css_task_iter_next(&it)))
2761 ret |= rt_task(task);
2762 css_task_iter_end(&it);
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002763
Konstantin Khlebnikovb4fb0152020-01-25 17:50:38 +03002764 return ret;
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002765}
2766
2767struct rt_schedulable_data {
2768 struct task_group *tg;
2769 u64 rt_period;
2770 u64 rt_runtime;
2771};
2772
2773static int tg_rt_schedulable(struct task_group *tg, void *data)
2774{
2775 struct rt_schedulable_data *d = data;
2776 struct task_group *child;
2777 unsigned long total, sum = 0;
2778 u64 period, runtime;
2779
2780 period = ktime_to_ns(tg->rt_bandwidth.rt_period);
2781 runtime = tg->rt_bandwidth.rt_runtime;
2782
2783 if (tg == d->tg) {
2784 period = d->rt_period;
2785 runtime = d->rt_runtime;
2786 }
2787
2788 /*
2789 * Cannot have more runtime than the period.
2790 */
2791 if (runtime > period && runtime != RUNTIME_INF)
2792 return -EINVAL;
2793
2794 /*
Konstantin Khlebnikovb4fb0152020-01-25 17:50:38 +03002795 * Ensure we don't starve existing RT tasks if runtime turns zero.
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002796 */
Konstantin Khlebnikovb4fb0152020-01-25 17:50:38 +03002797 if (rt_bandwidth_enabled() && !runtime &&
2798 tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002799 return -EBUSY;
2800
2801 total = to_ratio(period, runtime);
2802
2803 /*
2804 * Nobody can have more than the global setting allows.
2805 */
2806 if (total > to_ratio(global_rt_period(), global_rt_runtime()))
2807 return -EINVAL;
2808
2809 /*
2810 * The sum of our children's runtime should not exceed our own.
2811 */
2812 list_for_each_entry_rcu(child, &tg->children, siblings) {
2813 period = ktime_to_ns(child->rt_bandwidth.rt_period);
2814 runtime = child->rt_bandwidth.rt_runtime;
2815
2816 if (child == d->tg) {
2817 period = d->rt_period;
2818 runtime = d->rt_runtime;
2819 }
2820
2821 sum += to_ratio(period, runtime);
2822 }
2823
2824 if (sum > total)
2825 return -EINVAL;
2826
2827 return 0;
2828}
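
/*
 * Editor's illustrative sketch (user-space, not part of this file): the
 * admission check above works in fixed point.  to_ratio() scales
 * runtime/period by 2^BW_SHIFT, and the children's ratios must not sum
 * past their parent's.  EX_BW_SHIFT (20) is assumed to mirror the kernel's
 * BW_SHIFT; the sysctl defaults of 1s period / 0.95s runtime are used as
 * example inputs.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_BW_SHIFT	20	/* assumed to match the kernel's BW_SHIFT */

static uint64_t ex_to_ratio(uint64_t period_us, uint64_t runtime_us)
{
	return period_us ? (runtime_us << EX_BW_SHIFT) / period_us : 0;
}

int main(void)
{
	/* 950000us runtime in a 1000000us period ~= 0.95 * 2^20 = 996147 */
	printf("global ratio = %llu of %llu\n",
	       (unsigned long long)ex_to_ratio(1000000, 950000),
	       (unsigned long long)(1ULL << EX_BW_SHIFT));
	return 0;
}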
2829
2830static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
2831{
2832 int ret;
2833
2834 struct rt_schedulable_data data = {
2835 .tg = tg,
2836 .rt_period = period,
2837 .rt_runtime = runtime,
2838 };
2839
2840 rcu_read_lock();
2841 ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
2842 rcu_read_unlock();
2843
2844 return ret;
2845}
2846
2847static int tg_set_rt_bandwidth(struct task_group *tg,
2848 u64 rt_period, u64 rt_runtime)
2849{
2850 int i, err = 0;
2851
2852 /*
2853 * Disallowing the root group RT runtime is BAD; it would prevent the
2854 * kernel from creating (and/or operating) RT threads.
2855 */
2856 if (tg == &root_task_group && rt_runtime == 0)
2857 return -EINVAL;
2858
2859 /* A zero period doesn't make any sense. */
2860 if (rt_period == 0)
2861 return -EINVAL;
2862
Huaixin Changd505b8a2020-04-25 18:52:48 +08002863 /*
2864 * Bound the quota to guard against overflow during the bandwidth shift.
2865 */
2866 if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
2867 return -EINVAL;
2868
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002869 mutex_lock(&rt_constraints_mutex);
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002870 err = __rt_schedulable(tg, rt_period, rt_runtime);
2871 if (err)
2872 goto unlock;
2873
2874 raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
2875 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
2876 tg->rt_bandwidth.rt_runtime = rt_runtime;
2877
2878 for_each_possible_cpu(i) {
2879 struct rt_rq *rt_rq = tg->rt_rq[i];
2880
2881 raw_spin_lock(&rt_rq->rt_runtime_lock);
2882 rt_rq->rt_runtime = rt_runtime;
2883 raw_spin_unlock(&rt_rq->rt_runtime_lock);
2884 }
2885 raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
2886unlock:
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002887 mutex_unlock(&rt_constraints_mutex);
2888
2889 return err;
2890}
2891
2892int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
2893{
2894 u64 rt_runtime, rt_period;
2895
2896 rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
2897 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
2898 if (rt_runtime_us < 0)
2899 rt_runtime = RUNTIME_INF;
Konstantin Khlebnikov1a010e22019-02-27 11:10:17 +03002900 else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC)
2901 return -EINVAL;
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002902
2903 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
2904}
2905
2906long sched_group_rt_runtime(struct task_group *tg)
2907{
2908 u64 rt_runtime_us;
2909
2910 if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
2911 return -1;
2912
2913 rt_runtime_us = tg->rt_bandwidth.rt_runtime;
2914 do_div(rt_runtime_us, NSEC_PER_USEC);
2915 return rt_runtime_us;
2916}
2917
2918int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
2919{
2920 u64 rt_runtime, rt_period;
2921
Konstantin Khlebnikov1a010e22019-02-27 11:10:17 +03002922 if (rt_period_us > U64_MAX / NSEC_PER_USEC)
2923 return -EINVAL;
2924
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002925 rt_period = rt_period_us * NSEC_PER_USEC;
2926 rt_runtime = tg->rt_bandwidth.rt_runtime;
2927
2928 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
2929}
2930
2931long sched_group_rt_period(struct task_group *tg)
2932{
2933 u64 rt_period_us;
2934
2935 rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
2936 do_div(rt_period_us, NSEC_PER_USEC);
2937 return rt_period_us;
2938}
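
/*
 * Editor's illustrative sketch (user-space, not part of this file): with
 * CONFIG_RT_GROUP_SCHED the setters above are exposed through the cgroup-v1
 * cpu controller as cpu.rt_runtime_us and cpu.rt_period_us.  The mount
 * point and group name in the usage comment are assumed example paths.
 */
#include <stdio.h>

static int write_cg_value(const char *cg_dir, const char *file, long val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", cg_dir, file);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%ld\n", val);
	return fclose(f);
}

/*
 * Example: allow the group 0.5s of RT runtime per 1s period.
 *   write_cg_value("/sys/fs/cgroup/cpu/rtgroup", "cpu.rt_period_us", 1000000);
 *   write_cg_value("/sys/fs/cgroup/cpu/rtgroup", "cpu.rt_runtime_us", 500000);
 */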
2939
Baisong Zhong28f152c2022-03-18 10:54:17 +08002940#ifdef CONFIG_SYSCTL
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002941static int sched_rt_global_constraints(void)
2942{
2943 int ret = 0;
2944
2945 mutex_lock(&rt_constraints_mutex);
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002946 ret = __rt_schedulable(NULL, 0, 0);
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002947 mutex_unlock(&rt_constraints_mutex);
2948
2949 return ret;
2950}
Baisong Zhong28f152c2022-03-18 10:54:17 +08002951#endif /* CONFIG_SYSCTL */
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002952
2953int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
2954{
2955 /* Don't accept realtime tasks when there is no way for them to run */
2956 if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
2957 return 0;
2958
2959 return 1;
2960}
2961
2962#else /* !CONFIG_RT_GROUP_SCHED */
Baisong Zhong28f152c2022-03-18 10:54:17 +08002963
2964#ifdef CONFIG_SYSCTL
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002965static int sched_rt_global_constraints(void)
2966{
2967 unsigned long flags;
2968 int i;
2969
2970 raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
2971 for_each_possible_cpu(i) {
2972 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
2973
2974 raw_spin_lock(&rt_rq->rt_runtime_lock);
2975 rt_rq->rt_runtime = global_rt_runtime();
2976 raw_spin_unlock(&rt_rq->rt_runtime_lock);
2977 }
2978 raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
2979
2980 return 0;
2981}
Baisong Zhong28f152c2022-03-18 10:54:17 +08002982#endif /* CONFIG_SYSCTL */
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002983#endif /* CONFIG_RT_GROUP_SCHED */
2984
Baisong Zhong28f152c2022-03-18 10:54:17 +08002985#ifdef CONFIG_SYSCTL
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002986static int sched_rt_global_validate(void)
2987{
2988 if (sysctl_sched_rt_period <= 0)
2989 return -EINVAL;
2990
2991 if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
Huaixin Changd505b8a2020-04-25 18:52:48 +08002992 ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
2993 ((u64)sysctl_sched_rt_runtime *
2994 NSEC_PER_USEC > max_rt_runtime)))
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002995 return -EINVAL;
2996
2997 return 0;
2998}
2999
3000static void sched_rt_do_global(void)
3001{
Li Hua9b58e972021-12-03 03:36:18 +00003002 unsigned long flags;
3003
3004 raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
Nicolas Pitre8887cd92017-06-21 14:22:02 -04003005 def_rt_bandwidth.rt_runtime = global_rt_runtime();
3006 def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
Li Hua9b58e972021-12-03 03:36:18 +00003007 raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
Nicolas Pitre8887cd92017-06-21 14:22:02 -04003008}
3009
Zhen Nid9ab0e62022-02-15 19:45:59 +08003010static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
Christoph Hellwig32927392020-04-24 08:43:38 +02003011 size_t *lenp, loff_t *ppos)
Nicolas Pitre8887cd92017-06-21 14:22:02 -04003012{
3013 int old_period, old_runtime;
3014 static DEFINE_MUTEX(mutex);
3015 int ret;
3016
3017 mutex_lock(&mutex);
3018 old_period = sysctl_sched_rt_period;
3019 old_runtime = sysctl_sched_rt_runtime;
3020
3021 ret = proc_dointvec(table, write, buffer, lenp, ppos);
3022
3023 if (!ret && write) {
3024 ret = sched_rt_global_validate();
3025 if (ret)
3026 goto undo;
3027
3028 ret = sched_dl_global_validate();
3029 if (ret)
3030 goto undo;
3031
3032 ret = sched_rt_global_constraints();
3033 if (ret)
3034 goto undo;
3035
3036 sched_rt_do_global();
3037 sched_dl_do_global();
3038 }
3039 if (0) {
3040undo:
3041 sysctl_sched_rt_period = old_period;
3042 sysctl_sched_rt_runtime = old_runtime;
3043 }
3044 mutex_unlock(&mutex);
3045
3046 return ret;
3047}
3048
Zhen Nidafd7a92022-02-15 19:46:01 +08003049static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
Christoph Hellwig32927392020-04-24 08:43:38 +02003050 size_t *lenp, loff_t *ppos)
Nicolas Pitre8887cd92017-06-21 14:22:02 -04003051{
3052 int ret;
3053 static DEFINE_MUTEX(mutex);
3054
3055 mutex_lock(&mutex);
3056 ret = proc_dointvec(table, write, buffer, lenp, ppos);
3057 /*
3058 * Make sure that internally we keep jiffies.
3059 * Also, writing zero resets the timeslice to the default:
3060 */
3061 if (!ret && write) {
3062 sched_rr_timeslice =
3063 sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
3064 msecs_to_jiffies(sysctl_sched_rr_timeslice);
Cyril Hrubisc1fc6482023-08-02 17:19:06 +02003065
3066 if (sysctl_sched_rr_timeslice <= 0)
3067 sysctl_sched_rr_timeslice = jiffies_to_msecs(RR_TIMESLICE);
Nicolas Pitre8887cd92017-06-21 14:22:02 -04003068 }
3069 mutex_unlock(&mutex);
Ingo Molnar97fb7a02018-03-03 14:01:12 +01003070
Nicolas Pitre8887cd92017-06-21 14:22:02 -04003071 return ret;
3072}
Baisong Zhong28f152c2022-03-18 10:54:17 +08003073#endif /* CONFIG_SYSCTL */
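
/*
 * Editor's illustrative sketch (user-space, not part of this file): the
 * handlers above back /proc/sys/kernel/sched_rt_period_us,
 * sched_rt_runtime_us and sched_rr_timeslice_ms.  Writing -1 to
 * sched_rt_runtime_us disables RT throttling; writing 0 to
 * sched_rr_timeslice_ms restores the RR_TIMESLICE default, per the handler
 * above.
 */
#include <stdio.h>

static int write_sysctl(const char *name, const char *val)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/sys/kernel/%s", name);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%s\n", val);
	return fclose(f);
}

/* e.g. write_sysctl("sched_rt_runtime_us", "900000"); */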
Nicolas Pitre8887cd92017-06-21 14:22:02 -04003074
Peter Zijlstraada18de2008-06-19 14:22:24 +02003075#ifdef CONFIG_SCHED_DEBUG
Peter Zijlstra029632f2011-10-25 10:00:11 +02003076void print_rt_stats(struct seq_file *m, int cpu)
Peter Zijlstraada18de2008-06-19 14:22:24 +02003077{
Cheng Xuec514c42011-05-14 14:20:02 +08003078 rt_rq_iter_t iter;
Peter Zijlstraada18de2008-06-19 14:22:24 +02003079 struct rt_rq *rt_rq;
3080
3081 rcu_read_lock();
Cheng Xuec514c42011-05-14 14:20:02 +08003082 for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
Peter Zijlstraada18de2008-06-19 14:22:24 +02003083 print_rt_rq(m, cpu, rt_rq);
3084 rcu_read_unlock();
3085}
Dhaval Giani55e12e52008-06-24 23:39:43 +05303086#endif /* CONFIG_SCHED_DEBUG */