[RFC,V3,5/6] sched/fair: Add trivial fair server
Commit Message
From: Peter Zijlstra <peterz@infradead.org>
Use deadline servers to service fair tasks.
This patch adds a fair_server deadline entity which acts as a container
for fair entities and can be used to fix starvation when higher priority
(wrt fair) tasks are monopolizing CPU(s).
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
---
kernel/sched/core.c | 1 +
kernel/sched/fair.c | 29 +++++++++++++++++++++++++++++
kernel/sched/sched.h | 4 ++++
3 files changed, 34 insertions(+)
Comments
On Thu, Jun 08, 2023 at 05:58:17PM +0200, Daniel Bristot de Oliveira wrote:
> +void fair_server_init(struct rq *rq)
> +{
> + struct sched_dl_entity *dl_se = &rq->fair_server;
> +
> + init_dl_entity(dl_se);
> +
> + dl_se->dl_runtime = TICK_NSEC;
> + dl_se->dl_deadline = 20 * TICK_NSEC;
> + dl_se->dl_period = 20 * TICK_NSEC;
> +
> + dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
> +}
These here numbers should obviously become a tunable somewhere... :-)
On 6/16/23 14:59, Peter Zijlstra wrote:
> On Thu, Jun 08, 2023 at 05:58:17PM +0200, Daniel Bristot de Oliveira wrote:
>> +void fair_server_init(struct rq *rq)
>> +{
>> + struct sched_dl_entity *dl_se = &rq->fair_server;
>> +
>> + init_dl_entity(dl_se);
>> +
>> + dl_se->dl_runtime = TICK_NSEC;
>> + dl_se->dl_deadline = 20 * TICK_NSEC;
>> + dl_se->dl_period = 20 * TICK_NSEC;
>> +
>> + dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
>> +}
>
> These here numbers should obviously become a tunable somewhere... :-)
From sched_rt_runtime and sched_rt_period, no?
-- Daniel
On Fri, Jun 16, 2023 at 03:00:57PM +0200, Daniel Bristot de Oliveira wrote:
> On 6/16/23 14:59, Peter Zijlstra wrote:
> > On Thu, Jun 08, 2023 at 05:58:17PM +0200, Daniel Bristot de Oliveira wrote:
> >> +void fair_server_init(struct rq *rq)
> >> +{
> >> + struct sched_dl_entity *dl_se = &rq->fair_server;
> >> +
> >> + init_dl_entity(dl_se);
> >> +
> >> + dl_se->dl_runtime = TICK_NSEC;
> >> + dl_se->dl_deadline = 20 * TICK_NSEC;
> >> + dl_se->dl_period = 20 * TICK_NSEC;
> >> +
> >> + dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
> >> +}
> >
> > These here numbers should obviously become a tunable somewhere... :-)
>
> From sched_rt_runtime and sched_rt_period, no?
Well, probably new names. IIRC those are also limits on the whole
rt-cgroup mess, so e can't trivially change it without also ripping all
that out, and that's a little bit more work.
Ripping out the basic throttle is much simpler than replacing all that
and should be done earlier.
On 6/16/23 15:12, Peter Zijlstra wrote:
> On Fri, Jun 16, 2023 at 03:00:57PM +0200, Daniel Bristot de Oliveira wrote:
>> On 6/16/23 14:59, Peter Zijlstra wrote:
>>> On Thu, Jun 08, 2023 at 05:58:17PM +0200, Daniel Bristot de Oliveira wrote:
>>>> +void fair_server_init(struct rq *rq)
>>>> +{
>>>> + struct sched_dl_entity *dl_se = &rq->fair_server;
>>>> +
>>>> + init_dl_entity(dl_se);
>>>> +
>>>> + dl_se->dl_runtime = TICK_NSEC;
>>>> + dl_se->dl_deadline = 20 * TICK_NSEC;
>>>> + dl_se->dl_period = 20 * TICK_NSEC;
>>>> +
>>>> + dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
>>>> +}
>>>
>>> These here numbers should obviously become a tunable somewhere... :-)
>>
>> From sched_rt_runtime and sched_rt_period, no?
>
> Well, probably new names. IIRC those are also limits on the whole
> rt-cgroup mess, so e can't trivially change it without also ripping all
> that out, and that's a little bit more work.
>
> Ripping out the basic throttle is much simpler than replacing all that
> and should be done earlier.
>
Life is easier then. Should them be a sysctl?
Like:
kernel.sched_other_min_runtime_us ?
kernel.sched_other_min_period_us ?
I bet you can come up with better names.
-- Daniel
@@ -10058,6 +10058,7 @@ void __init sched_init(void)
#endif /* CONFIG_SMP */
hrtick_rq_init(rq);
atomic_set(&rq->nr_iowait, 0);
+ fair_server_init(rq);
#ifdef CONFIG_SCHED_CORE
rq->core = rq;
@@ -6336,6 +6336,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
*/
util_est_enqueue(&rq->cfs, p);
+ if (!rq->cfs.h_nr_running)
+ dl_server_start(&rq->fair_server);
+
/*
* If in_iowait is set, the code below may not trigger any cpufreq
* utilization updates, so do it here explicitly with the IOWAIT flag
@@ -6480,6 +6483,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
rq->next_balance = jiffies;
dequeue_throttle:
+ if (!rq->cfs.h_nr_running)
+ dl_server_stop(&rq->fair_server);
+
util_est_update(&rq->cfs, p, task_sleep);
hrtick_update(rq);
}
@@ -8221,6 +8227,29 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq)
return pick_next_task_fair(rq, NULL, NULL);
}
+static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
+{
+ return !!dl_se->rq->cfs.nr_running;
+}
+
+static struct task_struct *fair_server_pick(struct sched_dl_entity *dl_se)
+{
+ return pick_next_task_fair(dl_se->rq, NULL, NULL);
+}
+
+void fair_server_init(struct rq *rq)
+{
+ struct sched_dl_entity *dl_se = &rq->fair_server;
+
+ init_dl_entity(dl_se);
+
+ dl_se->dl_runtime = TICK_NSEC;
+ dl_se->dl_deadline = 20 * TICK_NSEC;
+ dl_se->dl_period = 20 * TICK_NSEC;
+
+ dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
+}
+
/*
* Account for a descheduled task:
*/
@@ -353,6 +353,8 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
dl_server_has_tasks_f has_tasks,
dl_server_pick_f pick);
+extern void fair_server_init(struct rq *);
+
#ifdef CONFIG_CGROUP_SCHED
struct cfs_rq;
@@ -1015,6 +1017,8 @@ struct rq {
struct rt_rq rt;
struct dl_rq dl;
+ struct sched_dl_entity fair_server;
+
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this CPU: */
struct list_head leaf_cfs_rq_list;