[v5,5/7] sched/fair: Add trivial fair server
Commit Message
From: Peter Zijlstra <peterz@infradead.org>
Use deadline servers to service fair tasks.
This patch adds a fair_server deadline entity, which acts as a container
for fair entities and can be used to fix starvation when tasks with higher
priority than fair tasks are monopolizing the CPU(s).
[ dl_server does not account for rt ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
---
kernel/sched/core.c | 1 +
kernel/sched/deadline.c | 7 +++++++
kernel/sched/fair.c | 29 +++++++++++++++++++++++++++++
kernel/sched/sched.h | 4 ++++
4 files changed, 41 insertions(+)
Comments
On Sat, Nov 04, 2023 at 11:59:22AM +0100, Daniel Bristot de Oliveira wrote:
> [ dl_server do not account for rt ]
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 541d547e1019..1d7b96ca9011 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1382,6 +1382,13 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
> resched_curr(rq);
> }
>
> + /*
> + * The fair server (sole dl_server) does not account for real-time
> + * workload because it is running fair work.
> + */
> + if (dl_server(dl_se))
> + return;
> +
> /*
> * Because -- for now -- we share the rt bandwidth, we need to
> * account our runtime there too, otherwise actual rt tasks
Should we perhaps write this like so?
if (dl_se == &rq->fair_server)
return;
On 11/6/23 15:24, Peter Zijlstra wrote:
> On Sat, Nov 04, 2023 at 11:59:22AM +0100, Daniel Bristot de Oliveira wrote:
>
>> [ dl_server do not account for rt ]
>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> index 541d547e1019..1d7b96ca9011 100644
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
>> @@ -1382,6 +1382,13 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
>> resched_curr(rq);
>> }
>>
>> + /*
>> + * The fair server (sole dl_server) does not account for real-time
>> + * workload because it is running fair work.
>> + */
>> + if (dl_server(dl_se))
>> + return;
>> +
>> /*
>> * Because -- for now -- we share the rt bandwidth, we need to
>> * account our runtime there too, otherwise actual rt tasks
> Should we perhaps write this like so?
>
> if (dl_se == &rq->fair_server)
> return;
Right, that is better for the next step (making it generic).
-- Daniel
@@ -10019,6 +10019,7 @@ void __init sched_init(void)
#endif /* CONFIG_SMP */
hrtick_rq_init(rq);
atomic_set(&rq->nr_iowait, 0);
+ fair_server_init(rq);
#ifdef CONFIG_SCHED_CORE
rq->core = rq;
@@ -1382,6 +1382,13 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
resched_curr(rq);
}
+ /*
+ * The fair server (sole dl_server) does not account for real-time
+ * workload because it is running fair work.
+ */
+ if (dl_server(dl_se))
+ return;
+
/*
* Because -- for now -- we share the rt bandwidth, we need to
* account our runtime there too, otherwise actual rt tasks
@@ -6600,6 +6600,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
*/
util_est_enqueue(&rq->cfs, p);
+ if (!rq->cfs.h_nr_running)
+ dl_server_start(&rq->fair_server);
+
/*
* If in_iowait is set, the code below may not trigger any cpufreq
* utilization updates, so do it here explicitly with the IOWAIT flag
@@ -6744,6 +6747,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
rq->next_balance = jiffies;
dequeue_throttle:
+ if (!rq->cfs.h_nr_running)
+ dl_server_stop(&rq->fair_server);
+
util_est_update(&rq->cfs, p, task_sleep);
hrtick_update(rq);
}
@@ -8396,6 +8402,29 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq)
return pick_next_task_fair(rq, NULL, NULL);
}
+static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
+{
+ return !!dl_se->rq->cfs.nr_running;
+}
+
+static struct task_struct *fair_server_pick(struct sched_dl_entity *dl_se)
+{
+ return pick_next_task_fair(dl_se->rq, NULL, NULL);
+}
+
+void fair_server_init(struct rq *rq)
+{
+ struct sched_dl_entity *dl_se = &rq->fair_server;
+
+ init_dl_entity(dl_se);
+
+ dl_se->dl_runtime = 50 * NSEC_PER_MSEC;
+ dl_se->dl_deadline = 1000 * NSEC_PER_MSEC;
+ dl_se->dl_period = 1000 * NSEC_PER_MSEC;
+
+ dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
+}
+
/*
* Account for a descheduled task:
*/
@@ -340,6 +340,8 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
dl_server_has_tasks_f has_tasks,
dl_server_pick_f pick);
+extern void fair_server_init(struct rq *);
+
#ifdef CONFIG_CGROUP_SCHED
struct cfs_rq;
@@ -1005,6 +1007,8 @@ struct rq {
struct rt_rq rt;
struct dl_rq dl;
+ struct sched_dl_entity fair_server;
+
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this CPU: */
struct list_head leaf_cfs_rq_list;