[RFC,V3,5/6] sched/fair: Add trivial fair server

Message ID 8db5a49ea92ad8b875d331d6136721645a382fe8.1686239016.git.bristot@kernel.org
State New
Headers
Series SCHED_DEADLINE server infrastructure |

Commit Message

Daniel Bristot de Oliveira June 8, 2023, 3:58 p.m. UTC
  From: Peter Zijlstra <peterz@infradead.org>

Use deadline servers to service fair tasks.

This patch adds a fair_server deadline entity which acts as a container
for fair entities and can be used to fix starvation when higher priority
(wrt fair) tasks are monopolizing CPU(s).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
---
 kernel/sched/core.c  |  1 +
 kernel/sched/fair.c  | 29 +++++++++++++++++++++++++++++
 kernel/sched/sched.h |  4 ++++
 3 files changed, 34 insertions(+)
  

Comments

Peter Zijlstra June 16, 2023, 12:59 p.m. UTC | #1
On Thu, Jun 08, 2023 at 05:58:17PM +0200, Daniel Bristot de Oliveira wrote:
> +void fair_server_init(struct rq *rq)
> +{
> +	struct sched_dl_entity *dl_se = &rq->fair_server;
> +
> +	init_dl_entity(dl_se);
> +
> +	dl_se->dl_runtime = TICK_NSEC;
> +	dl_se->dl_deadline = 20 * TICK_NSEC;
> +	dl_se->dl_period = 20 * TICK_NSEC;
> +
> +	dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
> +}

These here numbers should obviously become a tunable somewhere... :-)
  
Daniel Bristot de Oliveira June 16, 2023, 1 p.m. UTC | #2
On 6/16/23 14:59, Peter Zijlstra wrote:
> On Thu, Jun 08, 2023 at 05:58:17PM +0200, Daniel Bristot de Oliveira wrote:
>> +void fair_server_init(struct rq *rq)
>> +{
>> +	struct sched_dl_entity *dl_se = &rq->fair_server;
>> +
>> +	init_dl_entity(dl_se);
>> +
>> +	dl_se->dl_runtime = TICK_NSEC;
>> +	dl_se->dl_deadline = 20 * TICK_NSEC;
>> +	dl_se->dl_period = 20 * TICK_NSEC;
>> +
>> +	dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
>> +}
> 
> These here numbers should obviously become a tunable somewhere... :-)

From sched_rt_runtime and sched_rt_period, no?

-- Daniel
  
Peter Zijlstra June 16, 2023, 1:12 p.m. UTC | #3
On Fri, Jun 16, 2023 at 03:00:57PM +0200, Daniel Bristot de Oliveira wrote:
> On 6/16/23 14:59, Peter Zijlstra wrote:
> > On Thu, Jun 08, 2023 at 05:58:17PM +0200, Daniel Bristot de Oliveira wrote:
> >> +void fair_server_init(struct rq *rq)
> >> +{
> >> +	struct sched_dl_entity *dl_se = &rq->fair_server;
> >> +
> >> +	init_dl_entity(dl_se);
> >> +
> >> +	dl_se->dl_runtime = TICK_NSEC;
> >> +	dl_se->dl_deadline = 20 * TICK_NSEC;
> >> +	dl_se->dl_period = 20 * TICK_NSEC;
> >> +
> >> +	dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
> >> +}
> > 
> > These here numbers should obviously become a tunable somewhere... :-)
> 
> From sched_rt_runtime and sched_rt_period, no?

Well, probably new names. IIRC those are also limits on the whole
rt-cgroup mess, so e can't trivially change it without also ripping all
that out, and that's a little bit more work.

Ripping out the basic throttle is much simpler than replacing all that
and should be done earlier.
  
Daniel Bristot de Oliveira June 16, 2023, 1:18 p.m. UTC | #4
On 6/16/23 15:12, Peter Zijlstra wrote:
> On Fri, Jun 16, 2023 at 03:00:57PM +0200, Daniel Bristot de Oliveira wrote:
>> On 6/16/23 14:59, Peter Zijlstra wrote:
>>> On Thu, Jun 08, 2023 at 05:58:17PM +0200, Daniel Bristot de Oliveira wrote:
>>>> +void fair_server_init(struct rq *rq)
>>>> +{
>>>> +	struct sched_dl_entity *dl_se = &rq->fair_server;
>>>> +
>>>> +	init_dl_entity(dl_se);
>>>> +
>>>> +	dl_se->dl_runtime = TICK_NSEC;
>>>> +	dl_se->dl_deadline = 20 * TICK_NSEC;
>>>> +	dl_se->dl_period = 20 * TICK_NSEC;
>>>> +
>>>> +	dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
>>>> +}
>>>
>>> These here numbers should obviously become a tunable somewhere... :-)
>>
>> From sched_rt_runtime and sched_rt_period, no?
> 
> Well, probably new names. IIRC those are also limits on the whole
> rt-cgroup mess, so e can't trivially change it without also ripping all
> that out, and that's a little bit more work.
> 
> Ripping out the basic throttle is much simpler than replacing all that
> and should be done earlier.
> 

Life is easier then. Should them be a sysctl?

Like:
  kernel.sched_other_min_runtime_us ?
  kernel.sched_other_min_period_us ?

I bet you can come up with better names.

-- Daniel
  

Patch

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5b88b822ec89..7506dde9849d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10058,6 +10058,7 @@  void __init sched_init(void)
 #endif /* CONFIG_SMP */
 		hrtick_rq_init(rq);
 		atomic_set(&rq->nr_iowait, 0);
+		fair_server_init(rq);
 
 #ifdef CONFIG_SCHED_CORE
 		rq->core = rq;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0c58d8e55b69..f493f05c1f84 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6336,6 +6336,9 @@  enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	 */
 	util_est_enqueue(&rq->cfs, p);
 
+	if (!rq->cfs.h_nr_running)
+		dl_server_start(&rq->fair_server);
+
 	/*
 	 * If in_iowait is set, the code below may not trigger any cpufreq
 	 * utilization updates, so do it here explicitly with the IOWAIT flag
@@ -6480,6 +6483,9 @@  static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		rq->next_balance = jiffies;
 
 dequeue_throttle:
+	if (!rq->cfs.h_nr_running)
+		dl_server_stop(&rq->fair_server);
+
 	util_est_update(&rq->cfs, p, task_sleep);
 	hrtick_update(rq);
 }
@@ -8221,6 +8227,29 @@  static struct task_struct *__pick_next_task_fair(struct rq *rq)
 	return pick_next_task_fair(rq, NULL, NULL);
 }
 
+static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
+{
+	return !!dl_se->rq->cfs.nr_running;
+}
+
+static struct task_struct *fair_server_pick(struct sched_dl_entity *dl_se)
+{
+	return pick_next_task_fair(dl_se->rq, NULL, NULL);
+}
+
+void fair_server_init(struct rq *rq)
+{
+	struct sched_dl_entity *dl_se = &rq->fair_server;
+
+	init_dl_entity(dl_se);
+
+	dl_se->dl_runtime = TICK_NSEC;
+	dl_se->dl_deadline = 20 * TICK_NSEC;
+	dl_se->dl_period = 20 * TICK_NSEC;
+
+	dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
+}
+
 /*
  * Account for a descheduled task:
  */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 390c99e2f8a8..d4a7c0823c53 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -353,6 +353,8 @@  extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
 		    dl_server_has_tasks_f has_tasks,
 		    dl_server_pick_f pick);
 
+extern void fair_server_init(struct rq *);
+
 #ifdef CONFIG_CGROUP_SCHED
 
 struct cfs_rq;
@@ -1015,6 +1017,8 @@  struct rq {
 	struct rt_rq		rt;
 	struct dl_rq		dl;
 
+	struct sched_dl_entity	fair_server;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this CPU: */
 	struct list_head	leaf_cfs_rq_list;