[v5,5/7] sched/fair: Add trivial fair server

Message ID 4e0d14eb6e0ec33055197ac7ddb57ef7ab3894a5.1699095159.git.bristot@kernel.org
State New
Headers
Series SCHED_DEADLINE server infrastructure |

Commit Message

Daniel Bristot de Oliveira Nov. 4, 2023, 10:59 a.m. UTC
  From: Peter Zijlstra <peterz@infradead.org>

Use deadline servers to service fair tasks.

This patch adds a fair_server deadline entity, which acts as a container
for fair entities and can be used to fix starvation when tasks of higher
priority than fair tasks are monopolizing CPU(s).

[ dl_server does not account for rt ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
---
 kernel/sched/core.c     |  1 +
 kernel/sched/deadline.c |  7 +++++++
 kernel/sched/fair.c     | 29 +++++++++++++++++++++++++++++
 kernel/sched/sched.h    |  4 ++++
 4 files changed, 41 insertions(+)
  

Comments

Peter Zijlstra Nov. 6, 2023, 2:24 p.m. UTC | #1
On Sat, Nov 04, 2023 at 11:59:22AM +0100, Daniel Bristot de Oliveira wrote:

> [ dl_server do not account for rt ]

> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 541d547e1019..1d7b96ca9011 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1382,6 +1382,13 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
>  			resched_curr(rq);
>  	}
>  
> +	/*
> +	 * The fair server (sole dl_server) does not account for real-time
> +	 * workload because it is running fair work.
> +	 */
> +	if (dl_server(dl_se))
> +		return;
> +
>  	/*
>  	 * Because -- for now -- we share the rt bandwidth, we need to
>  	 * account our runtime there too, otherwise actual rt tasks

Should we perhaps write this like so?

	if (dl_se == &rq->fair_server)
		return;
  
Daniel Bristot de Oliveira Nov. 6, 2023, 2:26 p.m. UTC | #2
On 11/6/23 15:24, Peter Zijlstra wrote:
> On Sat, Nov 04, 2023 at 11:59:22AM +0100, Daniel Bristot de Oliveira wrote:
> 
>> [ dl_server do not account for rt ]
>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> index 541d547e1019..1d7b96ca9011 100644
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
>> @@ -1382,6 +1382,13 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
>>  			resched_curr(rq);
>>  	}
>>  
>> +	/*
>> +	 * The fair server (sole dl_server) does not account for real-time
>> +	 * workload because it is running fair work.
>> +	 */
>> +	if (dl_server(dl_se))
>> +		return;
>> +
>>  	/*
>>  	 * Because -- for now -- we share the rt bandwidth, we need to
>>  	 * account our runtime there too, otherwise actual rt tasks
> Should we perhaps write this like so?
> 
> 	if (dl_se == &rq->fair_server)
> 		return;

Right, that is better for the next step (making it generic).

-- Daniel
  

Patch

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a721f6776b12..939266d29681 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10019,6 +10019,7 @@  void __init sched_init(void)
 #endif /* CONFIG_SMP */
 		hrtick_rq_init(rq);
 		atomic_set(&rq->nr_iowait, 0);
+		fair_server_init(rq);
 
 #ifdef CONFIG_SCHED_CORE
 		rq->core = rq;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 541d547e1019..1d7b96ca9011 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1382,6 +1382,13 @@  static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
 			resched_curr(rq);
 	}
 
+	/*
+	 * The fair server (sole dl_server) does not account for real-time
+	 * workload because it is running fair work.
+	 */
+	if (dl_server(dl_se))
+		return;
+
 	/*
 	 * Because -- for now -- we share the rt bandwidth, we need to
 	 * account our runtime there too, otherwise actual rt tasks
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bc3a4bc6c438..b15f7f376a67 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6600,6 +6600,9 @@  enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	 */
 	util_est_enqueue(&rq->cfs, p);
 
+	if (!rq->cfs.h_nr_running)
+		dl_server_start(&rq->fair_server);
+
 	/*
 	 * If in_iowait is set, the code below may not trigger any cpufreq
 	 * utilization updates, so do it here explicitly with the IOWAIT flag
@@ -6744,6 +6747,9 @@  static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		rq->next_balance = jiffies;
 
 dequeue_throttle:
+	if (!rq->cfs.h_nr_running)
+		dl_server_stop(&rq->fair_server);
+
 	util_est_update(&rq->cfs, p, task_sleep);
 	hrtick_update(rq);
 }
@@ -8396,6 +8402,29 @@  static struct task_struct *__pick_next_task_fair(struct rq *rq)
 	return pick_next_task_fair(rq, NULL, NULL);
 }
 
+static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
+{
+	return !!dl_se->rq->cfs.nr_running;
+}
+
+static struct task_struct *fair_server_pick(struct sched_dl_entity *dl_se)
+{
+	return pick_next_task_fair(dl_se->rq, NULL, NULL);
+}
+
+void fair_server_init(struct rq *rq)
+{
+	struct sched_dl_entity *dl_se = &rq->fair_server;
+
+	init_dl_entity(dl_se);
+
+	dl_se->dl_runtime = 50 * NSEC_PER_MSEC;
+	dl_se->dl_deadline = 1000 * NSEC_PER_MSEC;
+	dl_se->dl_period = 1000 * NSEC_PER_MSEC;
+
+	dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
+}
+
 /*
  * Account for a descheduled task:
  */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 24a2bc7c453b..ec0e288c8e06 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -340,6 +340,8 @@  extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
 		    dl_server_has_tasks_f has_tasks,
 		    dl_server_pick_f pick);
 
+extern void fair_server_init(struct rq *);
+
 #ifdef CONFIG_CGROUP_SCHED
 
 struct cfs_rq;
@@ -1005,6 +1007,8 @@  struct rq {
 	struct rt_rq		rt;
 	struct dl_rq		dl;
 
+	struct sched_dl_entity	fair_server;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this CPU: */
 	struct list_head	leaf_cfs_rq_list;