sched: push force idled core_pick task to another cpu

Message ID 1678106502-58189-1-git-send-email-CruzZhao@linux.alibaba.com
State New
Series sched: push force idled core_pick task to another cpu

Commit Message

cruzzhao March 6, 2023, 12:41 p.m. UTC
  When a task with the max priority of its rq is forced
idle because of an unmatched cookie, it is better to find
a suitable CPU for it to run on as soon as possible, i.e.
one that is idle and cookie matched. To achieve this, push
the task in sched_core_balance(), after steal_cookie_task().

Signed-off-by: Cruz Zhao <CruzZhao@linux.alibaba.com>
---
 kernel/sched/core.c  | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/sched/sched.h |  1 +
 2 files changed, 72 insertions(+), 2 deletions(-)
  

Comments

cruzzhao March 24, 2023, 3:37 a.m. UTC | #1
ping...
As the core_pick task is the max-priority task of its rq, if it
is forced idle it's better to find a suitable, idle CPU for it to
run on than to wait for another CPU to steal it. Besides, there is
no chance at all for uncookied tasks to be stolen.

Consider the following scenario:
Task A has cookie a; tasks B1 and B2 have cookie b.
A and B1 are running on core1, and B2 is running on core2
with its sibling idle.
B1 has no chance to migrate to ht1 of core2 immediately,
which causes a lot of forced idle.

	core1				core2
ht0		ht1		ht0		ht1
A		force idle	B2		real idle
force idle	B1		B2		real idle
A		force idle	B2		real idle
force idle	B1		B2		real idle

After applying this patch, B1 will be pushed immediately, and
forced idle will decrease.
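
For reference, the cookie setup in this scenario can be reproduced
from userspace with the core-scheduling prctl() interface (Linux >=
5.14). A minimal sketch, where pid_a, pid_b1 and pid_b2 are
hypothetical pids of A, B1 and B2, and pinning them onto core1/core2
is assumed to be done separately (e.g. via sched_setaffinity()):

  #include <sys/types.h>
  #include <sys/prctl.h>
  #include <linux/prctl.h>	/* PR_SCHED_CORE_* (since v5.14) */

  static int setup_cookies(pid_t pid_a, pid_t pid_b1, pid_t pid_b2)
  {
  	/* Give A its own unique cookie 'a'. */
  	if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid_a, 0 /* PIDTYPE_PID */, 0))
  		return -1;

  	/* Give B1 a new unique cookie 'b'. */
  	if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid_b1, 0, 0))
  		return -1;

  	/* Pull B1's cookie onto this (manager) task ... */
  	if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, pid_b1, 0, 0))
  		return -1;

  	/* ... and push it onto B2, so that B1 and B2 share cookie 'b'.
  	 * Side effect: the manager task itself now carries cookie 'b' too. */
  	if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, pid_b2, 0, 0))
  		return -1;

  	return 0;
  }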

On 2023/3/6 20:41, Cruz Zhao wrote:
> When a task with the max priority of its rq is forced
> idle because of an unmatched cookie, it is better to find
> a suitable CPU for it to run on as soon as possible, i.e.
> one that is idle and cookie matched. To achieve this, push
> the task in sched_core_balance(), after steal_cookie_task().
> 
> Signed-off-by: Cruz Zhao <CruzZhao@linux.alibaba.com>
> ---
>  kernel/sched/core.c  | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++--
>  kernel/sched/sched.h |  1 +
>  2 files changed, 72 insertions(+), 2 deletions(-)
> 
  

Patch

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a3f5147..2a2005a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -246,6 +246,8 @@  void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
 {
 	rq->core->core_task_seq++;
 
+	if (p == rq->force_idled_core_pick)
+		rq->force_idled_core_pick = NULL;
 	if (sched_core_enqueued(p)) {
 		rb_erase(&p->core_node, &rq->core_tree);
 		RB_CLEAR_NODE(&p->core_node);
@@ -346,9 +348,10 @@  static void __sched_core_flip(bool enabled)
 
 		sched_core_lock(cpu, &flags);
 
-		for_each_cpu(t, smt_mask)
+		for_each_cpu(t, smt_mask) {
 			cpu_rq(t)->core_enabled = enabled;
-
+			cpu_rq(t)->force_idled_core_pick = NULL;
+		}
 		cpu_rq(cpu)->core->core_forceidle_start = 0;
 
 		sched_core_unlock(cpu, &flags);
@@ -6085,6 +6088,7 @@  static inline struct task_struct *pick_task(struct rq *rq)
 		next = pick_task(rq);
 		if (!next->core_cookie) {
 			rq->core_pick = NULL;
+			rq->force_idled_core_pick = NULL;
 			/*
 			 * For robustness, update the min_vruntime_fi for
 			 * unconstrained picks as well.
@@ -6135,6 +6139,8 @@  static inline struct task_struct *pick_task(struct rq *rq)
 				p = idle_sched_class.pick_task(rq_i);
 		}
 
+		if (p != rq_i->core_pick)
+			rq_i->force_idled_core_pick = rq_i->core_pick;
 		rq_i->core_pick = p;
 
 		if (p == rq_i->idle) {
@@ -6288,10 +6294,61 @@  static bool steal_cookie_task(int cpu, struct sched_domain *sd)
 	return false;
 }
 
+static bool try_push_unmatch_task(struct task_struct *p, int this, int that)
+{
+	struct rq *src = cpu_rq(this), *dst = cpu_rq(that);
+	bool success = false;
+
+	local_irq_disable();
+	double_rq_lock(src, dst);
+	if (!available_idle_cpu(that))
+		goto unlock;
+	if (!cpumask_test_cpu(that, &p->cpus_mask))
+		goto unlock;
+	if (!sched_cpu_cookie_match(dst, p))
+		goto unlock;
+	if (p->core_occupation > dst->idle->core_occupation)
+		goto unlock;
+
+	deactivate_task(src, p, 0);
+	set_task_cpu(p, that);
+	src->force_idled_core_pick = NULL;
+	activate_task(dst, p, 0);
+
+	resched_curr(dst);
+
+	success = true;
+unlock:
+	double_rq_unlock(src, dst);
+	local_irq_enable();
+
+	return success;
+}
+
+static bool push_unmatch_task(struct task_struct *p, int cpu, struct sched_domain *sd)
+{
+	int i;
+	struct cpumask mask;
+
+	cpumask_and(&mask, sched_domain_span(sd), &p->cpus_mask);
+	for_each_cpu_wrap(i, &mask, cpu) {
+		if (i == cpu)
+			continue;
+
+		if (need_resched())
+			break;
+
+		if (try_push_unmatch_task(p, cpu, i))
+			return true;
+	}
+	return false;
+}
+
 static void sched_core_balance(struct rq *rq)
 {
 	struct sched_domain *sd;
 	int cpu = cpu_of(rq);
+	struct task_struct *p;
 
 	preempt_disable();
 	rcu_read_lock();
@@ -6303,6 +6360,18 @@  static void sched_core_balance(struct rq *rq)
 		if (steal_cookie_task(cpu, sd))
 			break;
 	}
+
+	p = rq->force_idled_core_pick;	
+	if (!p || p == rq->idle)
+		goto unlock;
+	for_each_domain(cpu, sd) {
+		if (need_resched())
+			break;
+
+		if (push_unmatch_task(p, cpu, sd))
+			break;
+	}
+unlock:
 	raw_spin_rq_lock_irq(rq);
 	rcu_read_unlock();
 	preempt_enable();
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3e8df6d..f9e7988 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1140,6 +1140,7 @@  struct rq {
 	/* per rq */
 	struct rq		*core;
 	struct task_struct	*core_pick;
+	struct task_struct	*force_idled_core_pick;
 	unsigned int		core_enabled;
 	unsigned int		core_sched_seq;
 	struct rb_root		core_tree;