sched: push force idled core_pick task to another cpu
Commit Message
When the max priority task of an rq is force idled because
of an unmatched cookie, we had better find a suitable CPU
for it to run on as soon as possible, i.e. one that is idle
and cookie matched. To achieve this, we push the task in
sched_core_balance(), after steal_cookie_task().
Signed-off-by: Cruz Zhao <CruzZhao@linux.alibaba.com>
---
kernel/sched/core.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++--
kernel/sched/sched.h | 1 +
2 files changed, 72 insertions(+), 2 deletions(-)
Comments
ping...
As the core pick task is the max priority task of its rq, if
it gets forced idle it is better to find a suitable idle CPU
for it to run on right away rather than wait for another CPU
to steal it. Besides, an uncookie'd task has no chance of
being stolen at all, since stealing only considers cookie'd
tasks (see the excerpt below).
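For reference, the reason an uncookie'd task can never be stolen is
that stealing only walks rq->core_tree: try_steal_cookie() looks up
its candidate via sched_core_find(src, cookie), and sched_core_enqueue()
never puts a task without a cookie into that tree. The function below
is quoted from the pre-patch kernel/sched/core.c (the comment is added
here for illustration, and the exact body may differ slightly between
kernel versions):

void sched_core_enqueue(struct rq *rq, struct task_struct *p)
{
	rq->core->core_task_seq++;

	/* Uncookie'd tasks are never added to the core_tree. */
	if (!p->core_cookie)
		return;

	rb_add(&p->core_node, &rq->core_tree, rb_sched_core_less);
}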
Consider the following scenario:
Task A is cookie'd a, and tasks B1 and B2 are cookie'd b.
A and B1 are running on core1, and B2 is running on core2
with its sibling idle.
B1 has no chance to migrate to the idle sibling of B2
immediately, which causes a lot of force idle:
          core1                      core2
   ht0          ht1           ht0          ht1
   A            force idle    B2           real idle
   force idle   B1            B2           real idle
   A            force idle    B2           real idle
   force idle   B1            B2           real idle
After applying this patch, B1 will be pushed immediately, and
force idle will decrease.
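With the patch applied, sched_core_balance() ends up looking roughly
like the sketch below (assembled from the hunk in the quoted patch;
the unchanged context lines come from the pre-patch function and may
differ slightly between kernel versions):

static void sched_core_balance(struct rq *rq)
{
	struct sched_domain *sd;
	int cpu = cpu_of(rq);
	struct task_struct *p;

	preempt_disable();
	rcu_read_lock();
	raw_spin_rq_unlock_irq(rq);

	/* Existing path: try to steal a cookie matched task first. */
	for_each_domain(cpu, sd) {
		if (need_resched())
			break;

		if (steal_cookie_task(cpu, sd))
			break;
	}

	/*
	 * New path: if the core pick was forced idle, try to push it
	 * to an idle, cookie matched CPU instead of waiting for it to
	 * be stolen.
	 */
	p = rq->force_idled_core_pick;
	if (!p || p == rq->idle)
		goto unlock;

	for_each_domain(cpu, sd) {
		if (need_resched())
			break;

		if (push_unmatch_task(p, cpu, sd))
			break;
	}
unlock:
	raw_spin_rq_lock_irq(rq);
	rcu_read_unlock();
	preempt_enable();
}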
On 2023/3/6 8:41 PM, Cruz Zhao wrote:
> When the max priority task of an rq is force idled because
> of an unmatched cookie, we had better find a suitable CPU
> for it to run on as soon as possible, i.e. one that is idle
> and cookie matched. To achieve this, we push the task in
> sched_core_balance(), after steal_cookie_task().
>
> Signed-off-by: Cruz Zhao <CruzZhao@linux.alibaba.com>
> ---
> kernel/sched/core.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++--
> kernel/sched/sched.h | 1 +
> 2 files changed, 72 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index a3f5147..2a2005a 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -246,6 +246,8 @@ void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
> {
> rq->core->core_task_seq++;
>
> + if (p == rq->force_idled_core_pick)
> + rq->force_idled_core_pick = NULL;
> if (sched_core_enqueued(p)) {
> rb_erase(&p->core_node, &rq->core_tree);
> RB_CLEAR_NODE(&p->core_node);
> @@ -346,9 +348,10 @@ static void __sched_core_flip(bool enabled)
>
> sched_core_lock(cpu, &flags);
>
> - for_each_cpu(t, smt_mask)
> + for_each_cpu(t, smt_mask) {
> cpu_rq(t)->core_enabled = enabled;
> -
> + cpu_rq(t)->force_idled_core_pick = NULL;
> + }
> cpu_rq(cpu)->core->core_forceidle_start = 0;
>
> sched_core_unlock(cpu, &flags);
> @@ -6085,6 +6088,7 @@ static inline struct task_struct *pick_task(struct rq *rq)
> next = pick_task(rq);
> if (!next->core_cookie) {
> rq->core_pick = NULL;
> + rq->force_idled_core_pick = NULL;
> /*
> * For robustness, update the min_vruntime_fi for
> * unconstrained picks as well.
> @@ -6135,6 +6139,8 @@ static inline struct task_struct *pick_task(struct rq *rq)
> p = idle_sched_class.pick_task(rq_i);
> }
>
> + if (p != rq_i->core_pick)
> + rq_i->force_idled_core_pick = rq_i->core_pick;
> rq_i->core_pick = p;
>
> if (p == rq_i->idle) {
> @@ -6288,10 +6294,61 @@ static bool steal_cookie_task(int cpu, struct sched_domain *sd)
> return false;
> }
>
> +static bool try_push_unmatch_task(struct task_struct *p, int this, int that)
> +{
> + struct rq *src = cpu_rq(this), *dst = cpu_rq(that);
> + bool success = false;
> +
> + local_irq_disable();
> + double_rq_lock(src, dst);
> + if (!available_idle_cpu(that))
> + goto unlock;
> + if (!cpumask_test_cpu(that, &p->cpus_mask))
> + goto unlock;
> + if (!sched_cpu_cookie_match(dst, p))
> + goto unlock;
> + if (p->core_occupation > dst->idle->core_occupation)
> + goto unlock;
> +
> + deactivate_task(src, p, 0);
> + set_task_cpu(p, that);
> + src->force_idled_core_pick = NULL;
> + activate_task(dst, p, 0);
> +
> + resched_curr(dst);
> +
> + success = true;
> +unlock:
> + double_rq_unlock(src, dst);
> + local_irq_enable();
> +
> + return success;
> +}
> +
> +static bool push_unmatch_task(struct task_struct *p, int cpu, struct sched_domain *sd)
> +{
> + int i;
> + struct cpumask mask;
> +
> + cpumask_and(&mask, sched_domain_span(sd), &p->cpus_mask);
> + for_each_cpu_wrap(i, &mask, cpu) {
> + if (i == cpu)
> + continue;
> +
> + if (need_resched())
> + break;
> +
> + if (try_push_unmatch_task(p, cpu, i))
> + return true;
> + }
> + return false;
> +}
> +
> static void sched_core_balance(struct rq *rq)
> {
> struct sched_domain *sd;
> int cpu = cpu_of(rq);
> + struct task_struct *p;
>
> preempt_disable();
> rcu_read_lock();
> @@ -6303,6 +6360,18 @@ static void sched_core_balance(struct rq *rq)
> if (steal_cookie_task(cpu, sd))
> break;
> }
> +
> + p = rq->force_idled_core_pick;
> + if (!p || p == rq->idle)
> + goto unlock;
> + for_each_domain(cpu, sd) {
> + if (need_resched())
> + break;
> +
> + if (push_unmatch_task(p, cpu, sd))
> + break;
> + }
> +unlock:
> raw_spin_rq_lock_irq(rq);
> rcu_read_unlock();
> preempt_enable();
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 3e8df6d..f9e7988 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1140,6 +1140,7 @@ struct rq {
> /* per rq */
> struct rq *core;
> struct task_struct *core_pick;
> + struct task_struct *force_idled_core_pick;
> unsigned int core_enabled;
> unsigned int core_sched_seq;
> struct rb_root core_tree;