[v2] sched/core: Reduce cost of sched_move_task when config autogroup

Message ID 20230321064459.39421-1-wuchi.zero@gmail.com
State New
Series [v2] sched/core: Reduce cost of sched_move_task when config autogroup

Commit Message

wuchi March 21, 2023, 6:44 a.m. UTC
Some sched_move_task() calls are useless because
task_struct->sched_task_group may be unchanged (it already equals the
task_group of the task's cpu cgroup) when the system has autogroup
enabled. So add a check for this in sched_move_task().

sched_move_task() example:
task A belongs to cpu_cgroup0 and autogroup0. It will still belong to
cpu_cgroup0 when it reaches do_exit(), so the dequeue/enqueue pair in
sched_move_task() is unnecessary. The call graph is as follows.

do_exit
  sched_autogroup_exit_task
    sched_move_task
      dequeue_task
      sched_change_group
        A.sched_task_group = sched_get_task_group (=cpu_cgroup0)
      enqueue_task

Performance results:
===========================
1. env
        cpu: bogomips=4600.00
     kernel: 6.3.0-rc3
 cpu_cgroup: 6:cpu,cpuacct:/user.slice
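
Note: the cpu_cgroup line appears to be the cpu controller entry from
/proc/<pid>/cgroup (cgroup v1). Assuming a similar setup, the shell's
placement can be checked with:

```
grep cpu,cpuacct /proc/self/cgroup
```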

2. cmds
do_exit script:
```
for i in {0..10000}; do
    sleep 0 &
done
wait
```
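
Each sleep 0 forks a short-lived child of the shell, so it inherits
the shell's autogroup; when it exits, the do_exit() path shown above
calls sched_move_task() once per task, so the loop should trigger
roughly 10000 of the calls measured below.
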
Run the above script, then use the following bpftrace command to
measure the total time spent in sched_move_task:

bpftrace -e 'k:sched_move_task { @ts[tid] = nsecs; }
             kr:sched_move_task /@ts[tid]/
                { @ns += nsecs - @ts[tid]; delete(@ts[tid]); }'
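
A variant that additionally counts entries (not part of the original
measurement, but handy for deriving a per-call average) could be:

bpftrace -e 'k:sched_move_task { @ts[tid] = nsecs; @calls = count(); }
             kr:sched_move_task /@ts[tid]/
                { @ns += nsecs - @ts[tid]; delete(@ts[tid]); }'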

3. cost time (ns):
 without patch: 43528033
 with    patch: 18541416
          diff: -24986617 (-57.4%)
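
(43528033 - 18541416 = 24986617 ns saved; 24986617 / 43528033 ~= 57.4%)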

As the results show, the patch saves 57.4% of the time spent in
sched_move_task in this scenario.

Signed-off-by: wuchi <wuchi.zero@gmail.com>
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/core.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)
  

Comments

Peter Zijlstra March 21, 2023, 9:36 a.m. UTC | #1
On Tue, Mar 21, 2023 at 02:44:59PM +0800, wuchi wrote:

>  kernel/sched/core.c | 30 ++++++++++++++++++++++++++++--
>  1 file changed, 28 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index a380f34789a2..1e7d6a8c3455 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -10330,7 +10330,7 @@ void sched_release_group(struct task_group *tg)
>  	spin_unlock_irqrestore(&task_group_lock, flags);
>  }
>  
> -static void sched_change_group(struct task_struct *tsk)
> +static struct task_group *sched_get_task_group(struct task_struct *tsk)
>  {
>  	struct task_group *tg;
>  
> @@ -10342,7 +10342,28 @@ static void sched_change_group(struct task_struct *tsk)
>  	tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
>  			  struct task_group, css);
>  	tg = autogroup_task_group(tsk, tg);
> -	tsk->sched_task_group = tg;
> +
> +	return tg;
> +}
> +
> +static bool sched_task_group_changed(struct task_struct *tsk)
> +{
> +	/*
> +	 * Some sched_move_task calls are useless because that
> +	 * task_struct->sched_task_group maybe not changed (equals
> +	 * task_group of cpu_cgroup) when system enable autogroup.
> +	 * So do some checks in sched_move_task.
> +	 */
> +#ifdef CONFIG_SCHED_AUTOGROUP
> +	return sched_get_task_group(tsk) != tsk->sched_task_group;
> +#else
> +	return true;
> +#endif /* CONFIG_SCHED_AUTOGROUP */
> +}
> +
> +static void sched_change_group(struct task_struct *tsk)
> +{
> +	tsk->sched_task_group = sched_get_task_group(tsk);
>  
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>  	if (tsk->sched_class->task_change_group)
> @@ -10367,6 +10388,10 @@ void sched_move_task(struct task_struct *tsk)
>  	struct rq *rq;
>  
>  	rq = task_rq_lock(tsk, &rf);
> +
> +	if (!sched_task_group_changed(tsk))
> +		goto unlock;
> +
>  	update_rq_clock(rq);
>  
>  	running = task_current(rq, tsk);
> @@ -10391,6 +10416,7 @@ void sched_move_task(struct task_struct *tsk)
>  		resched_curr(rq);
>  	}
>  
> +unlock:
>  	task_rq_unlock(rq, tsk, &rf);
>  }

Would you mind terribly if I change it like so?

---
Subject: sched/core: Reduce cost of sched_move_task when config autogroup
From: wuchi <wuchi.zero@gmail.com>
Date: Tue, 21 Mar 2023 14:44:59 +0800

From: wuchi <wuchi.zero@gmail.com>

Some sched_move_task() calls are useless because
task_struct->sched_task_group may be unchanged (it already equals the
task_group of the task's cpu cgroup) when the system has autogroup
enabled. So add a check for this in sched_move_task().

sched_move_task() example:
task A belongs to cpu_cgroup0 and autogroup0. It will still belong to
cpu_cgroup0 when it reaches do_exit(), so the dequeue/enqueue pair in
sched_move_task() is unnecessary. The call graph is as follows.

  do_exit
    sched_autogroup_exit_task
      sched_move_task
        dequeue_task
        sched_change_group
          A.sched_task_group = sched_get_task_group (=cpu_cgroup0)
        enqueue_task

Performance results:
===========================
1. env
        cpu: bogomips=4600.00
     kernel: 6.3.0-rc3
 cpu_cgroup: 6:cpu,cpuacct:/user.slice

2. cmds
do_exit script:

  for i in {0..10000}; do
      sleep 0 &
  done
  wait

Run the above script, then use the following bpftrace command to
measure the total time spent in sched_move_task:

  bpftrace -e 'k:sched_move_task { @ts[tid] = nsecs; }
	       kr:sched_move_task /@ts[tid]/
		  { @ns += nsecs - @ts[tid]; delete(@ts[tid]); }'

3. cost time (ns):
  without patch: 43528033
  with    patch: 18541416
           diff: -24986617 (-57.4%)

As the results show, the patch saves 57.4% of the time spent in
sched_move_task in this scenario.

Signed-off-by: wuchi <wuchi.zero@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230321064459.39421-1-wuchi.zero@gmail.com
---
 kernel/sched/core.c |   22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10351,7 +10351,7 @@ void sched_release_group(struct task_gro
 	spin_unlock_irqrestore(&task_group_lock, flags);
 }
 
-static void sched_change_group(struct task_struct *tsk)
+static struct task_group *sched_get_task_group(struct task_struct *tsk)
 {
 	struct task_group *tg;
 
@@ -10363,7 +10363,13 @@ static void sched_change_group(struct ta
 	tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
 			  struct task_group, css);
 	tg = autogroup_task_group(tsk, tg);
-	tsk->sched_task_group = tg;
+
+	return tg;
+}
+
+static void sched_change_group(struct task_struct *tsk, struct task_group *group)
+{
+	tsk->sched_task_group = group;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->task_change_group)
@@ -10384,10 +10390,19 @@ void sched_move_task(struct task_struct
 {
 	int queued, running, queue_flags =
 		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+	struct task_group *group;
 	struct rq_flags rf;
 	struct rq *rq;
 
 	rq = task_rq_lock(tsk, &rf);
+	/*
+	 * Esp. with SCHED_AUTOGROUP enabled it is possible to get superfluous
+	 * group changes.
+	 */
+	group = sched_get_task_group(tsk);
+	if (group == tsk->sched_task_group)
+		goto unlock;
+
 	update_rq_clock(rq);
 
 	running = task_current(rq, tsk);
@@ -10398,7 +10413,7 @@ void sched_move_task(struct task_struct
 	if (running)
 		put_prev_task(rq, tsk);
 
-	sched_change_group(tsk);
+	sched_change_group(tsk, group);
 
 	if (queued)
 		enqueue_task(rq, tsk, queue_flags);
@@ -10412,6 +10427,7 @@ void sched_move_task(struct task_struct
 		resched_curr(rq);
 	}
 
+unlock:
 	task_rq_unlock(rq, tsk, &rf);
 }
  
wuchi March 21, 2023, 11:51 a.m. UTC | #2
On Tue, Mar 21, 2023 at 17:36, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, Mar 21, 2023 at 02:44:59PM +0800, wuchi wrote:
> [... v2 patch quoted in full ...]
>
> Would you mind terribly if I change it like so?
>
> [... updated changelog and patch quoted in full, as above ...]

It's more efficient and concise; I learned a lot from the code. Thanks!
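
(The efficiency gain of the rework: when the group does change, the v2
code calls sched_get_task_group() twice, once in
sched_task_group_changed() and again in sched_change_group(), while the
reworked version computes the group once and passes it down. It also
drops the CONFIG_SCHED_AUTOGROUP #ifdef, since the comparison is cheap
and the early bail-out is correct either way.)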
  

Patch

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a380f34789a2..1e7d6a8c3455 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10330,7 +10330,7 @@ void sched_release_group(struct task_group *tg)
 	spin_unlock_irqrestore(&task_group_lock, flags);
 }
 
-static void sched_change_group(struct task_struct *tsk)
+static struct task_group *sched_get_task_group(struct task_struct *tsk)
 {
 	struct task_group *tg;
 
@@ -10342,7 +10342,28 @@ static void sched_change_group(struct task_struct *tsk)
 	tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
 			  struct task_group, css);
 	tg = autogroup_task_group(tsk, tg);
-	tsk->sched_task_group = tg;
+
+	return tg;
+}
+
+static bool sched_task_group_changed(struct task_struct *tsk)
+{
+	/*
+	 * Some sched_move_task calls are useless because that
+	 * task_struct->sched_task_group maybe not changed (equals
+	 * task_group of cpu_cgroup) when system enable autogroup.
+	 * So do some checks in sched_move_task.
+	 */
+#ifdef CONFIG_SCHED_AUTOGROUP
+	return sched_get_task_group(tsk) != tsk->sched_task_group;
+#else
+	return true;
+#endif /* CONFIG_SCHED_AUTOGROUP */
+}
+
+static void sched_change_group(struct task_struct *tsk)
+{
+	tsk->sched_task_group = sched_get_task_group(tsk);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->task_change_group)
@@ -10367,6 +10388,10 @@ void sched_move_task(struct task_struct *tsk)
 	struct rq *rq;
 
 	rq = task_rq_lock(tsk, &rf);
+
+	if (!sched_task_group_changed(tsk))
+		goto unlock;
+
 	update_rq_clock(rq);
 
 	running = task_current(rq, tsk);
@@ -10391,6 +10416,7 @@ void sched_move_task(struct task_struct *tsk)
 		resched_curr(rq);
 	}
 
+unlock:
 	task_rq_unlock(rq, tsk, &rf);
 }