[v2,09/22] sched/fair: Use IPC class score to select a busiest runqueue

Message ID 20221128132100.30253-10-ricardo.neri-calderon@linux.intel.com
State New
Series sched: Introduce IPC classes for load balance

Commit Message

Ricardo Neri Nov. 28, 2022, 1:20 p.m. UTC
  For two runqueues of equal priority and an equal number of running tasks,
select the one whose current task would have the highest IPC class score
if placed on the destination CPU.
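
As an illustration (hypothetical scores, not measured on any system):
suppose runqueues A and B each have two running tasks. A's current task
scores 10 on its own CPU and 15 on the destination CPU (a delta of 5),
while B's current task scores 10 and 25 (a delta of 15). B is selected
as the busiest runqueue, since its current task gains more from moving
to the destination CPU.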

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
Changes since v1:
 * Fixed a bug when selecting a busiest runqueue: when comparing two
   runqueues with equal nr_running, we must compute the IPCC score delta
   of both.
 * Renamed local variables to improve the layout of the code block.
   (PeterZ)
 * Used the new interface names.
---
 kernel/sched/fair.c | 54 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)
  

Comments

Ionela Voinescu Dec. 8, 2022, 8:51 a.m. UTC | #1
Hi,

On Monday 28 Nov 2022 at 05:20:47 (-0800), Ricardo Neri wrote:
> For two runqueues of equal priority and an equal number of running tasks,
> select the one whose current task would have the highest IPC class score
> if placed on the destination CPU.
> 
[..]
> +static int ipcc_score_delta(struct task_struct *p, int alt_cpu)
> +{
> +	unsigned long ipcc = p->ipcc;
> +
> +	if (!sched_ipcc_enabled())
> +		return INT_MIN;
> +
> +	return arch_get_ipcc_score(ipcc, alt_cpu) -
> +	       arch_get_ipcc_score(ipcc, task_cpu(p));

Nit: arch_get_ipcc_score() return values are never checked for error.

> +}
> +
>  #else /* CONFIG_IPC_CLASSES */
>  static void update_sg_lb_ipcc_stats(struct sg_lb_ipcc_stats *sgcs,
>  				    struct rq *rq)
> @@ -9258,6 +9276,11 @@ static bool sched_asym_ipcc_pick(struct sched_group *a,
>  	return false;
>  }
>  
> +static int ipcc_score_delta(struct task_struct *p, int alt_cpu)
> +{
> +	return INT_MIN;
> +}
> +
>  #endif /* CONFIG_IPC_CLASSES */
>  
>  /**
> @@ -10419,8 +10442,8 @@ static struct rq *find_busiest_queue(struct lb_env *env,
>  {
>  	struct rq *busiest = NULL, *rq;
>  	unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1;
> +	int i, busiest_ipcc_delta = INT_MIN;
>  	unsigned int busiest_nr = 0;
> -	int i;
>  
>  	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
>  		unsigned long capacity, load, util;
> @@ -10526,8 +10549,37 @@ static struct rq *find_busiest_queue(struct lb_env *env,
>  
>  		case migrate_task:
>  			if (busiest_nr < nr_running) {
> +				struct task_struct *curr;
> +
>  				busiest_nr = nr_running;
>  				busiest = rq;
> +
> +				/*
> +				 * Remember the IPC score delta of busiest::curr.
> +				 * We may need it to break a tie with other queues
> +				 * with equal nr_running.
> +				 */
> +				curr = rcu_dereference(busiest->curr);
> +				busiest_ipcc_delta = ipcc_score_delta(curr,
> +								      env->dst_cpu);
> +			/*
> +			 * If rq and busiest have the same number of running
> +			 * tasks, pick rq if doing so would give rq::curr a
> +			 * bigger IPC boost on dst_cpu.
> +			 */
> +			} else if (sched_ipcc_enabled() &&
> +				   busiest_nr == nr_running) {
> +				struct task_struct *curr;
> +				int delta;
> +
> +				curr = rcu_dereference(rq->curr);
> +				delta = ipcc_score_delta(curr, env->dst_cpu);
> +
> +				if (busiest_ipcc_delta < delta) {
> +					busiest_ipcc_delta = delta;
> +					busiest_nr = nr_running;
> +					busiest = rq;
> +				}
>  			}
>  			break;
>  

While in the commit message you describe this as breaking a tie for
asym_packing, the code here does not only affect asym_packing. If
another architecture had sched_ipcc_enabled(), it would use this as a
generic policy, and that might not be desired.

Hope it helps,
Ionela.

> -- 
> 2.25.1
> 
>
  
Ricardo Neri Dec. 14, 2022, 12:32 a.m. UTC | #2
On Thu, Dec 08, 2022 at 08:51:03AM +0000, Ionela Voinescu wrote:
> Hi,
> 
> On Monday 28 Nov 2022 at 05:20:47 (-0800), Ricardo Neri wrote:
> > For two runqueues of equal priority and an equal number of running tasks,
> > select the one whose current task would have the highest IPC class score
> > if placed on the destination CPU.
> > 
> [..]
> > +static int ipcc_score_delta(struct task_struct *p, int alt_cpu)
> > +{
> > +	unsigned long ipcc = p->ipcc;
> > +
> > +	if (!sched_ipcc_enabled())
> > +		return INT_MIN;
> > +
> > +	return arch_get_ipcc_score(ipcc, alt_cpu) -
> > +	       arch_get_ipcc_score(ipcc, task_cpu(p));
> 
> Nit: arch_get_ipcc_score() return values are never checked for error.

Fair point. I will handle error values.
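
Perhaps along these lines (untested sketch; it assumes that
arch_get_ipcc_score() returns a negative value on error, which the
current interface does not yet guarantee):

static int ipcc_score_delta(struct task_struct *p, int alt_cpu)
{
	unsigned long ipcc = p->ipcc;
	long score_alt, score_cur;

	if (!sched_ipcc_enabled())
		return INT_MIN;

	score_alt = arch_get_ipcc_score(ipcc, alt_cpu);
	score_cur = arch_get_ipcc_score(ipcc, task_cpu(p));

	/* Treat an error from either lookup as "no known boost". */
	if (score_alt < 0 || score_cur < 0)
		return INT_MIN;

	return score_alt - score_cur;
}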

> 
> > +}
> > +
> >  #else /* CONFIG_IPC_CLASSES */
> >  static void update_sg_lb_ipcc_stats(struct sg_lb_ipcc_stats *sgcs,
> >  				    struct rq *rq)
> > @@ -9258,6 +9276,11 @@ static bool sched_asym_ipcc_pick(struct sched_group *a,
> >  	return false;
> >  }
> >  
> > +static int ipcc_score_delta(struct task_struct *p, int alt_cpu)
> > +{
> > +	return INT_MIN;
> > +}
> > +
> >  #endif /* CONFIG_IPC_CLASSES */
> >  
> >  /**
> > @@ -10419,8 +10442,8 @@ static struct rq *find_busiest_queue(struct lb_env *env,
> >  {
> >  	struct rq *busiest = NULL, *rq;
> >  	unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1;
> > +	int i, busiest_ipcc_delta = INT_MIN;
> >  	unsigned int busiest_nr = 0;
> > -	int i;
> >  
> >  	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
> >  		unsigned long capacity, load, util;
> > @@ -10526,8 +10549,37 @@ static struct rq *find_busiest_queue(struct lb_env *env,
> >  
> >  		case migrate_task:
> >  			if (busiest_nr < nr_running) {
> > +				struct task_struct *curr;
> > +
> >  				busiest_nr = nr_running;
> >  				busiest = rq;
> > +
> > +				/*
> > +				 * Remember the IPC score delta of busiest::curr.
> > +				 * We may need it to break a tie with other queues
> > +				 * with equal nr_running.
> > +				 */
> > +				curr = rcu_dereference(busiest->curr);
> > +				busiest_ipcc_delta = ipcc_score_delta(curr,
> > +								      env->dst_cpu);
> > +			/*
> > +			 * If rq and busiest have the same number of running
> > +			 * tasks, pick rq if doing so would give rq::curr a
> > +			 * bigger IPC boost on dst_cpu.
> > +			 */
> > +			} else if (sched_ipcc_enabled() &&
> > +				   busiest_nr == nr_running) {
> > +				struct task_struct *curr;
> > +				int delta;
> > +
> > +				curr = rcu_dereference(rq->curr);
> > +				delta = ipcc_score_delta(curr, env->dst_cpu);
> > +
> > +				if (busiest_ipcc_delta < delta) {
> > +					busiest_ipcc_delta = delta;
> > +					busiest_nr = nr_running;
> > +					busiest = rq;
> > +				}
> >  			}
> >  			break;
> >  
> 
> While in the commit message you describe this as breaking a tie for
> asym_packing,

Are you referring to the overall series or this specific patch? I checked
the commit message and do not see references to asym_packing.

> the code here does not only affect asym_packing. If
> another architecture had sched_ipcc_enabled(), it would use this as a
> generic policy, and that might not be desired.

Indeed, the patchset implements support for using IPCC classes with
asym_packing, but it is not limited to that.

It is true that I don't check here for asym_packing, but it should not be a
problem, IMO. I compare two runqueues with equal nr_running; either runqueue
is a good choice. This tie breaker is an overall improvement, no?

Thanks and BR,
Ricardo
  
Ionela Voinescu Dec. 14, 2022, 11:16 p.m. UTC | #3
Hi Ricardo,

On Tuesday 13 Dec 2022 at 16:32:43 (-0800), Ricardo Neri wrote:
[..]
> > >  /**
> > > @@ -10419,8 +10442,8 @@ static struct rq *find_busiest_queue(struct lb_env *env,
> > >  {
> > >  	struct rq *busiest = NULL, *rq;
> > >  	unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1;
> > > +	int i, busiest_ipcc_delta = INT_MIN;
> > >  	unsigned int busiest_nr = 0;
> > > -	int i;
> > >  
> > >  	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
> > >  		unsigned long capacity, load, util;
> > > @@ -10526,8 +10549,37 @@ static struct rq *find_busiest_queue(struct lb_env *env,
> > >  
> > >  		case migrate_task:
> > >  			if (busiest_nr < nr_running) {
> > > +				struct task_struct *curr;
> > > +
> > >  				busiest_nr = nr_running;
> > >  				busiest = rq;
> > > +
> > > +				/*
> > > +				 * Remember the IPC score delta of busiest::curr.
> > > +				 * We may need it to break a tie with other queues
> > > +				 * with equal nr_running.
> > > +				 */
> > > +				curr = rcu_dereference(busiest->curr);
> > > +				busiest_ipcc_delta = ipcc_score_delta(curr,
> > > +								      env->dst_cpu);
> > > +			/*
> > > +			 * If rq and busiest have the same number of running
> > > +			 * tasks, pick rq if doing so would give rq::curr a
> > > +			 * bigger IPC boost on dst_cpu.
> > > +			 */
> > > +			} else if (sched_ipcc_enabled() &&
> > > +				   busiest_nr == nr_running) {
> > > +				struct task_struct *curr;
> > > +				int delta;
> > > +
> > > +				curr = rcu_dereference(rq->curr);
> > > +				delta = ipcc_score_delta(curr, env->dst_cpu);
> > > +
> > > +				if (busiest_ipcc_delta < delta) {
> > > +					busiest_ipcc_delta = delta;
> > > +					busiest_nr = nr_running;
> > > +					busiest = rq;
> > > +				}
> > >  			}
> > >  			break;
> > >  
> > 
> > While in the commit message you describe this as breaking a tie for
> > asym_packing,
> 
> > Are you referring to the overall series or this specific patch? I checked
> > the commit message and do not see references to asym_packing.

Sorry, my bad, I was thinking about the cover letter, not the commit
message. It's under "+++ Balancing load using classes of tasks. Theory
of operation".

> 
> > the code here does not only affect asym_packing. If
> > > another architecture had sched_ipcc_enabled(), it would use this as a
> > > generic policy, and that might not be desired.
> 
> > Indeed, the patchset implements support for using IPCC classes with
> > asym_packing, but it is not limited to that.
> 

So is your current intention to support IPC classes only for asym_packing
for now? What would be the impact on you if you were to limit the
functionality in this patch to asym_packing only?

> It is true that I don't check here for asym_packing, but it should not be a
> problem, IMO. I compare two runqueues with equal nr_running; either runqueue
> is a good choice. This tie breaker is an overall improvement, no?
> 

It could be, but equally there could be other better policies as well -
other ways to consider IPC class information to break the tie.

If other architectures start having sched_ipcc_enabled(), they would
automatically use the policy you've decided on here. If other policies
are better for those architectures, this generic policy would be difficult
to modify without causing regressions for the other architectures that
use it, or difficult to work around.

For this, and for future support of IPC classes, I am just wondering if we
can better design how we enable different architectures to have different
policies.

Thanks,
Ionela.

> Thanks and BR,
> Ricardo
  
Ricardo Neri Dec. 16, 2022, 11:24 p.m. UTC | #4
On Wed, Dec 14, 2022 at 11:16:39PM +0000, Ionela Voinescu wrote:
> Hi Ricardo,
> 
> On Tuesday 13 Dec 2022 at 16:32:43 (-0800), Ricardo Neri wrote:
> [..]
> > > >  /**
> > > > @@ -10419,8 +10442,8 @@ static struct rq *find_busiest_queue(struct lb_env *env,
> > > >  {
> > > >  	struct rq *busiest = NULL, *rq;
> > > >  	unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1;
> > > > +	int i, busiest_ipcc_delta = INT_MIN;
> > > >  	unsigned int busiest_nr = 0;
> > > > -	int i;
> > > >  
> > > >  	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
> > > >  		unsigned long capacity, load, util;
> > > > @@ -10526,8 +10549,37 @@ static struct rq *find_busiest_queue(struct lb_env *env,
> > > >  
> > > >  		case migrate_task:
> > > >  			if (busiest_nr < nr_running) {
> > > > +				struct task_struct *curr;
> > > > +
> > > >  				busiest_nr = nr_running;
> > > >  				busiest = rq;
> > > > +
> > > > +				/*
> > > > +				 * Remember the IPC score delta of busiest::curr.
> > > > +				 * We may need it to break a tie with other queues
> > > > +				 * with equal nr_running.
> > > > +				 */
> > > > +				curr = rcu_dereference(busiest->curr);
> > > > +				busiest_ipcc_delta = ipcc_score_delta(curr,
> > > > +								      env->dst_cpu);
> > > > +			/*
> > > > +			 * If rq and busiest have the same number of running
> > > > +			 * tasks, pick rq if doing so would give rq::curr a
> > > > +			 * bigger IPC boost on dst_cpu.
> > > > +			 */
> > > > +			} else if (sched_ipcc_enabled() &&
> > > > +				   busiest_nr == nr_running) {
> > > > +				struct task_struct *curr;
> > > > +				int delta;
> > > > +
> > > > +				curr = rcu_dereference(rq->curr);
> > > > +				delta = ipcc_score_delta(curr, env->dst_cpu);
> > > > +
> > > > +				if (busiest_ipcc_delta < delta) {
> > > > +					busiest_ipcc_delta = delta;
> > > > +					busiest_nr = nr_running;
> > > > +					busiest = rq;
> > > > +				}
> > > >  			}
> > > >  			break;
> > > >  
> > > 
> > > While in the commit message you describe this as breaking a tie for
> > > asym_packing,
> > 
> > Are you referring to the overall series or this specific patch? I checked
> > the commit message and do not see references to asym_packing.
> 
> Sorry, my bad, I was thinking about the cover letter, not the commit
> message. It's under "+++ Balancing load using classes of tasks. Theory
> of operation".
> 
> > 
> > > the code here does not only affect asym_packing. If
> > > another architecture had sched_ipcc_enabled(), it would use this as a
> > > generic policy, and that might not be desired.
> > 
> > > Indeed, the patchset implements support for using IPCC classes with
> > > asym_packing, but it is not limited to that.
> > 
> 
> So is your current intention to support IPC classes only for asym_packing
> for now?

My intention is to introduce IPC classes in general and make them available
to other policies and architectures. I use asym_packing as a use case.


> What would be the impact on you if you were to limit the
> functionality in this patch to asym_packing only?

There would not be any adverse impact.

> 
> > It is true that I don't check here for asym_packing, but it should not be a
> > problem, IMO. I compare two runqueues with equal nr_running; either runqueue
> > is a good choice. This tie breaker is an overall improvement, no?
> > 
> 
> It could be, but equally there could be other better policies as well -
> other ways to consider IPC class information to break the tie.
> 
> If other architectures start having sched_ipcc_enabled(), they would
> automatically use the policy you've decided on here. If other policies
> are better for those architectures, this generic policy would be difficult
> to modify without causing regressions for the other architectures that
> use it, or difficult to work around.
> 
> For this, and for future support of IPC classes, I am just wondering if we
> can better design how we enable different architectures to have different
> policies.

I see your point. I agree that other architectures may want to implement
policies differently. I'll add an extra check for env->sd->flags & SD_ASYM_PACKING.
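
On top of this patch, that would look roughly like this (untested):

-			} else if (sched_ipcc_enabled() &&
-				   busiest_nr == nr_running) {
+			} else if (sched_ipcc_enabled() &&
+				   (env->sd->flags & SD_ASYM_PACKING) &&
+				   busiest_nr == nr_running) {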

Thanks and BR,
Ricardo
  

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e8b181c31842..113470bbd7a5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9233,6 +9233,24 @@  static bool sched_asym_ipcc_pick(struct sched_group *a,
 	return sched_asym_ipcc_prefer(a_stats, b_stats);
 }
 
+/**
+ * ipcc_score_delta - Get the IPCC score delta on a different CPU
+ * @p:		A task
+ * @alt_cpu:	A prospective CPU to place @p
+ *
+ * Returns: The IPCC score delta that @p would get if placed on @alt_cpu
+ */
+static int ipcc_score_delta(struct task_struct *p, int alt_cpu)
+{
+	unsigned long ipcc = p->ipcc;
+
+	if (!sched_ipcc_enabled())
+		return INT_MIN;
+
+	return arch_get_ipcc_score(ipcc, alt_cpu) -
+	       arch_get_ipcc_score(ipcc, task_cpu(p));
+}
+
 #else /* CONFIG_IPC_CLASSES */
 static void update_sg_lb_ipcc_stats(struct sg_lb_ipcc_stats *sgcs,
 				    struct rq *rq)
@@ -9258,6 +9276,11 @@  static bool sched_asym_ipcc_pick(struct sched_group *a,
 	return false;
 }
 
+static int ipcc_score_delta(struct task_struct *p, int alt_cpu)
+{
+	return INT_MIN;
+}
+
 #endif /* CONFIG_IPC_CLASSES */
 
 /**
@@ -10419,8 +10442,8 @@  static struct rq *find_busiest_queue(struct lb_env *env,
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1;
+	int i, busiest_ipcc_delta = INT_MIN;
 	unsigned int busiest_nr = 0;
-	int i;
 
 	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
 		unsigned long capacity, load, util;
@@ -10526,8 +10549,37 @@  static struct rq *find_busiest_queue(struct lb_env *env,
 
 		case migrate_task:
 			if (busiest_nr < nr_running) {
+				struct task_struct *curr;
+
 				busiest_nr = nr_running;
 				busiest = rq;
+
+				/*
+				 * Remember the IPC score delta of busiest::curr.
+				 * We may need it to break a tie with other queues
+				 * with equal nr_running.
+				 */
+				curr = rcu_dereference(busiest->curr);
+				busiest_ipcc_delta = ipcc_score_delta(curr,
+								      env->dst_cpu);
+			/*
+			 * If rq and busiest have the same number of running
+			 * tasks, pick rq if doing so would give rq::curr a
+			 * bigger IPC boost on dst_cpu.
+			 */
+			} else if (sched_ipcc_enabled() &&
+				   busiest_nr == nr_running) {
+				struct task_struct *curr;
+				int delta;
+
+				curr = rcu_dereference(rq->curr);
+				delta = ipcc_score_delta(curr, env->dst_cpu);
+
+				if (busiest_ipcc_delta < delta) {
+					busiest_ipcc_delta = delta;
+					busiest_nr = nr_running;
+					busiest = rq;
+				}
 			}
 			break;