[v2,07/22] sched/fair: Compute IPC class scores for load balancing

Message ID 20221128132100.30253-8-ricardo.neri-calderon@linux.intel.com
State: New
Series: sched: Introduce IPC classes for load balance

Commit Message

Ricardo Neri Nov. 28, 2022, 1:20 p.m. UTC
Compute the joint total (both current and prospective) IPC class score of
a scheduling group and the local scheduling group.

These IPCC statistics are used during asym_packing load balancing, which
implies that the candidate sched group will have one fewer busy CPU after
load balancing. This observation is important for physical cores with SMT
support.

The IPCC score of scheduling groups composed of SMT siblings needs to
consider that the siblings share CPU resources. When computing the total
IPCC score of the scheduling group, divide the score of each sibling by
the number of busy siblings.
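
As a hypothetical illustration (the scores below are made up, not taken
from the patch): consider a candidate group that is an SMT core with two
busy siblings whose tasks score 600 and 400. The group's score before
load balancing is (600 + 400) / 2 = 500. After load balancing, the
lowest-scoring task has moved away and one sibling is idle, so the
remaining score is 600 / 1 = 600; ipcc_score_after additionally includes
the score that the moved task attains on the destination CPU.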

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
Changes since v1:
 * Implemented cleanups and reworks from PeterZ. I took all his
   suggestions, except the computation of the IPC score before and after
   load balancing. We now compute the *total* score, not the average.
 * Check for SD_SHARE_CPUCAPACITY to compute the throughput of the SMT
   siblings of a physical core.
 * Used the new interface names.
 * Reworded commit message for clarity.
---
 kernel/sched/fair.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
  

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3a1d6c50a19b..e333f9623b3a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8766,6 +8766,10 @@  struct sg_lb_stats {
 	unsigned int nr_numa_running;
 	unsigned int nr_preferred_running;
 #endif
+#ifdef CONFIG_IPC_CLASSES
+	long ipcc_score_after; /* Prospective IPCC score after load balancing */
+	long ipcc_score_before; /* IPCC score before load balancing */
+#endif
 };
 
 /*
@@ -9140,6 +9144,38 @@  static void update_sg_lb_ipcc_stats(struct sg_lb_ipcc_stats *sgcs,
 	}
 }
 
+static void update_sg_lb_stats_scores(struct sg_lb_ipcc_stats *sgcs,
+				      struct sg_lb_stats *sgs,
+				      struct sched_group *sg,
+				      int dst_cpu)
+{
+	int busy_cpus, score_on_dst_cpu;
+	long before, after;
+
+	if (!sched_ipcc_enabled())
+		return;
+
+	busy_cpus = sgs->group_weight - sgs->idle_cpus;
+	/* No busy CPUs in the group. No tasks to move. */
+	if (!busy_cpus)
+		return;
+
+	score_on_dst_cpu = arch_get_ipcc_score(sgcs->min_ipcc, dst_cpu);
+
+	before = sgcs->sum_score;
+	after = before - sgcs->min_score;
+
+	/* SMT siblings share throughput. */
+	if (busy_cpus > 1 && sg->flags & SD_SHARE_CPUCAPACITY) {
+		before /= busy_cpus;
+		/* One sibling will become idle after load balance. */
+		after /= busy_cpus - 1;
+	}
+
+	sgs->ipcc_score_after = after + score_on_dst_cpu;
+	sgs->ipcc_score_before = before;
+}
+
 #else /* CONFIG_IPC_CLASSES */
 static void update_sg_lb_ipcc_stats(struct sg_lb_ipcc_stats *sgcs,
 				    struct rq *rq)
@@ -9149,6 +9185,14 @@  static void update_sg_lb_ipcc_stats(struct sg_lb_ipcc_stats *sgcs,
 static void init_rq_ipcc_stats(struct sg_lb_ipcc_stats *class_sgs)
 {
 }
+
+static void update_sg_lb_stats_scores(struct sg_lb_ipcc_stats *sgcs,
+				      struct sg_lb_stats *sgs,
+				      struct sched_group *sg,
+				      int dst_cpu)
+{
+}
+
 #endif /* CONFIG_IPC_CLASSES */
 
 /**
@@ -9329,6 +9373,7 @@  static inline void update_sg_lb_stats(struct lb_env *env,
 	if (!local_group && env->sd->flags & SD_ASYM_PACKING &&
 	    env->idle != CPU_NOT_IDLE && sgs->sum_h_nr_running &&
 	    sched_asym(env, sds, sgs, group)) {
+		update_sg_lb_stats_scores(&sgcs, sgs, group, env->dst_cpu);
 		sgs->group_asym_packing = 1;
 	}