[v2,1/2] sched/fair: Add EAS checks before updating overutilized

Message ID 20240228071621.602596-2-sshegde@linux.ibm.com
State New
Headers
Series sched/fair: Limit access to overutilized |

Commit Message

Shrikanth Hegde Feb. 28, 2024, 7:16 a.m. UTC
  Overutilized field of root domain is only used for EAS(energy aware scheduler)
to decide whether to do regular load balance or EAS aware load balance. It
is not used if EAS not possible.

Currently enqueue_task_fair and task_tick_fair accesses, sometime updates
this field. In update_sd_lb_stats it is updated often.
Which causes cache contention due to load/store tearing and burns
a lot of cycles. Hence add EAS check before updating this field.
EAS check is optimized at compile time or it is static branch.
Hence it shouldn't cost much.

With the patch, both enqueue_task_fair and newidle_balance don't show
up as hot routines in perf profile.

6.8-rc4:
7.18%  swapper          [kernel.vmlinux]              [k] enqueue_task_fair
6.78%  s                [kernel.vmlinux]              [k] newidle_balance
+patch:
0.14%  swapper          [kernel.vmlinux]              [k] enqueue_task_fair
0.00%  swapper          [kernel.vmlinux]              [k] newidle_balance

Minor change; trace_sched_overutilized_tp expect that second argument to
be bool. So do a int to bool conversion for that.

Fixes: 2802bf3cd936 ("sched/fair: Add over-utilization/tipping point indicator")
Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 kernel/sched/fair.c | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

--
2.39.3
  

Comments

Pierre Gondois Feb. 28, 2024, 3:58 p.m. UTC | #1
It is nice to avoid calling effective_cpu_util() through the following
when EAS is not enabled:

cpu_overutilized()
\-util_fits_cpu()
   \- ...
     \-effective_cpu_util()

On 2/28/24 08:16, Shrikanth Hegde wrote:
> Overutilized field of root domain is only used for EAS(energy aware scheduler)
> to decide whether to do regular load balance or EAS aware load balance. It
> is not used if EAS not possible.
> 
> Currently enqueue_task_fair and task_tick_fair accesses, sometime updates
> this field. In update_sd_lb_stats it is updated often.
> Which causes cache contention due to load/store tearing and burns
> a lot of cycles. Hence add EAS check before updating this field.
> EAS check is optimized at compile time or it is static branch.
> Hence it shouldn't cost much.
> 
> With the patch, both enqueue_task_fair and newidle_balance don't show
> up as hot routines in perf profile.
> 
> 6.8-rc4:
> 7.18%  swapper          [kernel.vmlinux]              [k] enqueue_task_fair
> 6.78%  s                [kernel.vmlinux]              [k] newidle_balance
> +patch:
> 0.14%  swapper          [kernel.vmlinux]              [k] enqueue_task_fair
> 0.00%  swapper          [kernel.vmlinux]              [k] newidle_balance
> 
> Minor change; trace_sched_overutilized_tp expect that second argument to
> be bool. So do a int to bool conversion for that.
> 
> Fixes: 2802bf3cd936 ("sched/fair: Add over-utilization/tipping point indicator")
> Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
> ---
>   kernel/sched/fair.c | 35 ++++++++++++++++++++++++-----------
>   1 file changed, 24 insertions(+), 11 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 8e30e2bb77a0..3105fb08b87e 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6670,15 +6670,30 @@ static inline bool cpu_overutilized(int cpu)
>   	return !util_fits_cpu(cpu_util_cfs(cpu), rq_util_min, rq_util_max, cpu);
>   }
> 
> -static inline void update_overutilized_status(struct rq *rq)
> +static inline void update_rd_overutilized_status(struct root_domain *rd,
> +						 int status)
>   {
> -	if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) {
> -		WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED);
> -		trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED);
> +	if (sched_energy_enabled()) {
> +		WRITE_ONCE(rd->overutilized, status);
> +		trace_sched_overutilized_tp(rd, !!status);
> +	}
> +}

NIT:
When called from check_update_overutilized_status(),
sched_energy_enabled() will be checked twice.

> +
> +static inline void check_update_overutilized_status(struct rq *rq)
> +{
> +	/*
> +	 * overutilized field is used for load balancing decisions only
> +	 * if energy aware scheduler is being used
> +	 */
> +	if (sched_energy_enabled()) {
> +		if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu))
> +			update_rd_overutilized_status(rq->rd, SG_OVERUTILIZED);
>   	}
>   }
>   #else
> -static inline void update_overutilized_status(struct rq *rq) { }
> +static inline void check_update_overutilized_status(struct rq *rq) { }
> +static inline void update_rd_overutilized_status(struct root_domain *rd,
> +						 bool status) { }
>   #endif
> 
>   /* Runqueue only has SCHED_IDLE tasks enqueued */
> @@ -6779,7 +6794,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>   	 * and the following generally works well enough in practice.
>   	 */
>   	if (!task_new)
> -		update_overutilized_status(rq);
> +		check_update_overutilized_status(rq);
> 
>   enqueue_throttle:
>   	assert_list_leaf_cfs_rq(rq);
> @@ -10613,13 +10628,11 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
>   		WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD);
> 
>   		/* Update over-utilization (tipping point, U >= 0) indicator */
> -		WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED);
> -		trace_sched_overutilized_tp(rd, sg_status & SG_OVERUTILIZED);
> +		update_rd_overutilized_status(rd, sg_status & SG_OVERUTILIZED);
>   	} else if (sg_status & SG_OVERUTILIZED) {
>   		struct root_domain *rd = env->dst_rq->rd;
> 
> -		WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED);
> -		trace_sched_overutilized_tp(rd, SG_OVERUTILIZED);
> +		update_rd_overutilized_status(rd, SG_OVERUTILIZED);
>   	}
> 
>   	update_idle_cpu_scan(env, sum_util);
> @@ -12625,7 +12638,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
>   		task_tick_numa(rq, curr);
> 
>   	update_misfit_status(curr, rq);
> -	update_overutilized_status(task_rq(curr));
> +	check_update_overutilized_status(task_rq(curr));
> 
>   	task_tick_core(rq, curr);
>   }
> --
> 2.39.3
>
  
Shrikanth Hegde Feb. 28, 2024, 5:24 p.m. UTC | #2
On 2/28/24 9:28 PM, Pierre Gondois wrote:

Hi Pierre, Thanks for taking a look.

> It is nice to avoid calling effective_cpu_util() through the following
> when EAS is not enabled:
> I think we are avoiding calling cpu_overutilized except in update_sg_lb_stats. 
I didnt want to put a EAS check in cpu_overutilized as it could be useful 
function in non-EAS cases in future. calling cpu_overutilized alone doesnt 
do any access to root_domain's overutilized field. So we are okay w.r.t to 
cache issues. 
But we will do some extra computation currently and then not use it if it 
Non-EAS case in update_sg_lb_stats

Would something like this makes sense?
@@ -9925,7 +9925,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                if (nr_running > 1)
                        *sg_status |= SG_OVERLOAD;
 
-               if (cpu_overutilized(i))
+               if (sched_energy_enabled() && cpu_overutilized(i))
                        *sg_status |= SG_OVERUTILIZED;
 



I didnt find how would util_fits_cpu ends up calling effective_cpu_util. 
Could you please elaborate? 

> cpu_overutilized()
> \-util_fits_cpu()
>   \- ...
>     \-effective_cpu_util()
> 
> On 2/28/24 08:16, Shrikanth Hegde wrote:
>> Overutilized field of root domain is only used for EAS(energy aware
>> scheduler)
>> to decide whether to do regular load balance or EAS aware load
>> balance. It
>> is not used if EAS not possible.
>>
>> Currently enqueue_task_fair and task_tick_fair accesses, sometime updates
>> this field. In update_sd_lb_stats it is updated often.
>> Which causes cache contention due to load/store tearing and burns
>> a lot of cycles. Hence add EAS check before updating this field.
>> EAS check is optimized at compile time or it is static branch.
>> Hence it shouldn't cost much.
>>
>> With the patch, both enqueue_task_fair and newidle_balance don't show
>> up as hot routines in perf profile.
>>
>> 6.8-rc4:
>> 7.18%  swapper          [kernel.vmlinux]              [k]
>> enqueue_task_fair
>> 6.78%  s                [kernel.vmlinux]              [k] newidle_balance
>> +patch:
>> 0.14%  swapper          [kernel.vmlinux]              [k]
>> enqueue_task_fair
>> 0.00%  swapper          [kernel.vmlinux]              [k] newidle_balance
>>
>> Minor change; trace_sched_overutilized_tp expect that second argument to
>> be bool. So do a int to bool conversion for that.
>>
>> Fixes: 2802bf3cd936 ("sched/fair: Add over-utilization/tipping point
>> indicator")
>> Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
>> ---
>>   kernel/sched/fair.c | 35 ++++++++++++++++++++++++-----------
>>   1 file changed, 24 insertions(+), 11 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 8e30e2bb77a0..3105fb08b87e 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -6670,15 +6670,30 @@ static inline bool cpu_overutilized(int cpu)
>>       return !util_fits_cpu(cpu_util_cfs(cpu), rq_util_min,
>> rq_util_max, cpu);
>>   }
>>
>> -static inline void update_overutilized_status(struct rq *rq)
>> +static inline void update_rd_overutilized_status(struct root_domain *rd,
>> +                         int status)
>>   {
>> -    if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) {
>> -        WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED);
>> -        trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED);
>> +    if (sched_energy_enabled()) {
>> +        WRITE_ONCE(rd->overutilized, status);
>> +        trace_sched_overutilized_tp(rd, !!status);
>> +    }
>> +}
> 
> NIT:
> When called from check_update_overutilized_status(),
> sched_energy_enabled() will be checked twice.
Yes. 
But, I think that's okay since it is a static branch check at best. 
This way it keeps the code simpler. 

> 
>> +
>> +static inline void check_update_overutilized_status(struct rq *rq)
>> +{
>> +    /*
>> +     * overutilized field is used for load balancing decisions only
>> +     * if energy aware scheduler is being used
>> +     */
>> +    if (sched_energy_enabled()) {
>> +        if (!READ_ONCE(rq->rd->overutilized) &&
>> cpu_overutilized(rq->cpu))
>> +            update_rd_overutilized_status(rq->rd, SG_OVERUTILIZED);
>>       }
>>   }
>>   #else
>> -static inline void update_overutilized_status(struct rq *rq) { }
>> +static inline void check_update_overutilized_status(struct rq *rq) { }
>> +static inline void update_rd_overutilized_status(struct root_domain *rd,
>> +                         bool status) { }
>>   #endif
>>
>>   /* Runqueue only has SCHED_IDLE tasks enqueued */
>> @@ -6779,7 +6794,7 @@ enqueue_task_fair(struct rq *rq, struct
>> task_struct *p, int flags)
>>        * and the following generally works well enough in practice.
>>        */
>>       if (!task_new)
>> -        update_overutilized_status(rq);
>> +        check_update_overutilized_status(rq);
>>
>>   enqueue_throttle:
>>       assert_list_leaf_cfs_rq(rq);
>> @@ -10613,13 +10628,11 @@ static inline void update_sd_lb_stats(struct
>> lb_env *env, struct sd_lb_stats *sd
>>           WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD);
>>
>>           /* Update over-utilization (tipping point, U >= 0) indicator */
>> -        WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED);
>> -        trace_sched_overutilized_tp(rd, sg_status & SG_OVERUTILIZED);
>> +        update_rd_overutilized_status(rd, sg_status & SG_OVERUTILIZED);
>>       } else if (sg_status & SG_OVERUTILIZED) {
>>           struct root_domain *rd = env->dst_rq->rd;
>>
>> -        WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED);
>> -        trace_sched_overutilized_tp(rd, SG_OVERUTILIZED);
>> +        update_rd_overutilized_status(rd, SG_OVERUTILIZED);
>>       }
>>
>>       update_idle_cpu_scan(env, sum_util);
>> @@ -12625,7 +12638,7 @@ static void task_tick_fair(struct rq *rq,
>> struct task_struct *curr, int queued)
>>           task_tick_numa(rq, curr);
>>
>>       update_misfit_status(curr, rq);
>> -    update_overutilized_status(task_rq(curr));
>> +    check_update_overutilized_status(task_rq(curr));
>>
>>       task_tick_core(rq, curr);
>>   }
>> -- 
>> 2.39.3
>>
  
Dietmar Eggemann Feb. 28, 2024, 11:34 p.m. UTC | #3
On 28/02/2024 18:24, Shrikanth Hegde wrote:
> 
> 
> On 2/28/24 9:28 PM, Pierre Gondois wrote:

[...]

> But we will do some extra computation currently and then not use it if it 
> Non-EAS case in update_sg_lb_stats
> 
> Would something like this makes sense?
> @@ -9925,7 +9925,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>                 if (nr_running > 1)
>                         *sg_status |= SG_OVERLOAD;
>  
> -               if (cpu_overutilized(i))
> +               if (sched_energy_enabled() && cpu_overutilized(i))
>                         *sg_status |= SG_OVERUTILIZED;

Yes, we could also disable the setting of OU in load_balance in the none
!EAS case.

[...]

>> NIT:
>> When called from check_update_overutilized_status(),
>> sched_energy_enabled() will be checked twice.
> Yes. 
> But, I think that's okay since it is a static branch check at best. 
> This way it keeps the code simpler. 

You could keep the ched_energy_enabled() outside of the new
set_overutilized_status() to avoid this:

-->8--

---
 kernel/sched/fair.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 32bc98d9123d..c82164bf45f3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6676,12 +6676,19 @@ static inline bool cpu_overutilized(int cpu)
 	return !util_fits_cpu(cpu_util_cfs(cpu), rq_util_min, rq_util_max, cpu);
 }
 
+static inline void set_overutilized_status(struct rq *rq, unsigned int val)
+{
+	WRITE_ONCE(rq->rd->overutilized, val);
+	trace_sched_overutilized_tp(rq->rd, val);
+}
+
 static inline void update_overutilized_status(struct rq *rq)
 {
-	if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) {
-		WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED);
-		trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED);
-	}
+	if (!sched_energy_enabled())
+		return;
+
+	if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu))
+		set_overutilized_status(rq, SG_OVERUTILIZED);
 }
 #else
 static inline void update_overutilized_status(struct rq *rq) { }
@@ -10755,19 +10762,16 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		env->fbq_type = fbq_classify_group(&sds->busiest_stat);
 
 	if (!env->sd->parent) {
-		struct root_domain *rd = env->dst_rq->rd;
-
 		/* update overload indicator if we are at root domain */
-		WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD);
+		WRITE_ONCE(env->dst_rq->rd->overload, sg_status & SG_OVERLOAD);
 
 		/* Update over-utilization (tipping point, U >= 0) indicator */
-		WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED);
-		trace_sched_overutilized_tp(rd, sg_status & SG_OVERUTILIZED);
-	} else if (sg_status & SG_OVERUTILIZED) {
-		struct root_domain *rd = env->dst_rq->rd;
-
-		WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED);
-		trace_sched_overutilized_tp(rd, SG_OVERUTILIZED);
+		if (sched_energy_enabled()) {
+			set_overutilized_status(env->dst_rq,
+						sg_status & SG_OVERUTILIZED);
+		}
+	} else if (sched_energy_enabled() && sg_status & SG_OVERUTILIZED) {
+		set_overutilized_status(env->dst_rq, SG_OVERUTILIZED);
 	}
 
 	update_idle_cpu_scan(env, sum_util);
  
Shrikanth Hegde Feb. 29, 2024, 4:30 a.m. UTC | #4
On 2/29/24 5:04 AM, Dietmar Eggemann wrote:
> On 28/02/2024 18:24, Shrikanth Hegde wrote:
> 

Thank you Dietmar, for taking a look. 

> [...]
> 
>> But we will do some extra computation currently and then not use it if it 
>> Non-EAS case in update_sg_lb_stats
>>
>> Would something like this makes sense?
>> @@ -9925,7 +9925,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>>                 if (nr_running > 1)
>>                         *sg_status |= SG_OVERLOAD;
>>  
>> -               if (cpu_overutilized(i))
>> +               if (sched_energy_enabled() && cpu_overutilized(i))
>>                         *sg_status |= SG_OVERUTILIZED;
> 
> Yes, we could also disable the setting of OU in load_balance in the none
> !EAS case.
> 
> [...]

Ok. I will add this change. I don't see any other place where we need to do EAS 
check w.r.t to overutilized. This should cover all cases then. 

> 
>>> NIT:
>>> When called from check_update_overutilized_status(),
>>> sched_energy_enabled() will be checked twice.
>> Yes. 
>> But, I think that's okay since it is a static branch check at best. 
>> This way it keeps the code simpler. 
> 
> You could keep the ched_energy_enabled() outside of the new
> set_overutilized_status() to avoid this:
> 
> -->8--

Ok. We can do this as well.  I will incorporate this and send out v3 soon. 


> 
> ---
>  kernel/sched/fair.c | 32 ++++++++++++++++++--------------
>  1 file changed, 18 insertions(+), 14 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 32bc98d9123d..c82164bf45f3 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6676,12 +6676,19 @@ static inline bool cpu_overutilized(int cpu)
>  	return !util_fits_cpu(cpu_util_cfs(cpu), rq_util_min, rq_util_max, cpu);
>  }
>  
> +static inline void set_overutilized_status(struct rq *rq, unsigned int val)
> +{
> +	WRITE_ONCE(rq->rd->overutilized, val);
> +	trace_sched_overutilized_tp(rq->rd, val);
> +}
> +
>  static inline void update_overutilized_status(struct rq *rq)
>  {
> -	if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) {
> -		WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED);
> -		trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED);
> -	}
> +	if (!sched_energy_enabled())
> +		return;
> +
> +	if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu))
> +		set_overutilized_status(rq, SG_OVERUTILIZED);
>  }
>  #else
>  static inline void update_overutilized_status(struct rq *rq) { }
> @@ -10755,19 +10762,16 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
>  		env->fbq_type = fbq_classify_group(&sds->busiest_stat);
>  
>  	if (!env->sd->parent) {
> -		struct root_domain *rd = env->dst_rq->rd;
> -
>  		/* update overload indicator if we are at root domain */
> -		WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD);
> +		WRITE_ONCE(env->dst_rq->rd->overload, sg_status & SG_OVERLOAD);
>  
>  		/* Update over-utilization (tipping point, U >= 0) indicator */
> -		WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED);
> -		trace_sched_overutilized_tp(rd, sg_status & SG_OVERUTILIZED);
> -	} else if (sg_status & SG_OVERUTILIZED) {
> -		struct root_domain *rd = env->dst_rq->rd;
> -
> -		WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED);
> -		trace_sched_overutilized_tp(rd, SG_OVERUTILIZED);
> +		if (sched_energy_enabled()) {
> +			set_overutilized_status(env->dst_rq,
> +						sg_status & SG_OVERUTILIZED);
> +		}
> +	} else if (sched_energy_enabled() && sg_status & SG_OVERUTILIZED) {
> +		set_overutilized_status(env->dst_rq, SG_OVERUTILIZED);
>  	}
>  
>  	update_idle_cpu_scan(env, sum_util);
  
Pierre Gondois Feb. 29, 2024, 9:11 a.m. UTC | #5
Hello Shrikanth,

On 2/28/24 18:24, Shrikanth Hegde wrote:
> 
> 
> On 2/28/24 9:28 PM, Pierre Gondois wrote:
> 
> Hi Pierre, Thanks for taking a look.
> 
>> It is nice to avoid calling effective_cpu_util() through the following
>> when EAS is not enabled:
>> I think we are avoiding calling cpu_overutilized except in update_sg_lb_stats.
> I didnt want to put a EAS check in cpu_overutilized as it could be useful
> function in non-EAS cases in future. calling cpu_overutilized alone doesnt
> do any access to root_domain's overutilized field. So we are okay w.r.t to
> cache issues.
> But we will do some extra computation currently and then not use it if it
> Non-EAS case in update_sg_lb_stats
> 
> Would something like this makes sense?
> @@ -9925,7 +9925,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>                  if (nr_running > 1)
>                          *sg_status |= SG_OVERLOAD;
>   
> -               if (cpu_overutilized(i))
> +               if (sched_energy_enabled() && cpu_overutilized(i))
>                          *sg_status |= SG_OVERUTILIZED;
>   

Yes right. I think that what Dietmar suggested is also a good idea
which could be used instead.

> 
> 
> 
> I didnt find how would util_fits_cpu ends up calling effective_cpu_util.
> Could you please elaborate?

Sorry I meant this path:
cpu_overutilized()
\-cpu_util_cfs()
   \-cpu_util()

effective_cpu_util() is effectively not involved.

> 
>> cpu_overutilized()
>> \-util_fits_cpu()
>>    \- ...
>>      \-effective_cpu_util()
>>

[snip]

Regards,
Pierre
  

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8e30e2bb77a0..3105fb08b87e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6670,15 +6670,30 @@  static inline bool cpu_overutilized(int cpu)
 	return !util_fits_cpu(cpu_util_cfs(cpu), rq_util_min, rq_util_max, cpu);
 }

-static inline void update_overutilized_status(struct rq *rq)
+static inline void update_rd_overutilized_status(struct root_domain *rd,
+						 int status)
 {
-	if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) {
-		WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED);
-		trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED);
+	if (sched_energy_enabled()) {
+		WRITE_ONCE(rd->overutilized, status);
+		trace_sched_overutilized_tp(rd, !!status);
+	}
+}
+
+static inline void check_update_overutilized_status(struct rq *rq)
+{
+	/*
+	 * overutilized field is used for load balancing decisions only
+	 * if energy aware scheduler is being used
+	 */
+	if (sched_energy_enabled()) {
+		if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu))
+			update_rd_overutilized_status(rq->rd, SG_OVERUTILIZED);
 	}
 }
 #else
-static inline void update_overutilized_status(struct rq *rq) { }
+static inline void check_update_overutilized_status(struct rq *rq) { }
+static inline void update_rd_overutilized_status(struct root_domain *rd,
+						 bool status) { }
 #endif

 /* Runqueue only has SCHED_IDLE tasks enqueued */
@@ -6779,7 +6794,7 @@  enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	 * and the following generally works well enough in practice.
 	 */
 	if (!task_new)
-		update_overutilized_status(rq);
+		check_update_overutilized_status(rq);

 enqueue_throttle:
 	assert_list_leaf_cfs_rq(rq);
@@ -10613,13 +10628,11 @@  static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD);

 		/* Update over-utilization (tipping point, U >= 0) indicator */
-		WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED);
-		trace_sched_overutilized_tp(rd, sg_status & SG_OVERUTILIZED);
+		update_rd_overutilized_status(rd, sg_status & SG_OVERUTILIZED);
 	} else if (sg_status & SG_OVERUTILIZED) {
 		struct root_domain *rd = env->dst_rq->rd;

-		WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED);
-		trace_sched_overutilized_tp(rd, SG_OVERUTILIZED);
+		update_rd_overutilized_status(rd, SG_OVERUTILIZED);
 	}

 	update_idle_cpu_scan(env, sum_util);
@@ -12625,7 +12638,7 @@  static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 		task_tick_numa(rq, curr);

 	update_misfit_status(curr, rq);
-	update_overutilized_status(task_rq(curr));
+	check_update_overutilized_status(task_rq(curr));

 	task_tick_core(rq, curr);
 }