[1/2] cpufreq/schedutil: Add fast_switch callback check

Message ID 20230410095250.14908-1-wyes.karny@amd.com
State New
Headers
Series cpufreq/schedutil: Fix null pointer dereference in sugov_update_single_freq |

Commit Message

Wyes Karny April 10, 2023, 9:52 a.m. UTC
  The set value of `fast_switch_enabled` flag doesn't guarantee that
fast_switch callback is set. For some drivers such as amd_pstate, the
adjust_perf callback is used but it still sets `fast_switch_possible`
flag. This is not wrong because this flag doesn't imply fast_switch
callback is set, it implies whether the driver can guarantee that
frequency can be changed on any CPU sharing the policy and that the
change will affect all of the policy CPUs without the need to send any
IPIs or issue callbacks from the notifier chain.  Therefore add an extra
NULL check before calling fast_switch in sugov_update_single_freq
function.

Ideally `sugov_update_single_freq` function should not be called with
amd_pstate. But in a corner case scenario, when aperf/mperf overflow
occurs, kernel disables frequency invariance calculation which causes
schedutil to fallback to sugov_update_single_freq which currently relies
on the fast_switch callback.

Normal flow:
  sugov_update_single_perf
    cpufreq_driver_adjust_perf
      cpufreq_driver->adjust_perf

Error case flow:
  sugov_update_single_perf
    sugov_update_single_freq  <-- This is chosen because the freq invariant is disabled due to aperf/mperf overflow
      cpufreq_driver_fast_switch
         cpufreq_driver->fast_switch <-- Here NULL pointer dereference is happening, because fast_switch is not set

Fix this NULL pointer dereference issue by doing a NULL check.

Fixes: a61dec744745 ("cpufreq: schedutil: Avoid missing updates for one-CPU policies")
Signed-off-by: Wyes Karny <wyes.karny@amd.com>

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/cpufreq/cpufreq.c        | 11 +++++++++++
 include/linux/cpufreq.h          |  1 +
 kernel/sched/cpufreq_schedutil.c |  2 +-
 3 files changed, 13 insertions(+), 1 deletion(-)
  

Comments

Rafael J. Wysocki April 11, 2023, 3:55 p.m. UTC | #1
On Mon, Apr 10, 2023 at 11:53 AM Wyes Karny <wyes.karny@amd.com> wrote:
>
> The set value of `fast_switch_enabled` flag doesn't guarantee that
> fast_switch callback is set. For some drivers such as amd_pstate, the
> adjust_perf callback is used but it still sets `fast_switch_possible`
> flag. This is not wrong because this flag doesn't imply fast_switch
> callback is set, it implies whether the driver can guarantee that
> frequency can be changed on any CPU sharing the policy and that the
> change will affect all of the policy CPUs without the need to send any
> IPIs or issue callbacks from the notifier chain.  Therefore add an extra
> NULL check before calling fast_switch in sugov_update_single_freq
> function.
>
> Ideally `sugov_update_single_freq` function should not be called with
> amd_pstate. But in a corner case scenario, when aperf/mperf overflow
> occurs, kernel disables frequency invariance calculation which causes
> schedutil to fallback to sugov_update_single_freq which currently relies
> on the fast_switch callback.

Yes, it does.  Which is why that callback must be provided if the
driver sets fast_switch_enabled.

Overall, adjust_perf is optional, but fast_switch_enabled can only be
set if fast_switch is actually present.

Please fix the driver.

>
> Normal flow:
>   sugov_update_single_perf
>     cpufreq_driver_adjust_perf
>       cpufreq_driver->adjust_perf
>
> Error case flow:
>   sugov_update_single_perf
>     sugov_update_single_freq  <-- This is chosen because the freq invariant is disabled due to aperf/mperf overflow
>       cpufreq_driver_fast_switch
>          cpufreq_driver->fast_switch <-- Here NULL pointer dereference is happening, because fast_switch is not set
>
> Fix this NULL pointer dereference issue by doing a NULL check.
>
> Fixes: a61dec744745 ("cpufreq: schedutil: Avoid missing updates for one-CPU policies")
> Signed-off-by: Wyes Karny <wyes.karny@amd.com>
>
> Cc: "Rafael J. Wysocki" <rafael@kernel.org>
> Cc: stable@vger.kernel.org
> ---
>  drivers/cpufreq/cpufreq.c        | 11 +++++++++++
>  include/linux/cpufreq.h          |  1 +
>  kernel/sched/cpufreq_schedutil.c |  2 +-
>  3 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 6d8fd3b8dcb5..364d31b55380 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -2138,6 +2138,17 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
>  }
>  EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
>
> +/**
> + * cpufreq_driver_has_fast_switch - Check "fast switch" callback.
> + *
> + * Return 'true' if the ->fast_switch callback is present for the
> + * current driver or 'false' otherwise.
> + */
> +bool cpufreq_driver_has_fast_switch(void)
> +{
> +       return !!cpufreq_driver->fast_switch;
> +}
> +
>  /**
>   * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
>   * @cpu: Target CPU.
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index 65623233ab2f..8a9286fc718b 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -604,6 +604,7 @@ struct cpufreq_governor {
>  /* Pass a target to the cpufreq driver */
>  unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
>                                         unsigned int target_freq);
> +bool cpufreq_driver_has_fast_switch(void);
>  void cpufreq_driver_adjust_perf(unsigned int cpu,
>                                 unsigned long min_perf,
>                                 unsigned long target_perf,
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index e3211455b203..a1c449525ac2 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -364,7 +364,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
>          * concurrently on two different CPUs for the same target and it is not
>          * necessary to acquire the lock in the fast switch case.
>          */
> -       if (sg_policy->policy->fast_switch_enabled) {
> +       if (sg_policy->policy->fast_switch_enabled && cpufreq_driver_has_fast_switch()) {
>                 cpufreq_driver_fast_switch(sg_policy->policy, next_f);
>         } else {
>                 raw_spin_lock(&sg_policy->update_lock);
> --
> 2.34.1
>
  
Wyes Karny April 12, 2023, 6:08 a.m. UTC | #2
Hi Rafael,

On 11 Apr 17:55, Rafael J. Wysocki wrote:
> On Mon, Apr 10, 2023 at 11:53 AM Wyes Karny <wyes.karny@amd.com> wrote:
> >
> > The set value of `fast_switch_enabled` flag doesn't guarantee that
> > fast_switch callback is set. For some drivers such as amd_pstate, the
> > adjust_perf callback is used but it still sets `fast_switch_possible`
> > flag. This is not wrong because this flag doesn't imply fast_switch
> > callback is set, it implies whether the driver can guarantee that
> > frequency can be changed on any CPU sharing the policy and that the
> > change will affect all of the policy CPUs without the need to send any
> > IPIs or issue callbacks from the notifier chain.  Therefore add an extra
> > NULL check before calling fast_switch in sugov_update_single_freq
> > function.
> >
> > Ideally `sugov_update_single_freq` function should not be called with
> > amd_pstate. But in a corner case scenario, when aperf/mperf overflow
> > occurs, kernel disables frequency invariance calculation which causes
> > schedutil to fallback to sugov_update_single_freq which currently relies
> > on the fast_switch callback.
> 
> Yes, it does.  Which is why that callback must be provided if the
> driver sets fast_switch_enabled.
> 
> Overall, adjust_perf is optional, but fast_switch_enabled can only be
> set if fast_switch is actually present.

Then should the below logic be changed in sugov_start function?

'''
        else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf())
                 uu = sugov_update_single_perf;
'''

This logic restricts the selection of adjust_perf function based on
fast_switch_enabled flag. If this fast_switch_enabled check is removed
then amd_pstate driver can disable this flag and shedutil can select
adjust_perf without this dependency.

Thanks,
Wyes
> 
> Please fix the driver.
> 
> >
> > Normal flow:
> >   sugov_update_single_perf
> >     cpufreq_driver_adjust_perf
> >       cpufreq_driver->adjust_perf
> >
> > Error case flow:
> >   sugov_update_single_perf
> >     sugov_update_single_freq  <-- This is chosen because the freq invariant is disabled due to aperf/mperf overflow
> >       cpufreq_driver_fast_switch
> >          cpufreq_driver->fast_switch <-- Here NULL pointer dereference is happening, because fast_switch is not set
> >
> > Fix this NULL pointer dereference issue by doing a NULL check.
> >
> > Fixes: a61dec744745 ("cpufreq: schedutil: Avoid missing updates for one-CPU policies")
> > Signed-off-by: Wyes Karny <wyes.karny@amd.com>
> >
> > Cc: "Rafael J. Wysocki" <rafael@kernel.org>
> > Cc: stable@vger.kernel.org
> > ---
> >  drivers/cpufreq/cpufreq.c        | 11 +++++++++++
> >  include/linux/cpufreq.h          |  1 +
> >  kernel/sched/cpufreq_schedutil.c |  2 +-
> >  3 files changed, 13 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > index 6d8fd3b8dcb5..364d31b55380 100644
> > --- a/drivers/cpufreq/cpufreq.c
> > +++ b/drivers/cpufreq/cpufreq.c
> > @@ -2138,6 +2138,17 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
> >  }
> >  EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
> >
> > +/**
> > + * cpufreq_driver_has_fast_switch - Check "fast switch" callback.
> > + *
> > + * Return 'true' if the ->fast_switch callback is present for the
> > + * current driver or 'false' otherwise.
> > + */
> > +bool cpufreq_driver_has_fast_switch(void)
> > +{
> > +       return !!cpufreq_driver->fast_switch;
> > +}
> > +
> >  /**
> >   * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
> >   * @cpu: Target CPU.
> > diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> > index 65623233ab2f..8a9286fc718b 100644
> > --- a/include/linux/cpufreq.h
> > +++ b/include/linux/cpufreq.h
> > @@ -604,6 +604,7 @@ struct cpufreq_governor {
> >  /* Pass a target to the cpufreq driver */
> >  unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
> >                                         unsigned int target_freq);
> > +bool cpufreq_driver_has_fast_switch(void);
> >  void cpufreq_driver_adjust_perf(unsigned int cpu,
> >                                 unsigned long min_perf,
> >                                 unsigned long target_perf,
> > diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> > index e3211455b203..a1c449525ac2 100644
> > --- a/kernel/sched/cpufreq_schedutil.c
> > +++ b/kernel/sched/cpufreq_schedutil.c
> > @@ -364,7 +364,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
> >          * concurrently on two different CPUs for the same target and it is not
> >          * necessary to acquire the lock in the fast switch case.
> >          */
> > -       if (sg_policy->policy->fast_switch_enabled) {
> > +       if (sg_policy->policy->fast_switch_enabled && cpufreq_driver_has_fast_switch()) {
> >                 cpufreq_driver_fast_switch(sg_policy->policy, next_f);
> >         } else {
> >                 raw_spin_lock(&sg_policy->update_lock);
> > --
> > 2.34.1
> >
  
Wyes Karny April 24, 2023, 6:03 a.m. UTC | #3
Hi Rafael,

On 11 Apr 17:55, Rafael J. Wysocki wrote:
> On Mon, Apr 10, 2023 at 11:53 AM Wyes Karny <wyes.karny@amd.com> wrote:
> >
> > The set value of `fast_switch_enabled` flag doesn't guarantee that
> > fast_switch callback is set. For some drivers such as amd_pstate, the
> > adjust_perf callback is used but it still sets `fast_switch_possible`
> > flag. This is not wrong because this flag doesn't imply fast_switch
> > callback is set, it implies whether the driver can guarantee that
> > frequency can be changed on any CPU sharing the policy and that the
> > change will affect all of the policy CPUs without the need to send any
> > IPIs or issue callbacks from the notifier chain.  Therefore add an extra
> > NULL check before calling fast_switch in sugov_update_single_freq
> > function.
> >
> > Ideally `sugov_update_single_freq` function should not be called with
> > amd_pstate. But in a corner case scenario, when aperf/mperf overflow
> > occurs, kernel disables frequency invariance calculation which causes
> > schedutil to fallback to sugov_update_single_freq which currently relies
> > on the fast_switch callback.
> 
> Yes, it does.  Which is why that callback must be provided if the
> driver sets fast_switch_enabled.
> 
> Overall, adjust_perf is optional, but fast_switch_enabled can only be
> set if fast_switch is actually present.
>
> Please fix the driver.

FYI this issue is not exclusive to amd_pstate driver. Even intel_pstate
driver sets fast_switch_possible = ture without setting fast_switch
callback. If the driver only has adjust_perf even then
fast_switch_possible = ture is necessary because without this flag sugov
won't choose `sugov_update_single_perf`.

Thanks,
Wyes

> 
> >
> > Normal flow:
> >   sugov_update_single_perf
> >     cpufreq_driver_adjust_perf
> >       cpufreq_driver->adjust_perf
> >
> > Error case flow:
> >   sugov_update_single_perf
> >     sugov_update_single_freq  <-- This is chosen because the freq invariant is disabled due to aperf/mperf overflow
> >       cpufreq_driver_fast_switch
> >          cpufreq_driver->fast_switch <-- Here NULL pointer dereference is happening, because fast_switch is not set
> >
> > Fix this NULL pointer dereference issue by doing a NULL check.
> >
> > Fixes: a61dec744745 ("cpufreq: schedutil: Avoid missing updates for one-CPU policies")
> > Signed-off-by: Wyes Karny <wyes.karny@amd.com>
> >
> > Cc: "Rafael J. Wysocki" <rafael@kernel.org>
> > Cc: stable@vger.kernel.org
> > ---
> >  drivers/cpufreq/cpufreq.c        | 11 +++++++++++
> >  include/linux/cpufreq.h          |  1 +
> >  kernel/sched/cpufreq_schedutil.c |  2 +-
> >  3 files changed, 13 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > index 6d8fd3b8dcb5..364d31b55380 100644
> > --- a/drivers/cpufreq/cpufreq.c
> > +++ b/drivers/cpufreq/cpufreq.c
> > @@ -2138,6 +2138,17 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
> >  }
> >  EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
> >
> > +/**
> > + * cpufreq_driver_has_fast_switch - Check "fast switch" callback.
> > + *
> > + * Return 'true' if the ->fast_switch callback is present for the
> > + * current driver or 'false' otherwise.
> > + */
> > +bool cpufreq_driver_has_fast_switch(void)
> > +{
> > +       return !!cpufreq_driver->fast_switch;
> > +}
> > +
> >  /**
> >   * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
> >   * @cpu: Target CPU.
> > diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> > index 65623233ab2f..8a9286fc718b 100644
> > --- a/include/linux/cpufreq.h
> > +++ b/include/linux/cpufreq.h
> > @@ -604,6 +604,7 @@ struct cpufreq_governor {
> >  /* Pass a target to the cpufreq driver */
> >  unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
> >                                         unsigned int target_freq);
> > +bool cpufreq_driver_has_fast_switch(void);
> >  void cpufreq_driver_adjust_perf(unsigned int cpu,
> >                                 unsigned long min_perf,
> >                                 unsigned long target_perf,
> > diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> > index e3211455b203..a1c449525ac2 100644
> > --- a/kernel/sched/cpufreq_schedutil.c
> > +++ b/kernel/sched/cpufreq_schedutil.c
> > @@ -364,7 +364,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
> >          * concurrently on two different CPUs for the same target and it is not
> >          * necessary to acquire the lock in the fast switch case.
> >          */
> > -       if (sg_policy->policy->fast_switch_enabled) {
> > +       if (sg_policy->policy->fast_switch_enabled && cpufreq_driver_has_fast_switch()) {
> >                 cpufreq_driver_fast_switch(sg_policy->policy, next_f);
> >         } else {
> >                 raw_spin_lock(&sg_policy->update_lock);
> > --
> > 2.34.1
> >
  

Patch

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 6d8fd3b8dcb5..364d31b55380 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2138,6 +2138,17 @@  unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
 }
 EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
 
+/**
+ * cpufreq_driver_has_fast_switch - Check "fast switch" callback.
+ *
+ * Return 'true' if the ->fast_switch callback is present for the
+ * current driver or 'false' otherwise.
+ */
+bool cpufreq_driver_has_fast_switch(void)
+{
+	return !!cpufreq_driver->fast_switch;
+}
+
 /**
  * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
  * @cpu: Target CPU.
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 65623233ab2f..8a9286fc718b 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -604,6 +604,7 @@  struct cpufreq_governor {
 /* Pass a target to the cpufreq driver */
 unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
 					unsigned int target_freq);
+bool cpufreq_driver_has_fast_switch(void);
 void cpufreq_driver_adjust_perf(unsigned int cpu,
 				unsigned long min_perf,
 				unsigned long target_perf,
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index e3211455b203..a1c449525ac2 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -364,7 +364,7 @@  static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
 	 * concurrently on two different CPUs for the same target and it is not
 	 * necessary to acquire the lock in the fast switch case.
 	 */
-	if (sg_policy->policy->fast_switch_enabled) {
+	if (sg_policy->policy->fast_switch_enabled && cpufreq_driver_has_fast_switch()) {
 		cpufreq_driver_fast_switch(sg_policy->policy, next_f);
 	} else {
 		raw_spin_lock(&sg_policy->update_lock);