[V8,5/7] cpufreq: amd-pstate: Update amd-pstate preferred core ranking dynamically

Message ID 20231009024932.2563622-6-li.meng@amd.com
State New
Headers
Series amd-pstate preferred core |

Commit Message

Meng Li Oct. 9, 2023, 2:49 a.m. UTC
  Preferred core rankings can be changed dynamically by the
platform based on the workload and platform conditions,
accounting for thermals and aging.
When this occurs, the CPU priority needs to be set.

Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Wyes Karny <wyes.karny@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Meng Li <li.meng@amd.com>
---
 drivers/cpufreq/amd-pstate.c | 34 ++++++++++++++++++++++++++++++++--
 include/linux/amd-pstate.h   |  6 ++++++
 2 files changed, 38 insertions(+), 2 deletions(-)
  

Comments

Wyes Karny Oct. 9, 2023, 6:19 a.m. UTC | #1
Hi Meng Li,

On 09 Oct 10:49, Meng Li wrote:
> Preferred core rankings can be changed dynamically by the
> platform based on the workload and platform conditions and
> accounting for thermals and aging.
> When this occurs, cpu priority need to be set.
> 
> Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
> Reviewed-by: Wyes Karny <wyes.karny@amd.com>
> Reviewed-by: Huang Rui <ray.huang@amd.com>
> Signed-off-by: Meng Li <li.meng@amd.com>
> ---
>  drivers/cpufreq/amd-pstate.c | 34 ++++++++++++++++++++++++++++++++--
>  include/linux/amd-pstate.h   |  6 ++++++
>  2 files changed, 38 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 6ac8939fce5a..d3369247c6c9 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -313,6 +313,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
>  	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
>  	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
>  	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
> +	WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
>  
>  	return 0;
>  }
> @@ -334,6 +335,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
>  	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
>  		   cppc_perf.lowest_nonlinear_perf);
>  	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
> +	WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
>  
>  	if (cppc_state == AMD_PSTATE_ACTIVE)
>  		return 0;
> @@ -540,7 +542,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
>  	if (target_perf < capacity)
>  		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
>  
> -	min_perf = READ_ONCE(cpudata->highest_perf);
> +	min_perf = READ_ONCE(cpudata->lowest_perf);

This seems to be a fix. So, this could be a separate patch.

>  	if (_min_perf < capacity)
>  		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
>  
> @@ -760,6 +762,32 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
>  	}
>  }
>  
> +static void amd_pstate_update_highest_perf(unsigned int cpu)
> +{
> +	struct cpufreq_policy *policy;
> +	struct amd_cpudata *cpudata;
> +	u32 prev_high = 0, cur_high = 0;
> +	int ret;
> +
> +	if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
> +		return;
> +
> +	ret = amd_pstate_get_highest_perf(cpu, &cur_high);
> +	if (ret)
> +		return;
> +
> +	policy = cpufreq_cpu_get(cpu);
> +	cpudata = policy->driver_data;
> +	prev_high = READ_ONCE(cpudata->prefcore_ranking);
> +
> +	if (prev_high != cur_high) {
> +		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
> +		sched_set_itmt_core_prio(cur_high, cpu);
> +	}
> +
> +	cpufreq_cpu_put(policy);
> +}
> +
>  static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>  {
>  	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> @@ -926,7 +954,7 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
>  	u32 perf;
>  	struct amd_cpudata *cpudata = policy->driver_data;
>  
> -	perf = READ_ONCE(cpudata->highest_perf);
> +	perf = READ_ONCE(cpudata->prefcore_ranking);

I think this should show cpudata->highest_perf.

Thanks,
Wyes
>  
>  	return sysfs_emit(buf, "%u\n", perf);
>  }
> @@ -1502,6 +1530,7 @@ static struct cpufreq_driver amd_pstate_driver = {
>  	.suspend	= amd_pstate_cpu_suspend,
>  	.resume		= amd_pstate_cpu_resume,
>  	.set_boost	= amd_pstate_set_boost,
> +	.update_highest_perf	= amd_pstate_update_highest_perf,
>  	.name		= "amd-pstate",
>  	.attr		= amd_pstate_attr,
>  };
> @@ -1516,6 +1545,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
>  	.online		= amd_pstate_epp_cpu_online,
>  	.suspend	= amd_pstate_epp_suspend,
>  	.resume		= amd_pstate_epp_resume,
> +	.update_highest_perf	= amd_pstate_update_highest_perf,
>  	.name		= "amd-pstate-epp",
>  	.attr		= amd_pstate_epp_attr,
>  };
> diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> index 87e140e9e6db..426822612373 100644
> --- a/include/linux/amd-pstate.h
> +++ b/include/linux/amd-pstate.h
> @@ -39,11 +39,16 @@ struct amd_aperf_mperf {
>   * @cppc_req_cached: cached performance request hints
>   * @highest_perf: the maximum performance an individual processor may reach,
>   *		  assuming ideal conditions
> + *		  For platforms that do not support the preferred core feature, the
> + *		  highest_pef may be configured with 166 or 255, to avoid max frequency
> + *		  calculated wrongly. we take the fixed value as the highest_perf.
>   * @nominal_perf: the maximum sustained performance level of the processor,
>   *		  assuming ideal operating conditions
>   * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
>   *			   savings are achieved
>   * @lowest_perf: the absolute lowest performance level of the processor
> + * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
> + * 		  priority.
>   * @max_freq: the frequency that mapped to highest_perf
>   * @min_freq: the frequency that mapped to lowest_perf
>   * @nominal_freq: the frequency that mapped to nominal_perf
> @@ -73,6 +78,7 @@ struct amd_cpudata {
>  	u32	nominal_perf;
>  	u32	lowest_nonlinear_perf;
>  	u32	lowest_perf;
> +	u32     prefcore_ranking;
>  
>  	u32	max_freq;
>  	u32	min_freq;
> -- 
> 2.34.1
>
  
Meng Li Oct. 9, 2023, 8:08 a.m. UTC | #2
[AMD Official Use Only - General]

Hi Wyes:

> -----Original Message-----
> From: Karny, Wyes <Wyes.Karny@amd.com>
> Sent: Monday, October 9, 2023 2:19 PM
> To: Meng, Li (Jassmine) <Li.Meng@amd.com>
> Cc: Karny, Wyes <Wyes.Karny@amd.com>; Rafael J . Wysocki
> <rafael.j.wysocki@intel.com>; Huang, Ray <Ray.Huang@amd.com>; linux-
> pm@vger.kernel.org; linux-kernel@vger.kernel.org; x86@kernel.org; linux-
> acpi@vger.kernel.org; Shuah Khan <skhan@linuxfoundation.org>; linux-
> kselftest@vger.kernel.org; Fontenot, Nathan
> <Nathan.Fontenot@amd.com>; Sharma, Deepak
> <Deepak.Sharma@amd.com>; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Limonciello, Mario
> <Mario.Limonciello@amd.com>; Huang, Shimmer
> <Shimmer.Huang@amd.com>; Yuan, Perry <Perry.Yuan@amd.com>; Du,
> Xiaojian <Xiaojian.Du@amd.com>; Viresh Kumar <viresh.kumar@linaro.org>;
> Borislav Petkov <bp@alien8.de>
> Subject: Re: [PATCH V8 5/7] cpufreq: amd-pstate: Update amd-pstate
> preferred core ranking dynamically
>
> Hi Meng Li,
>
> On 09 Oct 10:49, Meng Li wrote:
> > Preferred core rankings can be changed dynamically by the platform
> > based on the workload and platform conditions and accounting for
> > thermals and aging.
> > When this occurs, cpu priority need to be set.
> >
> > Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
> > Reviewed-by: Wyes Karny <wyes.karny@amd.com>
> > Reviewed-by: Huang Rui <ray.huang@amd.com>
> > Signed-off-by: Meng Li <li.meng@amd.com>
> > ---
> >  drivers/cpufreq/amd-pstate.c | 34
> ++++++++++++++++++++++++++++++++--
> >  include/linux/amd-pstate.h   |  6 ++++++
> >  2 files changed, 38 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/cpufreq/amd-pstate.c
> > b/drivers/cpufreq/amd-pstate.c index 6ac8939fce5a..d3369247c6c9 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -313,6 +313,7 @@ static int pstate_init_perf(struct amd_cpudata
> *cpudata)
> >     WRITE_ONCE(cpudata->nominal_perf,
> AMD_CPPC_NOMINAL_PERF(cap1));
> >     WRITE_ONCE(cpudata->lowest_nonlinear_perf,
> AMD_CPPC_LOWNONLIN_PERF(cap1));
> >     WRITE_ONCE(cpudata->lowest_perf,
> AMD_CPPC_LOWEST_PERF(cap1));
> > +   WRITE_ONCE(cpudata->prefcore_ranking,
> AMD_CPPC_HIGHEST_PERF(cap1));
> >
> >     return 0;
> >  }
> > @@ -334,6 +335,7 @@ static int cppc_init_perf(struct amd_cpudata
> *cpudata)
> >     WRITE_ONCE(cpudata->lowest_nonlinear_perf,
> >                cppc_perf.lowest_nonlinear_perf);
> >     WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
> > +   WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
> >
> >     if (cppc_state == AMD_PSTATE_ACTIVE)
> >             return 0;
> > @@ -540,7 +542,7 @@ static void amd_pstate_adjust_perf(unsigned int
> cpu,
> >     if (target_perf < capacity)
> >             des_perf = DIV_ROUND_UP(cap_perf * target_perf,
> capacity);
> >
> > -   min_perf = READ_ONCE(cpudata->highest_perf);
> > +   min_perf = READ_ONCE(cpudata->lowest_perf);
>
> This seems to be a fix. So, this could be a separate patch.
[Meng, Li (Jassmine)] Thanks, I will modify it.
>
> >     if (_min_perf < capacity)
> >             min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
> >
> > @@ -760,6 +762,32 @@ static void amd_pstate_init_prefcore(struct
> amd_cpudata *cpudata)
> >     }
> >  }
> >
> > +static void amd_pstate_update_highest_perf(unsigned int cpu) {
> > +   struct cpufreq_policy *policy;
> > +   struct amd_cpudata *cpudata;
> > +   u32 prev_high = 0, cur_high = 0;
> > +   int ret;
> > +
> > +   if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
> > +           return;
> > +
> > +   ret = amd_pstate_get_highest_perf(cpu, &cur_high);
> > +   if (ret)
> > +           return;
> > +
> > +   policy = cpufreq_cpu_get(cpu);
> > +   cpudata = policy->driver_data;
> > +   prev_high = READ_ONCE(cpudata->prefcore_ranking);
> > +
> > +   if (prev_high != cur_high) {
> > +           WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
> > +           sched_set_itmt_core_prio(cur_high, cpu);
> > +   }
> > +
> > +   cpufreq_cpu_put(policy);
> > +}
> > +
> >  static int amd_pstate_cpu_init(struct cpufreq_policy *policy)  {
> >     int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> @@
> > -926,7 +954,7 @@ static ssize_t show_amd_pstate_highest_perf(struct
> cpufreq_policy *policy,
> >     u32 perf;
> >     struct amd_cpudata *cpudata = policy->driver_data;
> >
> > -   perf = READ_ONCE(cpudata->highest_perf);
> > +   perf = READ_ONCE(cpudata->prefcore_ranking);
>
> I think this should show cpudata->highest_perf.
[Meng, Li (Jassmine)] Thanks, I will modify it and
add a new function for prefcore_ranking.
>
> Thanks,
> Wyes
> >
> >     return sysfs_emit(buf, "%u\n", perf);  } @@ -1502,6 +1530,7 @@
> > static struct cpufreq_driver amd_pstate_driver = {
> >     .suspend        = amd_pstate_cpu_suspend,
> >     .resume         = amd_pstate_cpu_resume,
> >     .set_boost      = amd_pstate_set_boost,
> > +   .update_highest_perf    = amd_pstate_update_highest_perf,
> >     .name           = "amd-pstate",
> >     .attr           = amd_pstate_attr,
> >  };
> > @@ -1516,6 +1545,7 @@ static struct cpufreq_driver
> amd_pstate_epp_driver = {
> >     .online         = amd_pstate_epp_cpu_online,
> >     .suspend        = amd_pstate_epp_suspend,
> >     .resume         = amd_pstate_epp_resume,
> > +   .update_highest_perf    = amd_pstate_update_highest_perf,
> >     .name           = "amd-pstate-epp",
> >     .attr           = amd_pstate_epp_attr,
> >  };
> > diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> > index 87e140e9e6db..426822612373 100644
> > --- a/include/linux/amd-pstate.h
> > +++ b/include/linux/amd-pstate.h
> > @@ -39,11 +39,16 @@ struct amd_aperf_mperf {
> >   * @cppc_req_cached: cached performance request hints
> >   * @highest_perf: the maximum performance an individual processor may
> reach,
> >   *           assuming ideal conditions
> > + *           For platforms that do not support the preferred core
> feature, the
> > + *           highest_pef may be configured with 166 or 255, to avoid
> max frequency
> > + *           calculated wrongly. we take the fixed value as the
> highest_perf.
> >   * @nominal_perf: the maximum sustained performance level of the
> processor,
> >   *           assuming ideal operating conditions
> >   * @lowest_nonlinear_perf: the lowest performance level at which
> nonlinear power
> >   *                    savings are achieved
> >   * @lowest_perf: the absolute lowest performance level of the
> > processor
> > + * @prefcore_ranking: the preferred core ranking, the higher value
> indicates a higher
> > + *                   priority.
> >   * @max_freq: the frequency that mapped to highest_perf
> >   * @min_freq: the frequency that mapped to lowest_perf
> >   * @nominal_freq: the frequency that mapped to nominal_perf @@ -73,6
> > +78,7 @@ struct amd_cpudata {
> >     u32     nominal_perf;
> >     u32     lowest_nonlinear_perf;
> >     u32     lowest_perf;
> > +   u32     prefcore_ranking;
> >
> >     u32     max_freq;
> >     u32     min_freq;
> > --
> > 2.34.1
> >
  

Patch

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 6ac8939fce5a..d3369247c6c9 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -313,6 +313,7 @@  static int pstate_init_perf(struct amd_cpudata *cpudata)
 	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
 	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
 	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
+	WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
 
 	return 0;
 }
@@ -334,6 +335,7 @@  static int cppc_init_perf(struct amd_cpudata *cpudata)
 	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
 		   cppc_perf.lowest_nonlinear_perf);
 	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
+	WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
 
 	if (cppc_state == AMD_PSTATE_ACTIVE)
 		return 0;
@@ -540,7 +542,7 @@  static void amd_pstate_adjust_perf(unsigned int cpu,
 	if (target_perf < capacity)
 		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
 
-	min_perf = READ_ONCE(cpudata->highest_perf);
+	min_perf = READ_ONCE(cpudata->lowest_perf);
 	if (_min_perf < capacity)
 		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
 
@@ -760,6 +762,32 @@  static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
 	}
 }
 
+static void amd_pstate_update_highest_perf(unsigned int cpu)
+{
+	struct cpufreq_policy *policy;
+	struct amd_cpudata *cpudata;
+	u32 prev_high = 0, cur_high = 0;
+	int ret;
+
+	if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
+		return;
+
+	ret = amd_pstate_get_highest_perf(cpu, &cur_high);
+	if (ret)
+		return;
+
+	policy = cpufreq_cpu_get(cpu);
+	cpudata = policy->driver_data;
+	prev_high = READ_ONCE(cpudata->prefcore_ranking);
+
+	if (prev_high != cur_high) {
+		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
+		sched_set_itmt_core_prio(cur_high, cpu);
+	}
+
+	cpufreq_cpu_put(policy);
+}
+
 static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
 {
 	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
@@ -926,7 +954,7 @@  static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
 	u32 perf;
 	struct amd_cpudata *cpudata = policy->driver_data;
 
-	perf = READ_ONCE(cpudata->highest_perf);
+	perf = READ_ONCE(cpudata->prefcore_ranking);
 
 	return sysfs_emit(buf, "%u\n", perf);
 }
@@ -1502,6 +1530,7 @@  static struct cpufreq_driver amd_pstate_driver = {
 	.suspend	= amd_pstate_cpu_suspend,
 	.resume		= amd_pstate_cpu_resume,
 	.set_boost	= amd_pstate_set_boost,
+	.update_highest_perf	= amd_pstate_update_highest_perf,
 	.name		= "amd-pstate",
 	.attr		= amd_pstate_attr,
 };
@@ -1516,6 +1545,7 @@  static struct cpufreq_driver amd_pstate_epp_driver = {
 	.online		= amd_pstate_epp_cpu_online,
 	.suspend	= amd_pstate_epp_suspend,
 	.resume		= amd_pstate_epp_resume,
+	.update_highest_perf	= amd_pstate_update_highest_perf,
 	.name		= "amd-pstate-epp",
 	.attr		= amd_pstate_epp_attr,
 };
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index 87e140e9e6db..426822612373 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -39,11 +39,16 @@  struct amd_aperf_mperf {
  * @cppc_req_cached: cached performance request hints
  * @highest_perf: the maximum performance an individual processor may reach,
  *		  assuming ideal conditions
+ *		  For platforms that do not support the preferred core feature, the
+ *		  highest_perf may be configured with 166 or 255. To avoid the max frequency
+ *		  being calculated wrongly, we take the fixed value as the highest_perf.
  * @nominal_perf: the maximum sustained performance level of the processor,
  *		  assuming ideal operating conditions
  * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
  *			   savings are achieved
  * @lowest_perf: the absolute lowest performance level of the processor
+ * @prefcore_ranking: the preferred core ranking; a higher value indicates a higher
+ *		  priority.
  * @max_freq: the frequency that mapped to highest_perf
  * @min_freq: the frequency that mapped to lowest_perf
  * @nominal_freq: the frequency that mapped to nominal_perf
@@ -73,6 +78,7 @@  struct amd_cpudata {
 	u32	nominal_perf;
 	u32	lowest_nonlinear_perf;
 	u32	lowest_perf;
+	u32     prefcore_ranking;
 
 	u32	max_freq;
 	u32	min_freq;