[v4,3/6] cpufreq: amd_pstate: Add guided autonomous mode
Commit Message
From ACPI spec below 3 modes for CPPC can be defined:
1. Non autonomous: OS scaling governor specifies operating frequency/
performance level through `Desired Performance` register and platform
follows that.
2. Guided autonomous: OS scaling governor specifies min and max
frequencies/ performance levels through `Minimum Performance` and
`Maximum Performance` register, and platform can autonomously select an
operating frequency in this range.
3. Fully autonomous: OS only hints (via EPP) to platform for the required
energy performance preference for the workload and platform autonomously
scales the frequency.
Currently (1) is supported by amd_pstate as passive mode, and (3) is
implemented by EPP support. This change is to support (2).
In guided autonomous mode the min_perf is based on the input from the
scaling governor. For example, in case of schedutil this value depends
on the current utilization. And max_perf is set to max capacity.
To activate guided auto mode ``amd_pstate=guided`` command line
parameter has to be passed in the kernel.
Signed-off-by: Wyes Karny <wyes.karny@amd.com>
---
.../admin-guide/kernel-parameters.txt | 15 +++++---
drivers/cpufreq/amd-pstate.c | 34 +++++++++++++++----
include/linux/amd-pstate.h | 2 ++
3 files changed, 40 insertions(+), 11 deletions(-)
Comments
On 1/30/23 23:21, Wyes Karny wrote:
> From ACPI spec below 3 modes for CPPC can be defined:
> 1. Non autonomous: OS scaling governor specifies operating frequency/
> performance level through `Desired Performance` register and platform
> follows that.
> 2. Guided autonomous: OS scaling governor specifies min and max
> frequencies/ performance levels through `Minimum Performance` and
> `Maximum Performance` register, and platform can autonomously select an
> operating frequency in this range.
> 3. Fully autonomous: OS only hints (via EPP) to platform for the required
> energy performance preference for the workload and platform autonomously
> scales the frequency.
>
> Currently (1) is supported by amd_pstate as passive mode, and (3) is
> implemented by EPP support. This change is to support (2).
>
> In guided autonomous mode the min_perf is based on the input from the
> scaling governor. For example, in case of schedutil this value depends
> on the current utilization. And max_perf is set to max capacity.
>
> To activate guided auto mode ``amd_pstate=guided`` command line
> parameter has to be passed in the kernel.
>
> Signed-off-by: Wyes Karny <wyes.karny@amd.com>
> ---
> .../admin-guide/kernel-parameters.txt | 15 +++++---
> drivers/cpufreq/amd-pstate.c | 34 +++++++++++++++----
> include/linux/amd-pstate.h | 2 ++
> 3 files changed, 40 insertions(+), 11 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index e3618dfdb36a..0d8486325c9a 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -7015,11 +7015,11 @@
> Do not enable amd_pstate as the default
> scaling driver for the supported processors
> passive
> - Use amd_pstate as a scaling driver, driver requests a
> - desired performance on this abstract scale and the power
> - management firmware translates the requests into actual
> - hardware states (core frequency, data fabric and memory
> - clocks etc.)
> + Use amd_pstate with passive mode as a scaling driver.
> + In this mode autonomous selection is disabled.
> + Driver requests a desired performance level and platform
> + tires to match the same performance level (if it is
s/tires/tries/
I don't think you need the () either in this sentence.
> + satisfied by guaranteed performance level).
> active
> Use amd_pstate_epp driver instance as the scaling driver,
> driver provides a hint to the hardware if software wants
> @@ -7027,3 +7027,8 @@
> to the CPPC firmware. then CPPC power algorithm will
> calculate the runtime workload and adjust the realtime cores
> frequency.
> + guided
> + Activate guided autonomous mode. Driver requests minimum and
> + maximum performance level and the platform autonomously
> + selects a performance level in this range and appropriate
> + to the current workload.
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index c5731fefcbef..48ab4684c3a5 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -267,7 +267,22 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
> cppc_perf.lowest_nonlinear_perf);
> WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
>
> - return 0;
> + if (cppc_state == AMD_PSTATE_ACTIVE)
> + return 0;
> +
> + ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
> + if (ret) {
> + pr_warn("failed to get auto_sel, ret: %d\n", ret);
> + return 0;
> + }
> +
> + ret = cppc_set_auto_sel(cpudata->cpu,
> + (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
> +
> + if (ret)
> + pr_warn("failed to set auto_sel, ret: %d\n", ret);
> +
> + return ret;
> }
>
> DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);
> @@ -344,12 +359,18 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
> }
>
> static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
> - u32 des_perf, u32 max_perf, bool fast_switch)
> + u32 des_perf, u32 max_perf, bool fast_switch, int guv_flags)
You meant "gov_flags" not "guv_flags" right?
> {
> u64 prev = READ_ONCE(cpudata->cppc_req_cached);
> u64 value = prev;
>
> des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
> +
> + if ((cppc_state == AMD_PSTATE_GUIDED) && (guv_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
> + min_perf = des_perf;
> + des_perf = 0;
> + }
> +
> value &= ~AMD_CPPC_MIN_PERF(~0L);
> value |= AMD_CPPC_MIN_PERF(min_perf);
>
> @@ -404,7 +425,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
>
> cpufreq_freq_transition_begin(policy, &freqs);
> amd_pstate_update(cpudata, min_perf, des_perf,
> - max_perf, false);
> + max_perf, false, policy->governor->flags);
> cpufreq_freq_transition_end(policy, &freqs, false);
>
> return 0;
> @@ -438,7 +459,8 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
> if (max_perf < min_perf)
> max_perf = min_perf;
>
> - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
> + amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
> + policy->governor->flags);
> cpufreq_cpu_put(policy);
> }
>
> @@ -1238,7 +1260,7 @@ static int __init amd_pstate_init(void)
> /* capability check */
> if (boot_cpu_has(X86_FEATURE_CPPC)) {
> pr_debug("AMD CPPC MSR based functionality is supported\n");
> - if (cppc_state == AMD_PSTATE_PASSIVE)
> + if (cppc_state != AMD_PSTATE_ACTIVE)
> current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
> } else {
> pr_debug("AMD CPPC shared memory based functionality is supported\n");
> @@ -1300,7 +1322,7 @@ static int __init amd_pstate_param(char *str)
> if (cppc_state == AMD_PSTATE_ACTIVE)
> current_pstate_driver = &amd_pstate_epp_driver;
>
> - if (cppc_state == AMD_PSTATE_PASSIVE)
> + if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
> current_pstate_driver = &amd_pstate_driver;
>
> return 0;
> diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> index e9e291b7c2f9..85e980b8e0ac 100644
> --- a/include/linux/amd-pstate.h
> +++ b/include/linux/amd-pstate.h
> @@ -92,6 +92,7 @@ enum amd_pstate_mode {
> AMD_PSTATE_DISABLE = 0,
> AMD_PSTATE_PASSIVE,
> AMD_PSTATE_ACTIVE,
> + AMD_PSTATE_GUIDED,
> AMD_PSTATE_MAX,
> };
>
> @@ -99,6 +100,7 @@ static const char * const amd_pstate_mode_string[] = {
> [AMD_PSTATE_DISABLE] = "disable",
> [AMD_PSTATE_PASSIVE] = "passive",
> [AMD_PSTATE_ACTIVE] = "active",
> + [AMD_PSTATE_GUIDED] = "guided",
> NULL,
> };
>
With two nits fixed
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
On 31 Jan 07:37, Mario Limonciello wrote:
> On 1/30/23 23:21, Wyes Karny wrote:
> > From ACPI spec below 3 modes for CPPC can be defined:
> > 1. Non autonomous: OS scaling governor specifies operating frequency/
> > performance level through `Desired Performance` register and platform
> > follows that.
> > 2. Guided autonomous: OS scaling governor specifies min and max
> > frequencies/ performance levels through `Minimum Performance` and
> > `Maximum Performance` register, and platform can autonomously select an
> > operating frequency in this range.
> > 3. Fully autonomous: OS only hints (via EPP) to platform for the required
> > energy performance preference for the workload and platform autonomously
> > scales the frequency.
> >
> > Currently (1) is supported by amd_pstate as passive mode, and (3) is
> > implemented by EPP support. This change is to support (2).
> >
> > In guided autonomous mode the min_perf is based on the input from the
> > scaling governor. For example, in case of schedutil this value depends
> > on the current utilization. And max_perf is set to max capacity.
> >
> > To activate guided auto mode ``amd_pstate=guided`` command line
> > parameter has to be passed in the kernel.
> >
> > Signed-off-by: Wyes Karny <wyes.karny@amd.com>
> > ---
> > .../admin-guide/kernel-parameters.txt | 15 +++++---
> > drivers/cpufreq/amd-pstate.c | 34 +++++++++++++++----
> > include/linux/amd-pstate.h | 2 ++
> > 3 files changed, 40 insertions(+), 11 deletions(-)
> >
> > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> > index e3618dfdb36a..0d8486325c9a 100644
> > --- a/Documentation/admin-guide/kernel-parameters.txt
> > +++ b/Documentation/admin-guide/kernel-parameters.txt
> > @@ -7015,11 +7015,11 @@
> > Do not enable amd_pstate as the default
> > scaling driver for the supported processors
> > passive
> > - Use amd_pstate as a scaling driver, driver requests a
> > - desired performance on this abstract scale and the power
> > - management firmware translates the requests into actual
> > - hardware states (core frequency, data fabric and memory
> > - clocks etc.)
> > + Use amd_pstate with passive mode as a scaling driver.
> > + In this mode autonomous selection is disabled.
> > + Driver requests a desired performance level and platform
> > + tires to match the same performance level (if it is
>
> s/tires/tries/
>
> I don't think you need the () either in this sentence.
>
> > + satisfied by guaranteed performance level).
> > active
> > Use amd_pstate_epp driver instance as the scaling driver,
> > driver provides a hint to the hardware if software wants
> > @@ -7027,3 +7027,8 @@
> > to the CPPC firmware. then CPPC power algorithm will
> > calculate the runtime workload and adjust the realtime cores
> > frequency.
> > + guided
> > + Activate guided autonomous mode. Driver requests minimum and
> > + maximum performance level and the platform autonomously
> > + selects a performance level in this range and appropriate
> > + to the current workload.
> > diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> > index c5731fefcbef..48ab4684c3a5 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -267,7 +267,22 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
> > cppc_perf.lowest_nonlinear_perf);
> > WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
> > - return 0;
> > + if (cppc_state == AMD_PSTATE_ACTIVE)
> > + return 0;
> > +
> > + ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
> > + if (ret) {
> > + pr_warn("failed to get auto_sel, ret: %d\n", ret);
> > + return 0;
> > + }
> > +
> > + ret = cppc_set_auto_sel(cpudata->cpu,
> > + (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
> > +
> > + if (ret)
> > + pr_warn("failed to set auto_sel, ret: %d\n", ret);
> > +
> > + return ret;
> > }
> > DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);
> > @@ -344,12 +359,18 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
> > }
> > static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
> > - u32 des_perf, u32 max_perf, bool fast_switch)
> > + u32 des_perf, u32 max_perf, bool fast_switch, int guv_flags)
>
> You meant "gov_flags" not "guv_flags" right?
>
> > {
> > u64 prev = READ_ONCE(cpudata->cppc_req_cached);
> > u64 value = prev;
> > des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
> > +
> > + if ((cppc_state == AMD_PSTATE_GUIDED) && (guv_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
> > + min_perf = des_perf;
> > + des_perf = 0;
> > + }
> > +
> > value &= ~AMD_CPPC_MIN_PERF(~0L);
> > value |= AMD_CPPC_MIN_PERF(min_perf);
> > @@ -404,7 +425,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
> > cpufreq_freq_transition_begin(policy, &freqs);
> > amd_pstate_update(cpudata, min_perf, des_perf,
> > - max_perf, false);
> > + max_perf, false, policy->governor->flags);
> > cpufreq_freq_transition_end(policy, &freqs, false);
> > return 0;
> > @@ -438,7 +459,8 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
> > if (max_perf < min_perf)
> > max_perf = min_perf;
> > - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
> > + amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
> > + policy->governor->flags);
> > cpufreq_cpu_put(policy);
> > }
> > @@ -1238,7 +1260,7 @@ static int __init amd_pstate_init(void)
> > /* capability check */
> > if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > pr_debug("AMD CPPC MSR based functionality is supported\n");
> > - if (cppc_state == AMD_PSTATE_PASSIVE)
> > + if (cppc_state != AMD_PSTATE_ACTIVE)
> > current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
> > } else {
> > pr_debug("AMD CPPC shared memory based functionality is supported\n");
> > @@ -1300,7 +1322,7 @@ static int __init amd_pstate_param(char *str)
> > if (cppc_state == AMD_PSTATE_ACTIVE)
> > current_pstate_driver = &amd_pstate_epp_driver;
> > - if (cppc_state == AMD_PSTATE_PASSIVE)
> > + if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
> > current_pstate_driver = &amd_pstate_driver;
> > return 0;
> > diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> > index e9e291b7c2f9..85e980b8e0ac 100644
> > --- a/include/linux/amd-pstate.h
> > +++ b/include/linux/amd-pstate.h
> > @@ -92,6 +92,7 @@ enum amd_pstate_mode {
> > AMD_PSTATE_DISABLE = 0,
> > AMD_PSTATE_PASSIVE,
> > AMD_PSTATE_ACTIVE,
> > + AMD_PSTATE_GUIDED,
> > AMD_PSTATE_MAX,
> > };
> > @@ -99,6 +100,7 @@ static const char * const amd_pstate_mode_string[] = {
> > [AMD_PSTATE_DISABLE] = "disable",
> > [AMD_PSTATE_PASSIVE] = "passive",
> > [AMD_PSTATE_ACTIVE] = "active",
> > + [AMD_PSTATE_GUIDED] = "guided",
> > NULL,
> > };
>
> With two nits fixed
>
> Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Thanks! I'll fix those.
>
@@ -7015,11 +7015,11 @@
Do not enable amd_pstate as the default
scaling driver for the supported processors
passive
- Use amd_pstate as a scaling driver, driver requests a
- desired performance on this abstract scale and the power
- management firmware translates the requests into actual
- hardware states (core frequency, data fabric and memory
- clocks etc.)
+ Use amd_pstate with passive mode as a scaling driver.
+ In this mode autonomous selection is disabled.
+ Driver requests a desired performance level and platform
+ tires to match the same performance level (if it is
+ satisfied by guaranteed performance level).
active
Use amd_pstate_epp driver instance as the scaling driver,
driver provides a hint to the hardware if software wants
@@ -7027,3 +7027,8 @@
to the CPPC firmware. then CPPC power algorithm will
calculate the runtime workload and adjust the realtime cores
frequency.
+ guided
+ Activate guided autonomous mode. Driver requests minimum and
+ maximum performance level and the platform autonomously
+ selects a performance level in this range and appropriate
+ to the current workload.
@@ -267,7 +267,22 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
cppc_perf.lowest_nonlinear_perf);
WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
- return 0;
+ if (cppc_state == AMD_PSTATE_ACTIVE)
+ return 0;
+
+ ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
+ if (ret) {
+ pr_warn("failed to get auto_sel, ret: %d\n", ret);
+ return 0;
+ }
+
+ ret = cppc_set_auto_sel(cpudata->cpu,
+ (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
+
+ if (ret)
+ pr_warn("failed to set auto_sel, ret: %d\n", ret);
+
+ return ret;
}
DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);
@@ -344,12 +359,18 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
}
static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
- u32 des_perf, u32 max_perf, bool fast_switch)
+ u32 des_perf, u32 max_perf, bool fast_switch, int guv_flags)
{
u64 prev = READ_ONCE(cpudata->cppc_req_cached);
u64 value = prev;
des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
+
+ if ((cppc_state == AMD_PSTATE_GUIDED) && (guv_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
+ min_perf = des_perf;
+ des_perf = 0;
+ }
+
value &= ~AMD_CPPC_MIN_PERF(~0L);
value |= AMD_CPPC_MIN_PERF(min_perf);
@@ -404,7 +425,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
cpufreq_freq_transition_begin(policy, &freqs);
amd_pstate_update(cpudata, min_perf, des_perf,
- max_perf, false);
+ max_perf, false, policy->governor->flags);
cpufreq_freq_transition_end(policy, &freqs, false);
return 0;
@@ -438,7 +459,8 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
if (max_perf < min_perf)
max_perf = min_perf;
- amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
+ amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
+ policy->governor->flags);
cpufreq_cpu_put(policy);
}
@@ -1238,7 +1260,7 @@ static int __init amd_pstate_init(void)
/* capability check */
if (boot_cpu_has(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n");
- if (cppc_state == AMD_PSTATE_PASSIVE)
+ if (cppc_state != AMD_PSTATE_ACTIVE)
current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
} else {
pr_debug("AMD CPPC shared memory based functionality is supported\n");
@@ -1300,7 +1322,7 @@ static int __init amd_pstate_param(char *str)
if (cppc_state == AMD_PSTATE_ACTIVE)
current_pstate_driver = &amd_pstate_epp_driver;
- if (cppc_state == AMD_PSTATE_PASSIVE)
+ if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
current_pstate_driver = &amd_pstate_driver;
return 0;
@@ -92,6 +92,7 @@ enum amd_pstate_mode {
AMD_PSTATE_DISABLE = 0,
AMD_PSTATE_PASSIVE,
AMD_PSTATE_ACTIVE,
+ AMD_PSTATE_GUIDED,
AMD_PSTATE_MAX,
};
@@ -99,6 +100,7 @@ static const char * const amd_pstate_mode_string[] = {
[AMD_PSTATE_DISABLE] = "disable",
[AMD_PSTATE_PASSIVE] = "passive",
[AMD_PSTATE_ACTIVE] = "active",
+ [AMD_PSTATE_GUIDED] = "guided",
NULL,
};