[1/4] cpufreq: amd_pstate: Add guided autonomous mode
Commit Message
According to the ACPI spec, the following 3 modes can be defined for CPPC:
1. Non autonomous: OS scaling governor specifies operating frequency/
performance level through `Desired Performance` register and PMFW
follows that.
2. Guided autonomous: OS scaling governor specifies min and max
frequencies/performance levels through the `Minimum Performance` and
`Maximum Performance` registers, and PMFW can autonomously select an
operating frequency in this range.
3. Fully autonomous: OS only hints (via EPP) to PMFW for the required
energy performance preference for the workload and PMFW autonomously
scales the frequency.
Currently (1) is supported by amd_pstate as passive mode, and (3) is
implemented by EPP support. This change is to support (2).
In guided autonomous mode, min_perf is based on the input from the
scaling governor; for example, with schedutil this value depends on
the current utilization. max_perf is set to the maximum capacity.
To activate guided autonomous mode, the ``amd_pstate=guided`` parameter
has to be passed on the kernel command line.
Signed-off-by: Wyes Karny <wyes.karny@amd.com>
---
.../admin-guide/kernel-parameters.txt | 4 ++
drivers/cpufreq/amd-pstate.c | 60 +++++++++++++++----
2 files changed, 53 insertions(+), 11 deletions(-)
Comments
On Wed, Dec 07, 2022 at 11:46:45PM +0800, Karny, Wyes wrote:
> From ACPI spec below 3 modes for CPPC can be defined:
> 1. Non autonomous: OS scaling governor specifies operating frequency/
> performance level through `Desired Performance` register and PMFW
> follows that.
> 2. Guided autonomous: OS scaling governor specifies min and max
> frequencies/ performance levels through `Minimum Performance` and
> `Maximum Performance` register, and PMFW can autonomously select an
> operating frequency in this range.
> 3. Fully autonomous: OS only hints (via EPP) to PMFW for the required
> energy performance preference for the workload and PMFW autonomously
> scales the frequency.
>
> Currently (1) is supported by amd_pstate as passive mode, and (3) is
> implemented by EPP support. This change is to support (2).
>
> In guided autonomous mode the min_perf is based on the input from the
> scaling governor. For example, in case of schedutil this value depends
> on the current utilization. And max_perf is set to max capacity.
>
> To activate guided auto mode ``amd_pstate=guided`` command line
> parameter has to be passed in the kernel.
>
> Signed-off-by: Wyes Karny <wyes.karny@amd.com>
> ---
> .../admin-guide/kernel-parameters.txt | 4 ++
> drivers/cpufreq/amd-pstate.c | 60 +++++++++++++++----
> 2 files changed, 53 insertions(+), 11 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 42af9ca0127e..75e57afba77e 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6970,3 +6970,7 @@
> management firmware translates the requests into actual
> hardware states (core frequency, data fabric and memory
> clocks etc.)
> + guided
> + Activate guided autonomous mode. Driver requests minimum
> + performance and maximum performance and the PMFW autonomously
> + selects frequencies in this range.
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 204e39006dda..05e4003a77ee 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -50,6 +50,20 @@
> #define AMD_PSTATE_TRANSITION_LATENCY 20000
> #define AMD_PSTATE_TRANSITION_DELAY 1000
>
> +enum amd_pstate_mode {
> + CPPC_DISABLE = 0,
> + CPPC_PASSIVE,
> + CPPC_GUIDED,
> + CPPC_MODE_MAX,
> +};
> +
> +static const char * const amd_pstate_mode_string[] = {
> + [CPPC_DISABLE] = "disable",
> + [CPPC_PASSIVE] = "passive",
> + [CPPC_GUIDED] = "guided",
> + NULL,
> +};
> +
> /*
> * TODO: We need more time to fine tune processors with shared memory solution
> * with community together.
> @@ -60,7 +74,18 @@
> * module parameter to be able to enable it manually for debugging.
> */
> static struct cpufreq_driver amd_pstate_driver;
> -static int cppc_load __initdata;
> +static int cppc_state = CPPC_DISABLE;
> +
> +static inline int get_mode_idx_from_str(const char *str, size_t size)
> +{
> + int i = 0;
> +
> + for (; i < CPPC_MODE_MAX; ++i) {
> + if (!strncmp(str, amd_pstate_mode_string[i], size))
> + return i;
> + }
> + return -EINVAL;
> +}
>
> static inline int pstate_enable(bool enable)
> {
> @@ -212,12 +237,18 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
> }
>
> static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
> - u32 des_perf, u32 max_perf, bool fast_switch)
> + u32 des_perf, u32 max_perf, bool fast_switch, int flags)
> {
> u64 prev = READ_ONCE(cpudata->cppc_req_cached);
> u64 value = prev;
>
> des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
> +
> + if (cppc_state == CPPC_GUIDED && flags & CPUFREQ_GOV_DYNAMIC_SWITCHING) {
> + min_perf = des_perf;
> + des_perf = 0;
> + }
We would like to modify the min_perf on shared memory processors as well,
but the current cppc_set_perf() in cppc_acpi doesn't provide the MIN/MAX
values. Could you please add the max_perf/min_perf in cppc_acpi.c as well?
Then the APIs will be available on shared memory processors like Rome.
Thanks,
Ray
> +
> value &= ~AMD_CPPC_MIN_PERF(~0L);
> value |= AMD_CPPC_MIN_PERF(min_perf);
>
> @@ -272,7 +303,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
>
> cpufreq_freq_transition_begin(policy, &freqs);
> amd_pstate_update(cpudata, min_perf, des_perf,
> - max_perf, false);
> + max_perf, false, policy->governor->flags);
> cpufreq_freq_transition_end(policy, &freqs, false);
>
> return 0;
> @@ -306,7 +337,8 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
> if (max_perf < min_perf)
> max_perf = min_perf;
>
> - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
> + amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
> + policy->governor->flags);
> }
>
> static int amd_get_min_freq(struct amd_cpudata *cpudata)
> @@ -627,7 +659,7 @@ static int __init amd_pstate_init(void)
> * enable the amd_pstate passive mode driver explicitly
> * with amd_pstate=passive in kernel command line
> */
> - if (!cppc_load) {
> + if (cppc_state == CPPC_DISABLE) {
> pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n");
> return -ENODEV;
> }
> @@ -670,16 +702,22 @@ device_initcall(amd_pstate_init);
>
> static int __init amd_pstate_param(char *str)
> {
> + int size, mode_idx;
> +
> if (!str)
> return -EINVAL;
>
> - if (!strcmp(str, "disable")) {
> - cppc_load = 0;
> - pr_info("driver is explicitly disabled\n");
> - } else if (!strcmp(str, "passive"))
> - cppc_load = 1;
> + size = strlen(str);
> + mode_idx = get_mode_idx_from_str(str, size);
>
> - return 0;
> + if (mode_idx >= CPPC_DISABLE && mode_idx < CPPC_MODE_MAX) {
> + cppc_state = mode_idx;
> + if (cppc_state == CPPC_DISABLE)
> + pr_info("driver is explicitly disabled\n");
> + return 0;
> + }
> +
> + return -EINVAL;
> }
> early_param("amd_pstate", amd_pstate_param);
>
> --
> 2.34.1
>
Hi Ray,
On 12/9/2022 1:13 PM, Huang Rui wrote:
> On Wed, Dec 07, 2022 at 11:46:45PM +0800, Karny, Wyes wrote:
>> From ACPI spec below 3 modes for CPPC can be defined:
>> 1. Non autonomous: OS scaling governor specifies operating frequency/
>> performance level through `Desired Performance` register and PMFW
>> follows that.
>> 2. Guided autonomous: OS scaling governor specifies min and max
>> frequencies/ performance levels through `Minimum Performance` and
>> `Maximum Performance` register, and PMFW can autonomously select an
>> operating frequency in this range.
>> 3. Fully autonomous: OS only hints (via EPP) to PMFW for the required
>> energy performance preference for the workload and PMFW autonomously
>> scales the frequency.
>>
>> Currently (1) is supported by amd_pstate as passive mode, and (3) is
>> implemented by EPP support. This change is to support (2).
>>
>> In guided autonomous mode the min_perf is based on the input from the
>> scaling governor. For example, in case of schedutil this value depends
>> on the current utilization. And max_perf is set to max capacity.
>>
>> To activate guided auto mode ``amd_pstate=guided`` command line
>> parameter has to be passed in the kernel.
>>
>> Signed-off-by: Wyes Karny <wyes.karny@amd.com>
>> ---
>> .../admin-guide/kernel-parameters.txt | 4 ++
>> drivers/cpufreq/amd-pstate.c | 60 +++++++++++++++----
>> 2 files changed, 53 insertions(+), 11 deletions(-)
>>
>> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
>> index 42af9ca0127e..75e57afba77e 100644
>> --- a/Documentation/admin-guide/kernel-parameters.txt
>> +++ b/Documentation/admin-guide/kernel-parameters.txt
>> @@ -6970,3 +6970,7 @@
>> management firmware translates the requests into actual
>> hardware states (core frequency, data fabric and memory
>> clocks etc.)
>> + guided
>> + Activate guided autonomous mode. Driver requests minimum
>> + performance and maximum performance and the PMFW autonomously
>> + selects frequencies in this range.
>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>> index 204e39006dda..05e4003a77ee 100644
>> --- a/drivers/cpufreq/amd-pstate.c
>> +++ b/drivers/cpufreq/amd-pstate.c
>> @@ -50,6 +50,20 @@
>> #define AMD_PSTATE_TRANSITION_LATENCY 20000
>> #define AMD_PSTATE_TRANSITION_DELAY 1000
>>
>> +enum amd_pstate_mode {
>> + CPPC_DISABLE = 0,
>> + CPPC_PASSIVE,
>> + CPPC_GUIDED,
>> + CPPC_MODE_MAX,
>> +};
>> +
>> +static const char * const amd_pstate_mode_string[] = {
>> + [CPPC_DISABLE] = "disable",
>> + [CPPC_PASSIVE] = "passive",
>> + [CPPC_GUIDED] = "guided",
>> + NULL,
>> +};
>> +
>> /*
>> * TODO: We need more time to fine tune processors with shared memory solution
>> * with community together.
>> @@ -60,7 +74,18 @@
>> * module parameter to be able to enable it manually for debugging.
>> */
>> static struct cpufreq_driver amd_pstate_driver;
>> -static int cppc_load __initdata;
>> +static int cppc_state = CPPC_DISABLE;
>> +
>> +static inline int get_mode_idx_from_str(const char *str, size_t size)
>> +{
>> + int i = 0;
>> +
>> + for (; i < CPPC_MODE_MAX; ++i) {
>> + if (!strncmp(str, amd_pstate_mode_string[i], size))
>> + return i;
>> + }
>> + return -EINVAL;
>> +}
>>
>> static inline int pstate_enable(bool enable)
>> {
>> @@ -212,12 +237,18 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
>> }
>>
>> static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
>> - u32 des_perf, u32 max_perf, bool fast_switch)
>> + u32 des_perf, u32 max_perf, bool fast_switch, int flags)
>> {
>> u64 prev = READ_ONCE(cpudata->cppc_req_cached);
>> u64 value = prev;
>>
>> des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
>> +
>> + if (cppc_state == CPPC_GUIDED && flags & CPUFREQ_GOV_DYNAMIC_SWITCHING) {
>> + min_perf = des_perf;
>> + des_perf = 0;
>> + }
>
> Since we would like to modify the min_perf on share memory processors as
> well. The current cppc_set_perf() in cppc_acpi doesn't provide the MIN/MAX
> values. Could you please add the max_perf/min_perf in cppc_acpi.c as well?
> Then the APIs will be available on the share memory processors like Rome.
Sure. Thanks for pointing this out.
>
> Thanks,
> Ray
>
>> +
>> value &= ~AMD_CPPC_MIN_PERF(~0L);
>> value |= AMD_CPPC_MIN_PERF(min_perf);
>>
>> @@ -272,7 +303,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
>>
>> cpufreq_freq_transition_begin(policy, &freqs);
>> amd_pstate_update(cpudata, min_perf, des_perf,
>> - max_perf, false);
>> + max_perf, false, policy->governor->flags);
>> cpufreq_freq_transition_end(policy, &freqs, false);
>>
>> return 0;
>> @@ -306,7 +337,8 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
>> if (max_perf < min_perf)
>> max_perf = min_perf;
>>
>> - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
>> + amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
>> + policy->governor->flags);
>> }
>>
>> static int amd_get_min_freq(struct amd_cpudata *cpudata)
>> @@ -627,7 +659,7 @@ static int __init amd_pstate_init(void)
>> * enable the amd_pstate passive mode driver explicitly
>> * with amd_pstate=passive in kernel command line
>> */
>> - if (!cppc_load) {
>> + if (cppc_state == CPPC_DISABLE) {
>> pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n");
>> return -ENODEV;
>> }
>> @@ -670,16 +702,22 @@ device_initcall(amd_pstate_init);
>>
>> static int __init amd_pstate_param(char *str)
>> {
>> + int size, mode_idx;
>> +
>> if (!str)
>> return -EINVAL;
>>
>> - if (!strcmp(str, "disable")) {
>> - cppc_load = 0;
>> - pr_info("driver is explicitly disabled\n");
>> - } else if (!strcmp(str, "passive"))
>> - cppc_load = 1;
>> + size = strlen(str);
>> + mode_idx = get_mode_idx_from_str(str, size);
>>
>> - return 0;
>> + if (mode_idx >= CPPC_DISABLE && mode_idx < CPPC_MODE_MAX) {
>> + cppc_state = mode_idx;
>> + if (cppc_state == CPPC_DISABLE)
>> + pr_info("driver is explicitly disabled\n");
>> + return 0;
>> + }
>> +
>> + return -EINVAL;
>> }
>> early_param("amd_pstate", amd_pstate_param);
>>
>> --
>> 2.34.1
>>
@@ -6970,3 +6970,7 @@
management firmware translates the requests into actual
hardware states (core frequency, data fabric and memory
clocks etc.)
+ guided
+ Activate guided autonomous mode. Driver requests minimum
+ performance and maximum performance and the PMFW autonomously
+ selects frequencies in this range.
@@ -50,6 +50,20 @@
#define AMD_PSTATE_TRANSITION_LATENCY 20000
#define AMD_PSTATE_TRANSITION_DELAY 1000
+enum amd_pstate_mode {
+ CPPC_DISABLE = 0,
+ CPPC_PASSIVE,
+ CPPC_GUIDED,
+ CPPC_MODE_MAX,
+};
+
+static const char * const amd_pstate_mode_string[] = {
+ [CPPC_DISABLE] = "disable",
+ [CPPC_PASSIVE] = "passive",
+ [CPPC_GUIDED] = "guided",
+ NULL,
+};
+
/*
* TODO: We need more time to fine tune processors with shared memory solution
* with community together.
@@ -60,7 +74,18 @@
* module parameter to be able to enable it manually for debugging.
*/
static struct cpufreq_driver amd_pstate_driver;
-static int cppc_load __initdata;
+static int cppc_state = CPPC_DISABLE;
+
+static inline int get_mode_idx_from_str(const char *str, size_t size)
+{
+ int i = 0;
+
+ for (; i < CPPC_MODE_MAX; ++i) {
+ if (!strncmp(str, amd_pstate_mode_string[i], size))
+ return i;
+ }
+ return -EINVAL;
+}
static inline int pstate_enable(bool enable)
{
@@ -212,12 +237,18 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
}
static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
- u32 des_perf, u32 max_perf, bool fast_switch)
+ u32 des_perf, u32 max_perf, bool fast_switch, int flags)
{
u64 prev = READ_ONCE(cpudata->cppc_req_cached);
u64 value = prev;
des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
+
+ if (cppc_state == CPPC_GUIDED && flags & CPUFREQ_GOV_DYNAMIC_SWITCHING) {
+ min_perf = des_perf;
+ des_perf = 0;
+ }
+
value &= ~AMD_CPPC_MIN_PERF(~0L);
value |= AMD_CPPC_MIN_PERF(min_perf);
@@ -272,7 +303,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
cpufreq_freq_transition_begin(policy, &freqs);
amd_pstate_update(cpudata, min_perf, des_perf,
- max_perf, false);
+ max_perf, false, policy->governor->flags);
cpufreq_freq_transition_end(policy, &freqs, false);
return 0;
@@ -306,7 +337,8 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
if (max_perf < min_perf)
max_perf = min_perf;
- amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
+ amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
+ policy->governor->flags);
}
static int amd_get_min_freq(struct amd_cpudata *cpudata)
@@ -627,7 +659,7 @@ static int __init amd_pstate_init(void)
* enable the amd_pstate passive mode driver explicitly
* with amd_pstate=passive in kernel command line
*/
- if (!cppc_load) {
+ if (cppc_state == CPPC_DISABLE) {
pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n");
return -ENODEV;
}
@@ -670,16 +702,22 @@ device_initcall(amd_pstate_init);
static int __init amd_pstate_param(char *str)
{
+ int size, mode_idx;
+
if (!str)
return -EINVAL;
- if (!strcmp(str, "disable")) {
- cppc_load = 0;
- pr_info("driver is explicitly disabled\n");
- } else if (!strcmp(str, "passive"))
- cppc_load = 1;
+ size = strlen(str);
+ mode_idx = get_mode_idx_from_str(str, size);
- return 0;
+ if (mode_idx >= CPPC_DISABLE && mode_idx < CPPC_MODE_MAX) {
+ cppc_state = mode_idx;
+ if (cppc_state == CPPC_DISABLE)
+ pr_info("driver is explicitly disabled\n");
+ return 0;
+ }
+
+ return -EINVAL;
}
early_param("amd_pstate", amd_pstate_param);