[1/4] cpufreq: amd_pstate: Add guided autonomous mode

Message ID 20221207154648.233759-2-wyes.karny@amd.com
State New
Headers
Series amd_pstate: Add guided autonomous mode support |

Commit Message

Wyes Karny Dec. 7, 2022, 3:46 p.m. UTC
From the ACPI spec, the following 3 modes for CPPC can be defined:
1. Non-autonomous: The OS scaling governor specifies the operating
   frequency/performance level through the `Desired Performance` register,
   and the PMFW follows that.
2. Guided autonomous: The OS scaling governor specifies the min and max
   frequencies/performance levels through the `Minimum Performance` and
   `Maximum Performance` registers, and the PMFW can autonomously select an
   operating frequency in this range.
3. Fully autonomous: The OS only hints (via EPP) to the PMFW the required
   energy performance preference for the workload, and the PMFW
   autonomously scales the frequency.

Currently (1) is supported by amd_pstate as passive mode, and (3) is
implemented by EPP support. This change is to support (2).

In guided autonomous mode the min_perf is based on the input from the
scaling governor. For example, in case of schedutil this value depends
on the current utilization. And max_perf is set to max capacity.

To activate guided autonomous mode, the ``amd_pstate=guided`` command line
parameter has to be passed to the kernel.

Signed-off-by: Wyes Karny <wyes.karny@amd.com>
---
 .../admin-guide/kernel-parameters.txt         |  4 ++
 drivers/cpufreq/amd-pstate.c                  | 60 +++++++++++++++----
 2 files changed, 53 insertions(+), 11 deletions(-)
  

Comments

Huang Rui Dec. 9, 2022, 7:43 a.m. UTC | #1
On Wed, Dec 07, 2022 at 11:46:45PM +0800, Karny, Wyes wrote:
> From ACPI spec below 3 modes for CPPC can be defined:
> 1. Non autonomous: OS scaling governor specifies operating frequency/
>    performance level through `Desired Performance` register and PMFW
> follows that.
> 2. Guided autonomous: OS scaling governor specifies min and max
>    frequencies/ performance levels through `Minimum Performance` and
> `Maximum Performance` register, and PMFW can autonomously select an
> operating frequency in this range.
> 3. Fully autonomous: OS only hints (via EPP) to PMFW for the required
>    energy performance preference for the workload and PMFW autonomously
> scales the frequency.
> 
> Currently (1) is supported by amd_pstate as passive mode, and (3) is
> implemented by EPP support. This change is to support (2).
> 
> In guided autonomous mode the min_perf is based on the input from the
> scaling governor. For example, in case of schedutil this value depends
> on the current utilization. And max_perf is set to max capacity.
> 
> To activate guided auto mode ``amd_pstate=guided`` command line
> parameter has to be passed in the kernel.
> 
> Signed-off-by: Wyes Karny <wyes.karny@amd.com>
> ---
>  .../admin-guide/kernel-parameters.txt         |  4 ++
>  drivers/cpufreq/amd-pstate.c                  | 60 +++++++++++++++----
>  2 files changed, 53 insertions(+), 11 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 42af9ca0127e..75e57afba77e 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6970,3 +6970,7 @@
>  			  management firmware translates the requests into actual
>  			  hardware states (core frequency, data fabric and memory
>  			  clocks etc.)
> +			guided
> +			  Activate guided autonomous mode. Driver requests minimum
> +			  performance and maximum performance and the PMFW autonomously
> +			  selects frequencies in this range.
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 204e39006dda..05e4003a77ee 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -50,6 +50,20 @@
>  #define AMD_PSTATE_TRANSITION_LATENCY	20000
>  #define AMD_PSTATE_TRANSITION_DELAY	1000
>  
> +enum amd_pstate_mode {
> +	CPPC_DISABLE = 0,
> +	CPPC_PASSIVE,
> +	CPPC_GUIDED,
> +	CPPC_MODE_MAX,
> +};
> +
> +static const char * const amd_pstate_mode_string[] = {
> +	[CPPC_DISABLE]     = "disable",
> +	[CPPC_PASSIVE]     = "passive",
> +	[CPPC_GUIDED]      = "guided",
> +	NULL,
> +};
> +
>  /*
>   * TODO: We need more time to fine tune processors with shared memory solution
>   * with community together.
> @@ -60,7 +74,18 @@
>   * module parameter to be able to enable it manually for debugging.
>   */
>  static struct cpufreq_driver amd_pstate_driver;
> -static int cppc_load __initdata;
> +static int cppc_state = CPPC_DISABLE;
> +
> +static inline int get_mode_idx_from_str(const char *str, size_t size)
> +{
> +	int i = 0;
> +
> +	for (; i < CPPC_MODE_MAX; ++i) {
> +		if (!strncmp(str, amd_pstate_mode_string[i], size))
> +			return i;
> +	}
> +	return -EINVAL;
> +}
>  
>  static inline int pstate_enable(bool enable)
>  {
> @@ -212,12 +237,18 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
>  }
>  
>  static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
> -			      u32 des_perf, u32 max_perf, bool fast_switch)
> +			      u32 des_perf, u32 max_perf, bool fast_switch, int flags)
>  {
>  	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
>  	u64 value = prev;
>  
>  	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
> +
> +	if (cppc_state == CPPC_GUIDED && flags & CPUFREQ_GOV_DYNAMIC_SWITCHING) {
> +		min_perf = des_perf;
> +		des_perf = 0;
> +	}

We would like to modify min_perf on shared memory processors as well, but
the current cppc_set_perf() in cppc_acpi doesn't provide the MIN/MAX
values. Could you please add max_perf/min_perf in cppc_acpi.c as well?
Then the APIs will be available on shared memory processors like Rome.

Thanks,
Ray

> +
>  	value &= ~AMD_CPPC_MIN_PERF(~0L);
>  	value |= AMD_CPPC_MIN_PERF(min_perf);
>  
> @@ -272,7 +303,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
>  
>  	cpufreq_freq_transition_begin(policy, &freqs);
>  	amd_pstate_update(cpudata, min_perf, des_perf,
> -			  max_perf, false);
> +			  max_perf, false, policy->governor->flags);
>  	cpufreq_freq_transition_end(policy, &freqs, false);
>  
>  	return 0;
> @@ -306,7 +337,8 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
>  	if (max_perf < min_perf)
>  		max_perf = min_perf;
>  
> -	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
> +	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
> +			policy->governor->flags);
>  }
>  
>  static int amd_get_min_freq(struct amd_cpudata *cpudata)
> @@ -627,7 +659,7 @@ static int __init amd_pstate_init(void)
>  	 * enable the amd_pstate passive mode driver explicitly
>  	 * with amd_pstate=passive in kernel command line
>  	 */
> -	if (!cppc_load) {
> +	if (cppc_state == CPPC_DISABLE) {
>  		pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n");
>  		return -ENODEV;
>  	}
> @@ -670,16 +702,22 @@ device_initcall(amd_pstate_init);
>  
>  static int __init amd_pstate_param(char *str)
>  {
> +	int size, mode_idx;
> +
>  	if (!str)
>  		return -EINVAL;
>  
> -	if (!strcmp(str, "disable")) {
> -		cppc_load = 0;
> -		pr_info("driver is explicitly disabled\n");
> -	} else if (!strcmp(str, "passive"))
> -		cppc_load = 1;
> +	size = strlen(str);
> +	mode_idx = get_mode_idx_from_str(str, size);
>  
> -	return 0;
> +	if (mode_idx >= CPPC_DISABLE && mode_idx < CPPC_MODE_MAX) {
> +		cppc_state = mode_idx;
> +		if (cppc_state == CPPC_DISABLE)
> +			pr_info("driver is explicitly disabled\n");
> +		return 0;
> +	}
> +
> +	return -EINVAL;
>  }
>  early_param("amd_pstate", amd_pstate_param);
>  
> -- 
> 2.34.1
>
  
Wyes Karny Dec. 9, 2022, 10:04 a.m. UTC | #2
Hi Ray,

On 12/9/2022 1:13 PM, Huang Rui wrote:
> On Wed, Dec 07, 2022 at 11:46:45PM +0800, Karny, Wyes wrote:
>> From ACPI spec below 3 modes for CPPC can be defined:
>> 1. Non autonomous: OS scaling governor specifies operating frequency/
>>    performance level through `Desired Performance` register and PMFW
>> follows that.
>> 2. Guided autonomous: OS scaling governor specifies min and max
>>    frequencies/ performance levels through `Minimum Performance` and
>> `Maximum Performance` register, and PMFW can autonomously select an
>> operating frequency in this range.
>> 3. Fully autonomous: OS only hints (via EPP) to PMFW for the required
>>    energy performance preference for the workload and PMFW autonomously
>> scales the frequency.
>>
>> Currently (1) is supported by amd_pstate as passive mode, and (3) is
>> implemented by EPP support. This change is to support (2).
>>
>> In guided autonomous mode the min_perf is based on the input from the
>> scaling governor. For example, in case of schedutil this value depends
>> on the current utilization. And max_perf is set to max capacity.
>>
>> To activate guided auto mode ``amd_pstate=guided`` command line
>> parameter has to be passed in the kernel.
>>
>> Signed-off-by: Wyes Karny <wyes.karny@amd.com>
>> ---
>>  .../admin-guide/kernel-parameters.txt         |  4 ++
>>  drivers/cpufreq/amd-pstate.c                  | 60 +++++++++++++++----
>>  2 files changed, 53 insertions(+), 11 deletions(-)
>>
>> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
>> index 42af9ca0127e..75e57afba77e 100644
>> --- a/Documentation/admin-guide/kernel-parameters.txt
>> +++ b/Documentation/admin-guide/kernel-parameters.txt
>> @@ -6970,3 +6970,7 @@
>>  			  management firmware translates the requests into actual
>>  			  hardware states (core frequency, data fabric and memory
>>  			  clocks etc.)
>> +			guided
>> +			  Activate guided autonomous mode. Driver requests minimum
>> +			  performance and maximum performance and the PMFW autonomously
>> +			  selects frequencies in this range.
>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>> index 204e39006dda..05e4003a77ee 100644
>> --- a/drivers/cpufreq/amd-pstate.c
>> +++ b/drivers/cpufreq/amd-pstate.c
>> @@ -50,6 +50,20 @@
>>  #define AMD_PSTATE_TRANSITION_LATENCY	20000
>>  #define AMD_PSTATE_TRANSITION_DELAY	1000
>>  
>> +enum amd_pstate_mode {
>> +	CPPC_DISABLE = 0,
>> +	CPPC_PASSIVE,
>> +	CPPC_GUIDED,
>> +	CPPC_MODE_MAX,
>> +};
>> +
>> +static const char * const amd_pstate_mode_string[] = {
>> +	[CPPC_DISABLE]     = "disable",
>> +	[CPPC_PASSIVE]     = "passive",
>> +	[CPPC_GUIDED]      = "guided",
>> +	NULL,
>> +};
>> +
>>  /*
>>   * TODO: We need more time to fine tune processors with shared memory solution
>>   * with community together.
>> @@ -60,7 +74,18 @@
>>   * module parameter to be able to enable it manually for debugging.
>>   */
>>  static struct cpufreq_driver amd_pstate_driver;
>> -static int cppc_load __initdata;
>> +static int cppc_state = CPPC_DISABLE;
>> +
>> +static inline int get_mode_idx_from_str(const char *str, size_t size)
>> +{
>> +	int i = 0;
>> +
>> +	for (; i < CPPC_MODE_MAX; ++i) {
>> +		if (!strncmp(str, amd_pstate_mode_string[i], size))
>> +			return i;
>> +	}
>> +	return -EINVAL;
>> +}
>>  
>>  static inline int pstate_enable(bool enable)
>>  {
>> @@ -212,12 +237,18 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
>>  }
>>  
>>  static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
>> -			      u32 des_perf, u32 max_perf, bool fast_switch)
>> +			      u32 des_perf, u32 max_perf, bool fast_switch, int flags)
>>  {
>>  	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
>>  	u64 value = prev;
>>  
>>  	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
>> +
>> +	if (cppc_state == CPPC_GUIDED && flags & CPUFREQ_GOV_DYNAMIC_SWITCHING) {
>> +		min_perf = des_perf;
>> +		des_perf = 0;
>> +	}
> 
> Since we would like to modify the min_perf on share memory processors as
> well. The current cppc_set_perf() in cppc_acpi doesn't provide the MIN/MAX
> values. Could you please add the max_perf/min_perf in cppc_acpi.c as well?
> Then the APIs will be available on the share memory processors like Rome.

Sure. Thanks for pointing this out.

> 
> Thanks,
> Ray
> 
>> +
>>  	value &= ~AMD_CPPC_MIN_PERF(~0L);
>>  	value |= AMD_CPPC_MIN_PERF(min_perf);
>>  
>> @@ -272,7 +303,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
>>  
>>  	cpufreq_freq_transition_begin(policy, &freqs);
>>  	amd_pstate_update(cpudata, min_perf, des_perf,
>> -			  max_perf, false);
>> +			  max_perf, false, policy->governor->flags);
>>  	cpufreq_freq_transition_end(policy, &freqs, false);
>>  
>>  	return 0;
>> @@ -306,7 +337,8 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
>>  	if (max_perf < min_perf)
>>  		max_perf = min_perf;
>>  
>> -	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
>> +	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
>> +			policy->governor->flags);
>>  }
>>  
>>  static int amd_get_min_freq(struct amd_cpudata *cpudata)
>> @@ -627,7 +659,7 @@ static int __init amd_pstate_init(void)
>>  	 * enable the amd_pstate passive mode driver explicitly
>>  	 * with amd_pstate=passive in kernel command line
>>  	 */
>> -	if (!cppc_load) {
>> +	if (cppc_state == CPPC_DISABLE) {
>>  		pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n");
>>  		return -ENODEV;
>>  	}
>> @@ -670,16 +702,22 @@ device_initcall(amd_pstate_init);
>>  
>>  static int __init amd_pstate_param(char *str)
>>  {
>> +	int size, mode_idx;
>> +
>>  	if (!str)
>>  		return -EINVAL;
>>  
>> -	if (!strcmp(str, "disable")) {
>> -		cppc_load = 0;
>> -		pr_info("driver is explicitly disabled\n");
>> -	} else if (!strcmp(str, "passive"))
>> -		cppc_load = 1;
>> +	size = strlen(str);
>> +	mode_idx = get_mode_idx_from_str(str, size);
>>  
>> -	return 0;
>> +	if (mode_idx >= CPPC_DISABLE && mode_idx < CPPC_MODE_MAX) {
>> +		cppc_state = mode_idx;
>> +		if (cppc_state == CPPC_DISABLE)
>> +			pr_info("driver is explicitly disabled\n");
>> +		return 0;
>> +	}
>> +
>> +	return -EINVAL;
>>  }
>>  early_param("amd_pstate", amd_pstate_param);
>>  
>> -- 
>> 2.34.1
>>
  

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 42af9ca0127e..75e57afba77e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6970,3 +6970,7 @@ 
 			  management firmware translates the requests into actual
 			  hardware states (core frequency, data fabric and memory
 			  clocks etc.)
+			guided
+			  Activate guided autonomous mode. Driver requests minimum
+			  performance and maximum performance and the PMFW autonomously
+			  selects frequencies in this range.
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 204e39006dda..05e4003a77ee 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -50,6 +50,20 @@ 
 #define AMD_PSTATE_TRANSITION_LATENCY	20000
 #define AMD_PSTATE_TRANSITION_DELAY	1000
 
+enum amd_pstate_mode {
+	CPPC_DISABLE = 0,
+	CPPC_PASSIVE,
+	CPPC_GUIDED,
+	CPPC_MODE_MAX,
+};
+
+static const char * const amd_pstate_mode_string[] = {
+	[CPPC_DISABLE]     = "disable",
+	[CPPC_PASSIVE]     = "passive",
+	[CPPC_GUIDED]      = "guided",
+	NULL,
+};
+
 /*
  * TODO: We need more time to fine tune processors with shared memory solution
  * with community together.
@@ -60,7 +74,18 @@ 
  * module parameter to be able to enable it manually for debugging.
  */
 static struct cpufreq_driver amd_pstate_driver;
-static int cppc_load __initdata;
+static int cppc_state = CPPC_DISABLE;
+
+static inline int get_mode_idx_from_str(const char *str, size_t size)
+{
+	int i = 0;
+
+	for (; i < CPPC_MODE_MAX; ++i) {
+		if (!strncmp(str, amd_pstate_mode_string[i], size))
+			return i;
+	}
+	return -EINVAL;
+}
 
 static inline int pstate_enable(bool enable)
 {
@@ -212,12 +237,18 @@  static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
 }
 
 static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
-			      u32 des_perf, u32 max_perf, bool fast_switch)
+			      u32 des_perf, u32 max_perf, bool fast_switch, int flags)
 {
 	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
 	u64 value = prev;
 
 	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
+
+	if (cppc_state == CPPC_GUIDED && flags & CPUFREQ_GOV_DYNAMIC_SWITCHING) {
+		min_perf = des_perf;
+		des_perf = 0;
+	}
+
 	value &= ~AMD_CPPC_MIN_PERF(~0L);
 	value |= AMD_CPPC_MIN_PERF(min_perf);
 
@@ -272,7 +303,7 @@  static int amd_pstate_target(struct cpufreq_policy *policy,
 
 	cpufreq_freq_transition_begin(policy, &freqs);
 	amd_pstate_update(cpudata, min_perf, des_perf,
-			  max_perf, false);
+			  max_perf, false, policy->governor->flags);
 	cpufreq_freq_transition_end(policy, &freqs, false);
 
 	return 0;
@@ -306,7 +337,8 @@  static void amd_pstate_adjust_perf(unsigned int cpu,
 	if (max_perf < min_perf)
 		max_perf = min_perf;
 
-	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
+	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
+			policy->governor->flags);
 }
 
 static int amd_get_min_freq(struct amd_cpudata *cpudata)
@@ -627,7 +659,7 @@  static int __init amd_pstate_init(void)
 	 * enable the amd_pstate passive mode driver explicitly
 	 * with amd_pstate=passive in kernel command line
 	 */
-	if (!cppc_load) {
+	if (cppc_state == CPPC_DISABLE) {
 		pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n");
 		return -ENODEV;
 	}
@@ -670,16 +702,22 @@  device_initcall(amd_pstate_init);
 
 static int __init amd_pstate_param(char *str)
 {
+	int size, mode_idx;
+
 	if (!str)
 		return -EINVAL;
 
-	if (!strcmp(str, "disable")) {
-		cppc_load = 0;
-		pr_info("driver is explicitly disabled\n");
-	} else if (!strcmp(str, "passive"))
-		cppc_load = 1;
+	size = strlen(str);
+	mode_idx = get_mode_idx_from_str(str, size);
 
-	return 0;
+	if (mode_idx >= CPPC_DISABLE && mode_idx < CPPC_MODE_MAX) {
+		cppc_state = mode_idx;
+		if (cppc_state == CPPC_DISABLE)
+			pr_info("driver is explicitly disabled\n");
+		return 0;
+	}
+
+	return -EINVAL;
 }
 early_param("amd_pstate", amd_pstate_param);