[v8,06/13] cpufreq: amd-pstate: implement Pstate EPP support for the AMD processors
Commit Message
From: Perry Yuan <Perry.Yuan@amd.com>
Add EPP driver support for AMD SoCs which support a dedicated MSR for
CPPC. EPP is used by the DPM controller to configure the frequency that
a core operates at during short periods of activity.
The SoC EPP targets are configured on a scale from 0 to 255 where 0
represents maximum performance and 255 represents maximum efficiency.
The amd-pstate driver exports profile string names to userspace that are
tied to specific EPP values.
The balance_performance string (0x80) provides the best balance for
efficiency versus power on most systems, but users can choose other
strings to meet their needs as well.
$ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences
default performance balance_performance balance_power power
$ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
balance_performance
To enable the driver, add `amd_pstate=active` to the kernel
command line; the kernel will then load the active mode EPP driver.
Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
---
drivers/cpufreq/amd-pstate.c | 447 ++++++++++++++++++++++++++++++++++-
include/linux/amd-pstate.h | 10 +
2 files changed, 451 insertions(+), 6 deletions(-)
Comments
On 12/19/22 00:40, Perry Yuan wrote:
> From: Perry Yuan <Perry.Yuan@amd.com>
>
> Add EPP driver support for AMD SoCs which support a dedicated MSR for
> CPPC. EPP is used by the DPM controller to configure the frequency that
> a core operates at during short periods of activity.
>
> The SoC EPP targets are configured on a scale from 0 to 255 where 0
> represents maximum performance and 255 represents maximum efficiency.
>
> The amd-pstate driver exports profile string names to userspace that are
> tied to specific EPP values.
>
> The balance_performance string (0x80) provides the best balance for
> efficiency versus power on most systems, but users can choose other
> strings to meet their needs as well.
>
> $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences
> default performance balance_performance balance_power power
>
> $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
> balance_performance
>
> To enable the driver,it needs to add `amd_pstate=active` to kernel
> command line and kernel will load the active mode epp driver
>
> Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
> ---
> drivers/cpufreq/amd-pstate.c | 447 ++++++++++++++++++++++++++++++++++-
> include/linux/amd-pstate.h | 10 +
> 2 files changed, 451 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 861a905f9324..66b39457a312 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -59,7 +59,10 @@
> * we disable it by default to go acpi-cpufreq on these processors and add a
> * module parameter to be able to enable it manually for debugging.
> */
> +static struct cpufreq_driver *default_pstate_driver;
> static struct cpufreq_driver amd_pstate_driver;
> +static struct cpufreq_driver amd_pstate_epp_driver;
> +static struct amd_cpudata **all_cpu_data;
> static int cppc_state = AMD_PSTATE_DISABLE;
>
> static inline int get_mode_idx_from_str(const char *str, size_t size)
> @@ -70,9 +73,128 @@ static inline int get_mode_idx_from_str(const char *str, size_t size)
> if (!strncmp(str, amd_pstate_mode_string[i], size))
> return i;
> }
> +
Unrelated whitespace change.
> return -EINVAL;
> }
>
> +/**
> + * struct amd_pstate_params - global parameters for the performance control
> + * @ cppc_boost_disabled wheher the core performance boost disabled
> + */
> +struct amd_pstate_params {
> + bool cppc_boost_disabled;
> +};
> +
> +static struct amd_pstate_params global_params;
> +
> +static DEFINE_MUTEX(amd_pstate_limits_lock);
> +static DEFINE_MUTEX(amd_pstate_driver_lock);
> +
> +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
> +{
> + u64 epp;
> + int ret;
> +
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + if (!cppc_req_cached) {
> + epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> + &cppc_req_cached);
> + if (epp)
> + return epp;
> + }
> + epp = (cppc_req_cached >> 24) & 0xFF;
> + } else {
> + ret = cppc_get_epp_perf(cpudata->cpu, &epp);
> + if (ret < 0) {
> + pr_debug("Could not retrieve energy perf value (%d)\n", ret);
> + return -EIO;
> + }
> + }
> +
> + return (s16)(epp & 0xff);
> +}
> +
> +static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
> +{
> + s16 epp;
> + int index = -EINVAL;
> +
> + epp = amd_pstate_get_epp(cpudata, 0);
> + if (epp < 0)
> + return epp;
> +
> + switch (epp) {
> + case HWP_EPP_PERFORMANCE:
> + index = EPP_INDEX_PERFORMANCE;
> + break;
> + case HWP_EPP_BALANCE_PERFORMANCE:
> + index = EPP_INDEX_BALANCE_PERFORMANCE;
> + break;
> + case HWP_EPP_BALANCE_POWERSAVE:
> + index = EPP_INDEX_BALANCE_POWERSAVE;
> + break;
> + case HWP_EPP_POWERSAVE:
> + index = EPP_INDEX_POWERSAVE;
> + break;
> + default:
> + break;
Extra tab here
> + }
> +
> + return index;
> +}
> +
> +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
> +{
> + int ret;
> + struct cppc_perf_ctrls perf_ctrls;
> +
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + u64 value = READ_ONCE(cpudata->cppc_req_cached);
> +
> + value &= ~GENMASK_ULL(31, 24);
> + value |= (u64)epp << 24;
> + WRITE_ONCE(cpudata->cppc_req_cached, value);
> +
> + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> + if (!ret)
> + cpudata->epp_cached = epp;
> + } else {
> + perf_ctrls.energy_perf = epp;
> + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
> + if (ret) {
> + pr_debug("failed to set energy perf value (%d)\n", ret);
> + return ret;
> + }
> + cpudata->epp_cached = epp;
> + }
> +
> + return ret;
> +}
> +
> +static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
> + int pref_index)
> +{
> + int epp = -EINVAL;
> + int ret;
> +
> + if (!pref_index) {
> + pr_debug("EPP pref_index is invalid\n");
> + return -EINVAL;
> + }
> +
> + if (epp == -EINVAL)
> + epp = epp_values[pref_index];
Didn't you just hardcode epp to -EINVAL at the beginning of the function?
> +
> + if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> + pr_debug("EPP cannot be set under performance policy\n");
> + return -EBUSY;
> + }
> +
> + ret = amd_pstate_set_epp(cpudata, epp);
> +
> + return ret;
> +}
> +
> static inline int pstate_enable(bool enable)
> {
> return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
> @@ -81,11 +203,21 @@ static inline int pstate_enable(bool enable)
> static int cppc_enable(bool enable)
> {
> int cpu, ret = 0;
> + struct cppc_perf_ctrls perf_ctrls;
>
> for_each_present_cpu(cpu) {
> ret = cppc_set_enable(cpu, enable);
> if (ret)
> return ret;
> +
> + /* Enable autonomous mode for EPP */
> + if (cppc_state == AMD_PSTATE_ACTIVE) {
> + /* Set desired perf as zero to allow EPP firmware control */
> + perf_ctrls.desired_perf = 0;
> + ret = cppc_set_perf(cpu, &perf_ctrls);
> + if (ret)
> + return ret;
> + }
> }
>
> return ret;
> @@ -429,7 +561,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
> return;
>
> cpudata->boost_supported = true;
> - amd_pstate_driver.boost_enabled = true;
> + default_pstate_driver->boost_enabled = true;
> }
>
> static void amd_perf_ctl_reset(unsigned int cpu)
> @@ -603,10 +735,61 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
> return sprintf(&buf[0], "%u\n", perf);
> }
>
> +static ssize_t show_energy_performance_available_preferences(
> + struct cpufreq_policy *policy, char *buf)
> +{
> + int i = 0;
> + int offset = 0;
> +
> + while (energy_perf_strings[i] != NULL)
> + offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
> +
> + sysfs_emit_at(buf, offset, "\n");
> +
> + return offset;
> +}
> +
> +static ssize_t store_energy_performance_preference(
> + struct cpufreq_policy *policy, const char *buf, size_t count)
> +{
> + struct amd_cpudata *cpudata = policy->driver_data;
> + char str_preference[21];
> + ssize_t ret;
> +
> + ret = sscanf(buf, "%20s", str_preference);
> + if (ret != 1)
> + return -EINVAL;
> +
> + ret = match_string(energy_perf_strings, -1, str_preference);
> + if (ret < 0)
> + return -EINVAL;
> +
> + mutex_lock(&amd_pstate_limits_lock);
> + ret = amd_pstate_set_energy_pref_index(cpudata, ret);
> + mutex_unlock(&amd_pstate_limits_lock);
> +
> + return ret ?: count;
> +}
> +
> +static ssize_t show_energy_performance_preference(
> + struct cpufreq_policy *policy, char *buf)
> +{
> + struct amd_cpudata *cpudata = policy->driver_data;
> + int preference;
> +
> + preference = amd_pstate_get_energy_pref_index(cpudata);
> + if (preference < 0)
> + return preference;
> +
> + return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
> +}
> +
> cpufreq_freq_attr_ro(amd_pstate_max_freq);
> cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
>
> cpufreq_freq_attr_ro(amd_pstate_highest_perf);
> +cpufreq_freq_attr_rw(energy_performance_preference);
> +cpufreq_freq_attr_ro(energy_performance_available_preferences);
>
> static struct freq_attr *amd_pstate_attr[] = {
> &amd_pstate_max_freq,
> @@ -615,6 +798,235 @@ static struct freq_attr *amd_pstate_attr[] = {
> NULL,
> };
>
> +static struct freq_attr *amd_pstate_epp_attr[] = {
> + &amd_pstate_max_freq,
> + &amd_pstate_lowest_nonlinear_freq,
> + &amd_pstate_highest_perf,
> + &energy_performance_preference,
> + &energy_performance_available_preferences,
> + NULL,
> +};
> +
> +static inline void update_boost_state(void)
> +{
> + u64 misc_en;
> + struct amd_cpudata *cpudata;
> +
> + cpudata = all_cpu_data[0];
> + rdmsrl(MSR_K7_HWCR, misc_en);
> + global_params.cppc_boost_disabled = misc_en & BIT_ULL(25);
> +}
> +
> +static int amd_pstate_init_cpu(unsigned int cpunum)
> +{
> + struct amd_cpudata *cpudata;
> +
> + cpudata = all_cpu_data[cpunum];
> + if (!cpudata) {
> + cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
> + if (!cpudata)
> + return -ENOMEM;
> + WRITE_ONCE(all_cpu_data[cpunum], cpudata);
> +
> + cpudata->cpu = cpunum;
> + }
> +
> + cpudata->epp_policy = 0;
> + pr_debug("controlling: cpu %d\n", cpunum);
> + return 0;
> +}
> +
> +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
> +{
> + int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> + struct amd_cpudata *cpudata;
> + struct device *dev;
> + int rc;
> + u64 value;
> +
> + rc = amd_pstate_init_cpu(policy->cpu);
> + if (rc)
> + return rc;
> +
> + cpudata = all_cpu_data[policy->cpu];
> +
> + dev = get_cpu_device(policy->cpu);
> + if (!dev)
> + goto free_cpudata1;
> +
> + rc = amd_pstate_init_perf(cpudata);
> + if (rc)
> + goto free_cpudata1;
> +
> + min_freq = amd_get_min_freq(cpudata);
> + max_freq = amd_get_max_freq(cpudata);
> + nominal_freq = amd_get_nominal_freq(cpudata);
> + lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
> + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
> + dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
> + min_freq, max_freq);
> + ret = -EINVAL;
> + goto free_cpudata1;
> + }
> +
> + policy->min = min_freq;
> + policy->max = max_freq;
> +
> + policy->cpuinfo.min_freq = min_freq;
> + policy->cpuinfo.max_freq = max_freq;
> + /* It will be updated by governor */
> + policy->cur = policy->cpuinfo.min_freq;
> +
> + /* Initial processor data capability frequencies */
> + cpudata->max_freq = max_freq;
> + cpudata->min_freq = min_freq;
> + cpudata->nominal_freq = nominal_freq;
> + cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
> +
> + policy->driver_data = cpudata;
> +
> + cpudata->epp_cached = amd_pstate_get_epp(cpudata, value);
> +
> + policy->min = policy->cpuinfo.min_freq;
> + policy->max = policy->cpuinfo.max_freq;
> +
> + /*
> + * Set the policy to powersave to provide a valid fallback value in case
> + * the default cpufreq governor is neither powersave nor performance.
> + */
> + policy->policy = CPUFREQ_POLICY_POWERSAVE;
> +
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + policy->fast_switch_possible = true;
> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
> + if (ret)
> + return ret;
> + WRITE_ONCE(cpudata->cppc_req_cached, value);
> +
> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
> + if (ret)
> + return ret;
> + WRITE_ONCE(cpudata->cppc_cap1_cached, value);
> + }
> + amd_pstate_boost_init(cpudata);
> +
> + return 0;
> +
> +free_cpudata1:
> + kfree(cpudata);
> + return ret;
> +}
> +
> +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
> +{
> + pr_debug("CPU %d exiting\n", policy->cpu);
> + policy->fast_switch_possible = false;
> + return 0;
> +}
> +
> +static void amd_pstate_update_max_freq(unsigned int cpu)
> +{
> + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
> +
> + if (!policy)
> + return;
> +
> + refresh_frequency_limits(policy);
> + cpufreq_cpu_put(policy);
> +}
> +
> +static void amd_pstate_epp_update_limits(unsigned int cpu)
> +{
> + mutex_lock(&amd_pstate_driver_lock);
> + update_boost_state();
> + if (global_params.cppc_boost_disabled) {
> + for_each_possible_cpu(cpu)
> + amd_pstate_update_max_freq(cpu);
> + } else {
> + cpufreq_update_policy(cpu);
> + }
> + mutex_unlock(&amd_pstate_driver_lock);
> +}
> +
> +static void amd_pstate_epp_init(unsigned int cpu)
> +{
> + struct amd_cpudata *cpudata = all_cpu_data[cpu];
> + u32 max_perf, min_perf;
> + u64 value;
> + s16 epp;
> +
> + max_perf = READ_ONCE(cpudata->highest_perf);
> + min_perf = READ_ONCE(cpudata->lowest_perf);
> +
> + value = READ_ONCE(cpudata->cppc_req_cached);
> +
> + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> + min_perf = max_perf;
> +
> + /* Initial min/max values for CPPC Performance Controls Register */
> + value &= ~AMD_CPPC_MIN_PERF(~0L);
> + value |= AMD_CPPC_MIN_PERF(min_perf);
> +
> + value &= ~AMD_CPPC_MAX_PERF(~0L);
> + value |= AMD_CPPC_MAX_PERF(max_perf);
> +
> + /* CPPC EPP feature require to set zero to the desire perf bit */
> + value &= ~AMD_CPPC_DES_PERF(~0L);
> + value |= AMD_CPPC_DES_PERF(0);
> +
> + if (cpudata->epp_policy == cpudata->policy)
> + goto skip_epp;
> +
> + cpudata->epp_policy = cpudata->policy;
> +
> + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> + epp = amd_pstate_get_epp(cpudata, value);
> + if (epp < 0)
> + goto skip_epp;
> + /* force the epp value to be zero for performance policy */
> + epp = 0;
> + } else {
> + /* Get BIOS pre-defined epp value */
> + epp = amd_pstate_get_epp(cpudata, value);
> + if (epp)
> + goto skip_epp;
> + }
> + /* Set initial EPP value */
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + value &= ~GENMASK_ULL(31, 24);
> + value |= (u64)epp << 24;
> + }
> +
> +skip_epp:
> + WRITE_ONCE(cpudata->cppc_req_cached, value);
> + amd_pstate_set_epp(cpudata, epp);
> +}
> +
> +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
> +{
> + struct amd_cpudata *cpudata;
> +
> + if (!policy->cpuinfo.max_freq)
> + return -ENODEV;
> +
> + pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
> + policy->cpuinfo.max_freq, policy->max);
> +
> + cpudata = all_cpu_data[policy->cpu];
> + cpudata->policy = policy->policy;
> +
> + amd_pstate_epp_init(policy->cpu);
> +
> + return 0;
> +}
> +
> +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
> +{
> + cpufreq_verify_within_cpu_limits(policy);
> + pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
> + return 0;
> +}
> +
> static struct cpufreq_driver amd_pstate_driver = {
> .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
> .verify = amd_pstate_verify,
> @@ -628,8 +1040,20 @@ static struct cpufreq_driver amd_pstate_driver = {
> .attr = amd_pstate_attr,
> };
>
> +static struct cpufreq_driver amd_pstate_epp_driver = {
> + .flags = CPUFREQ_CONST_LOOPS,
> + .verify = amd_pstate_epp_verify_policy,
> + .setpolicy = amd_pstate_epp_set_policy,
> + .init = amd_pstate_epp_cpu_init,
> + .exit = amd_pstate_epp_cpu_exit,
> + .update_limits = amd_pstate_epp_update_limits,
> + .name = "amd_pstate_epp",
> + .attr = amd_pstate_epp_attr,
> +};
> +
> static int __init amd_pstate_init(void)
> {
> + static struct amd_cpudata **cpudata;
> int ret;
>
> if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
> @@ -656,7 +1080,8 @@ static int __init amd_pstate_init(void)
> /* capability check */
> if (boot_cpu_has(X86_FEATURE_CPPC)) {
> pr_debug("AMD CPPC MSR based functionality is supported\n");
> - amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
> + if (cppc_state == AMD_PSTATE_PASSIVE)
> + default_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
> } else {
> pr_debug("AMD CPPC shared memory based functionality is supported\n");
> static_call_update(amd_pstate_enable, cppc_enable);
> @@ -664,17 +1089,21 @@ static int __init amd_pstate_init(void)
> static_call_update(amd_pstate_update_perf, cppc_update_perf);
> }
>
> + cpudata = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
> + if (!cpudata)
> + return -ENOMEM;
> + WRITE_ONCE(all_cpu_data, cpudata);
> +
> /* enable amd pstate feature */
> ret = amd_pstate_enable(true);
> if (ret) {
> - pr_err("failed to enable amd-pstate with return %d\n", ret);
> + pr_err("failed to enable with return %d\n", ret);
> return ret;
> }
>
> - ret = cpufreq_register_driver(&amd_pstate_driver);
> + ret = cpufreq_register_driver(default_pstate_driver);
> if (ret)
> - pr_err("failed to register amd_pstate_driver with return %d\n",
> - ret);
> + pr_err("failed to register with return %d\n", ret);
>
> return ret;
> }
> @@ -696,6 +1125,12 @@ static int __init amd_pstate_param(char *str)
> if (cppc_state == AMD_PSTATE_DISABLE)
> pr_info("driver is explicitly disabled\n");
>
> + if (cppc_state == AMD_PSTATE_ACTIVE)
> + default_pstate_driver = &amd_pstate_epp_driver;
> +
> + if (cppc_state == AMD_PSTATE_PASSIVE)
> + default_pstate_driver = &amd_pstate_driver;
> +
> return 0;
> }
>
> diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> index 922d05a13902..fe1aef743c09 100644
> --- a/include/linux/amd-pstate.h
> +++ b/include/linux/amd-pstate.h
> @@ -47,6 +47,10 @@ struct amd_aperf_mperf {
> * @prev: Last Aperf/Mperf/tsc count value read from register
> * @freq: current cpu frequency value
> * @boost_supported: check whether the Processor or SBIOS supports boost mode
> + * @epp_policy: Last saved policy used to set energy-performance preference
> + * @epp_cached: Cached CPPC energy-performance preference value
> + * @policy: Cpufreq policy value
> + * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value
> *
> * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
> * represents all the attributes and goals that AMD P-State requests at runtime.
> @@ -72,6 +76,12 @@ struct amd_cpudata {
>
> u64 freq;
> bool boost_supported;
> +
> + /* EPP feature related attributes*/
> + s16 epp_policy;
> + s16 epp_cached;
> + u32 policy;
> + u64 cppc_cap1_cached;
> };
>
> /**
On Mon, Dec 19, 2022 at 02:40:35PM +0800, Yuan, Perry wrote:
> From: Perry Yuan <Perry.Yuan@amd.com>
>
> Add EPP driver support for AMD SoCs which support a dedicated MSR for
> CPPC. EPP is used by the DPM controller to configure the frequency that
> a core operates at during short periods of activity.
>
> The SoC EPP targets are configured on a scale from 0 to 255 where 0
> represents maximum performance and 255 represents maximum efficiency.
>
> The amd-pstate driver exports profile string names to userspace that are
> tied to specific EPP values.
>
> The balance_performance string (0x80) provides the best balance for
> efficiency versus power on most systems, but users can choose other
> strings to meet their needs as well.
>
> $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences
> default performance balance_performance balance_power power
>
> $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
> balance_performance
>
> To enable the driver,it needs to add `amd_pstate=active` to kernel
> command line and kernel will load the active mode epp driver
>
Please check the comments in V7's reply:
https://lore.kernel.org/lkml/Y6VVr+WYqwWb6XV0@amd.com/
I think the static call is not hard required at this moment.
But the boost/refresh_freq_limits stuff and cpudata may still need some
enhancement. Otherwise, it looks good to me right now.
Thanks,
Ray
> Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
> ---
> drivers/cpufreq/amd-pstate.c | 447 ++++++++++++++++++++++++++++++++++-
> include/linux/amd-pstate.h | 10 +
> 2 files changed, 451 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 861a905f9324..66b39457a312 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -59,7 +59,10 @@
> * we disable it by default to go acpi-cpufreq on these processors and add a
> * module parameter to be able to enable it manually for debugging.
> */
> +static struct cpufreq_driver *default_pstate_driver;
> static struct cpufreq_driver amd_pstate_driver;
> +static struct cpufreq_driver amd_pstate_epp_driver;
> +static struct amd_cpudata **all_cpu_data;
> static int cppc_state = AMD_PSTATE_DISABLE;
>
> static inline int get_mode_idx_from_str(const char *str, size_t size)
> @@ -70,9 +73,128 @@ static inline int get_mode_idx_from_str(const char *str, size_t size)
> if (!strncmp(str, amd_pstate_mode_string[i], size))
> return i;
> }
> +
> return -EINVAL;
> }
>
> +/**
> + * struct amd_pstate_params - global parameters for the performance control
> + * @ cppc_boost_disabled wheher the core performance boost disabled
> + */
> +struct amd_pstate_params {
> + bool cppc_boost_disabled;
> +};
> +
> +static struct amd_pstate_params global_params;
> +
> +static DEFINE_MUTEX(amd_pstate_limits_lock);
> +static DEFINE_MUTEX(amd_pstate_driver_lock);
> +
> +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
> +{
> + u64 epp;
> + int ret;
> +
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + if (!cppc_req_cached) {
> + epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> + &cppc_req_cached);
> + if (epp)
> + return epp;
> + }
> + epp = (cppc_req_cached >> 24) & 0xFF;
> + } else {
> + ret = cppc_get_epp_perf(cpudata->cpu, &epp);
> + if (ret < 0) {
> + pr_debug("Could not retrieve energy perf value (%d)\n", ret);
> + return -EIO;
> + }
> + }
> +
> + return (s16)(epp & 0xff);
> +}
> +
> +static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
> +{
> + s16 epp;
> + int index = -EINVAL;
> +
> + epp = amd_pstate_get_epp(cpudata, 0);
> + if (epp < 0)
> + return epp;
> +
> + switch (epp) {
> + case HWP_EPP_PERFORMANCE:
> + index = EPP_INDEX_PERFORMANCE;
> + break;
> + case HWP_EPP_BALANCE_PERFORMANCE:
> + index = EPP_INDEX_BALANCE_PERFORMANCE;
> + break;
> + case HWP_EPP_BALANCE_POWERSAVE:
> + index = EPP_INDEX_BALANCE_POWERSAVE;
> + break;
> + case HWP_EPP_POWERSAVE:
> + index = EPP_INDEX_POWERSAVE;
> + break;
> + default:
> + break;
> + }
> +
> + return index;
> +}
> +
> +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
> +{
> + int ret;
> + struct cppc_perf_ctrls perf_ctrls;
> +
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + u64 value = READ_ONCE(cpudata->cppc_req_cached);
> +
> + value &= ~GENMASK_ULL(31, 24);
> + value |= (u64)epp << 24;
> + WRITE_ONCE(cpudata->cppc_req_cached, value);
> +
> + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> + if (!ret)
> + cpudata->epp_cached = epp;
> + } else {
> + perf_ctrls.energy_perf = epp;
> + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
> + if (ret) {
> + pr_debug("failed to set energy perf value (%d)\n", ret);
> + return ret;
> + }
> + cpudata->epp_cached = epp;
> + }
> +
> + return ret;
> +}
> +
> +static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
> + int pref_index)
> +{
> + int epp = -EINVAL;
> + int ret;
> +
> + if (!pref_index) {
> + pr_debug("EPP pref_index is invalid\n");
> + return -EINVAL;
> + }
> +
> + if (epp == -EINVAL)
> + epp = epp_values[pref_index];
> +
> + if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> + pr_debug("EPP cannot be set under performance policy\n");
> + return -EBUSY;
> + }
> +
> + ret = amd_pstate_set_epp(cpudata, epp);
> +
> + return ret;
> +}
> +
> static inline int pstate_enable(bool enable)
> {
> return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
> @@ -81,11 +203,21 @@ static inline int pstate_enable(bool enable)
> static int cppc_enable(bool enable)
> {
> int cpu, ret = 0;
> + struct cppc_perf_ctrls perf_ctrls;
>
> for_each_present_cpu(cpu) {
> ret = cppc_set_enable(cpu, enable);
> if (ret)
> return ret;
> +
> + /* Enable autonomous mode for EPP */
> + if (cppc_state == AMD_PSTATE_ACTIVE) {
> + /* Set desired perf as zero to allow EPP firmware control */
> + perf_ctrls.desired_perf = 0;
> + ret = cppc_set_perf(cpu, &perf_ctrls);
> + if (ret)
> + return ret;
> + }
> }
>
> return ret;
> @@ -429,7 +561,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
> return;
>
> cpudata->boost_supported = true;
> - amd_pstate_driver.boost_enabled = true;
> + default_pstate_driver->boost_enabled = true;
> }
>
> static void amd_perf_ctl_reset(unsigned int cpu)
> @@ -603,10 +735,61 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
> return sprintf(&buf[0], "%u\n", perf);
> }
>
> +static ssize_t show_energy_performance_available_preferences(
> + struct cpufreq_policy *policy, char *buf)
> +{
> + int i = 0;
> + int offset = 0;
> +
> + while (energy_perf_strings[i] != NULL)
> + offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
> +
> + sysfs_emit_at(buf, offset, "\n");
> +
> + return offset;
> +}
> +
> +static ssize_t store_energy_performance_preference(
> + struct cpufreq_policy *policy, const char *buf, size_t count)
> +{
> + struct amd_cpudata *cpudata = policy->driver_data;
> + char str_preference[21];
> + ssize_t ret;
> +
> + ret = sscanf(buf, "%20s", str_preference);
> + if (ret != 1)
> + return -EINVAL;
> +
> + ret = match_string(energy_perf_strings, -1, str_preference);
> + if (ret < 0)
> + return -EINVAL;
> +
> + mutex_lock(&amd_pstate_limits_lock);
> + ret = amd_pstate_set_energy_pref_index(cpudata, ret);
> + mutex_unlock(&amd_pstate_limits_lock);
> +
> + return ret ?: count;
> +}
> +
> +static ssize_t show_energy_performance_preference(
> + struct cpufreq_policy *policy, char *buf)
> +{
> + struct amd_cpudata *cpudata = policy->driver_data;
> + int preference;
> +
> + preference = amd_pstate_get_energy_pref_index(cpudata);
> + if (preference < 0)
> + return preference;
> +
> + return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
> +}
> +
> cpufreq_freq_attr_ro(amd_pstate_max_freq);
> cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
>
> cpufreq_freq_attr_ro(amd_pstate_highest_perf);
> +cpufreq_freq_attr_rw(energy_performance_preference);
> +cpufreq_freq_attr_ro(energy_performance_available_preferences);
>
> static struct freq_attr *amd_pstate_attr[] = {
> &amd_pstate_max_freq,
> @@ -615,6 +798,235 @@ static struct freq_attr *amd_pstate_attr[] = {
> NULL,
> };
>
> +static struct freq_attr *amd_pstate_epp_attr[] = {
> + &amd_pstate_max_freq,
> + &amd_pstate_lowest_nonlinear_freq,
> + &amd_pstate_highest_perf,
> + &energy_performance_preference,
> + &energy_performance_available_preferences,
> + NULL,
> +};
> +
> +static inline void update_boost_state(void)
> +{
> + u64 misc_en;
> + struct amd_cpudata *cpudata;
> +
> + cpudata = all_cpu_data[0];
> + rdmsrl(MSR_K7_HWCR, misc_en);
> + global_params.cppc_boost_disabled = misc_en & BIT_ULL(25);
> +}
> +
> +static int amd_pstate_init_cpu(unsigned int cpunum)
> +{
> + struct amd_cpudata *cpudata;
> +
> + cpudata = all_cpu_data[cpunum];
> + if (!cpudata) {
> + cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
> + if (!cpudata)
> + return -ENOMEM;
> + WRITE_ONCE(all_cpu_data[cpunum], cpudata);
> +
> + cpudata->cpu = cpunum;
> + }
> +
> + cpudata->epp_policy = 0;
> + pr_debug("controlling: cpu %d\n", cpunum);
> + return 0;
> +}
> +
> +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
> +{
> + int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> + struct amd_cpudata *cpudata;
> + struct device *dev;
> + int rc;
> + u64 value;
> +
> + rc = amd_pstate_init_cpu(policy->cpu);
> + if (rc)
> + return rc;
> +
> + cpudata = all_cpu_data[policy->cpu];
> +
> + dev = get_cpu_device(policy->cpu);
> + if (!dev)
> + goto free_cpudata1;
> +
> + rc = amd_pstate_init_perf(cpudata);
> + if (rc)
> + goto free_cpudata1;
> +
> + min_freq = amd_get_min_freq(cpudata);
> + max_freq = amd_get_max_freq(cpudata);
> + nominal_freq = amd_get_nominal_freq(cpudata);
> + lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
> + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
> + dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
> + min_freq, max_freq);
> + ret = -EINVAL;
> + goto free_cpudata1;
> + }
> +
> + policy->min = min_freq;
> + policy->max = max_freq;
> +
> + policy->cpuinfo.min_freq = min_freq;
> + policy->cpuinfo.max_freq = max_freq;
> + /* It will be updated by governor */
> + policy->cur = policy->cpuinfo.min_freq;
> +
> + /* Initial processor data capability frequencies */
> + cpudata->max_freq = max_freq;
> + cpudata->min_freq = min_freq;
> + cpudata->nominal_freq = nominal_freq;
> + cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
> +
> + policy->driver_data = cpudata;
> +
> + cpudata->epp_cached = amd_pstate_get_epp(cpudata, value);
> +
> + policy->min = policy->cpuinfo.min_freq;
> + policy->max = policy->cpuinfo.max_freq;
> +
> + /*
> + * Set the policy to powersave to provide a valid fallback value in case
> + * the default cpufreq governor is neither powersave nor performance.
> + */
> + policy->policy = CPUFREQ_POLICY_POWERSAVE;
> +
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + policy->fast_switch_possible = true;
> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
> + if (ret)
> + return ret;
> + WRITE_ONCE(cpudata->cppc_req_cached, value);
> +
> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
> + if (ret)
> + return ret;
> + WRITE_ONCE(cpudata->cppc_cap1_cached, value);
> + }
> + amd_pstate_boost_init(cpudata);
> +
> + return 0;
> +
> +free_cpudata1:
> + kfree(cpudata);
> + return ret;
> +}
> +
> +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
> +{
> + pr_debug("CPU %d exiting\n", policy->cpu);
> + policy->fast_switch_possible = false;
> + return 0;
> +}
> +
> +static void amd_pstate_update_max_freq(unsigned int cpu)
> +{
> + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
> +
> + if (!policy)
> + return;
> +
> + refresh_frequency_limits(policy);
> + cpufreq_cpu_put(policy);
> +}
> +
> +static void amd_pstate_epp_update_limits(unsigned int cpu)
> +{
> + mutex_lock(&amd_pstate_driver_lock);
> + update_boost_state();
> + if (global_params.cppc_boost_disabled) {
> + for_each_possible_cpu(cpu)
> + amd_pstate_update_max_freq(cpu);
> + } else {
> + cpufreq_update_policy(cpu);
> + }
> + mutex_unlock(&amd_pstate_driver_lock);
> +}
> +
> +static void amd_pstate_epp_init(unsigned int cpu)
> +{
> + struct amd_cpudata *cpudata = all_cpu_data[cpu];
> + u32 max_perf, min_perf;
> + u64 value;
> + s16 epp;
> +
> + max_perf = READ_ONCE(cpudata->highest_perf);
> + min_perf = READ_ONCE(cpudata->lowest_perf);
> +
> + value = READ_ONCE(cpudata->cppc_req_cached);
> +
> + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> + min_perf = max_perf;
> +
> + /* Initial min/max values for CPPC Performance Controls Register */
> + value &= ~AMD_CPPC_MIN_PERF(~0L);
> + value |= AMD_CPPC_MIN_PERF(min_perf);
> +
> + value &= ~AMD_CPPC_MAX_PERF(~0L);
> + value |= AMD_CPPC_MAX_PERF(max_perf);
> +
> + /* CPPC EPP feature require to set zero to the desire perf bit */
> + value &= ~AMD_CPPC_DES_PERF(~0L);
> + value |= AMD_CPPC_DES_PERF(0);
> +
> + if (cpudata->epp_policy == cpudata->policy)
> + goto skip_epp;
> +
> + cpudata->epp_policy = cpudata->policy;
> +
> + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> + epp = amd_pstate_get_epp(cpudata, value);
> + if (epp < 0)
> + goto skip_epp;
> + /* force the epp value to be zero for performance policy */
> + epp = 0;
> + } else {
> + /* Get BIOS pre-defined epp value */
> + epp = amd_pstate_get_epp(cpudata, value);
> + if (epp)
> + goto skip_epp;
> + }
> + /* Set initial EPP value */
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + value &= ~GENMASK_ULL(31, 24);
> + value |= (u64)epp << 24;
> + }
> +
> +skip_epp:
> + WRITE_ONCE(cpudata->cppc_req_cached, value);
> + amd_pstate_set_epp(cpudata, epp);
> +}
> +
> +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
> +{
> + struct amd_cpudata *cpudata;
> +
> + if (!policy->cpuinfo.max_freq)
> + return -ENODEV;
> +
> + pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
> + policy->cpuinfo.max_freq, policy->max);
> +
> + cpudata = all_cpu_data[policy->cpu];
> + cpudata->policy = policy->policy;
> +
> + amd_pstate_epp_init(policy->cpu);
> +
> + return 0;
> +}
> +
> +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
> +{
> + cpufreq_verify_within_cpu_limits(policy);
> + pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
> + return 0;
> +}
> +
> static struct cpufreq_driver amd_pstate_driver = {
> .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
> .verify = amd_pstate_verify,
> @@ -628,8 +1040,20 @@ static struct cpufreq_driver amd_pstate_driver = {
> .attr = amd_pstate_attr,
> };
>
> +static struct cpufreq_driver amd_pstate_epp_driver = {
> + .flags = CPUFREQ_CONST_LOOPS,
> + .verify = amd_pstate_epp_verify_policy,
> + .setpolicy = amd_pstate_epp_set_policy,
> + .init = amd_pstate_epp_cpu_init,
> + .exit = amd_pstate_epp_cpu_exit,
> + .update_limits = amd_pstate_epp_update_limits,
> + .name = "amd_pstate_epp",
> + .attr = amd_pstate_epp_attr,
> +};
> +
> static int __init amd_pstate_init(void)
> {
> + static struct amd_cpudata **cpudata;
> int ret;
>
> if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
> @@ -656,7 +1080,8 @@ static int __init amd_pstate_init(void)
> /* capability check */
> if (boot_cpu_has(X86_FEATURE_CPPC)) {
> pr_debug("AMD CPPC MSR based functionality is supported\n");
> - amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
> + if (cppc_state == AMD_PSTATE_PASSIVE)
> + default_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
> } else {
> pr_debug("AMD CPPC shared memory based functionality is supported\n");
> static_call_update(amd_pstate_enable, cppc_enable);
> @@ -664,17 +1089,21 @@ static int __init amd_pstate_init(void)
> static_call_update(amd_pstate_update_perf, cppc_update_perf);
> }
>
> + cpudata = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
> + if (!cpudata)
> + return -ENOMEM;
> + WRITE_ONCE(all_cpu_data, cpudata);
> +
> /* enable amd pstate feature */
> ret = amd_pstate_enable(true);
> if (ret) {
> - pr_err("failed to enable amd-pstate with return %d\n", ret);
> + pr_err("failed to enable with return %d\n", ret);
> return ret;
> }
>
> - ret = cpufreq_register_driver(&amd_pstate_driver);
> + ret = cpufreq_register_driver(default_pstate_driver);
> if (ret)
> - pr_err("failed to register amd_pstate_driver with return %d\n",
> - ret);
> + pr_err("failed to register with return %d\n", ret);
>
> return ret;
> }
> @@ -696,6 +1125,12 @@ static int __init amd_pstate_param(char *str)
> if (cppc_state == AMD_PSTATE_DISABLE)
> pr_info("driver is explicitly disabled\n");
>
> + if (cppc_state == AMD_PSTATE_ACTIVE)
> + default_pstate_driver = &amd_pstate_epp_driver;
> +
> + if (cppc_state == AMD_PSTATE_PASSIVE)
> + default_pstate_driver = &amd_pstate_driver;
> +
> return 0;
> }
>
> diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> index 922d05a13902..fe1aef743c09 100644
> --- a/include/linux/amd-pstate.h
> +++ b/include/linux/amd-pstate.h
> @@ -47,6 +47,10 @@ struct amd_aperf_mperf {
> * @prev: Last Aperf/Mperf/tsc count value read from register
> * @freq: current cpu frequency value
> * @boost_supported: check whether the Processor or SBIOS supports boost mode
> + * @epp_policy: Last saved policy used to set energy-performance preference
> + * @epp_cached: Cached CPPC energy-performance preference value
> + * @policy: Cpufreq policy value
> + * @cppc_cap1_cached: Cached MSR_AMD_CPPC_CAP1 register value
> *
> * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
> * represents all the attributes and goals that AMD P-State requests at runtime.
> @@ -72,6 +76,12 @@ struct amd_cpudata {
>
> u64 freq;
> bool boost_supported;
> +
> + /* EPP feature related attributes*/
> + s16 epp_policy;
> + s16 epp_cached;
> + u32 policy;
> + u64 cppc_cap1_cached;
> };
>
> /**
> --
> 2.34.1
>
[AMD Official Use Only - General]
Hi Ray.
> -----Original Message-----
> From: Huang, Ray <Ray.Huang@amd.com>
> Sent: Friday, December 23, 2022 3:43 PM
> To: Yuan, Perry <Perry.Yuan@amd.com>
> Cc: rafael.j.wysocki@intel.com; Limonciello, Mario
> <Mario.Limonciello@amd.com>; viresh.kumar@linaro.org; Sharma, Deepak
> <Deepak.Sharma@amd.com>; Fontenot, Nathan
> <Nathan.Fontenot@amd.com>; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Huang, Shimmer
> <Shimmer.Huang@amd.com>; Du, Xiaojian <Xiaojian.Du@amd.com>; Meng,
> Li (Jassmine) <Li.Meng@amd.com>; Karny, Wyes <Wyes.Karny@amd.com>;
> linux-pm@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v8 06/13] cpufreq: amd-pstate: implement Pstate EPP
> support for the AMD processors
>
> On Mon, Dec 19, 2022 at 02:40:35PM +0800, Yuan, Perry wrote:
> > From: Perry Yuan <Perry.Yuan@amd.com>
> >
> > Add EPP driver support for AMD SoCs which support a dedicated MSR for
> > CPPC. EPP is used by the DPM controller to configure the frequency
> > that a core operates at during short periods of activity.
> >
> > The SoC EPP targets are configured on a scale from 0 to 255 where 0
> > represents maximum performance and 255 represents maximum
> efficiency.
> >
> > The amd-pstate driver exports profile string names to userspace that
> > are tied to specific EPP values.
> >
> > The balance_performance string (0x80) provides the best balance for
> > efficiency versus power on most systems, but users can choose other
> > strings to meet their needs as well.
> >
> > $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences
> > default performance balance_performance balance_power power
> >
> > $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
> > balance_performance
> >
> > To enable the driver, add `amd_pstate=active` to the kernel command
> > line; the kernel will then load the active mode EPP driver.
> >
>
> Please check the comments in V7's reply:
>
> https://lore.kernel.org/lkml/Y6VVr+WYqwWb6XV0@amd.com/
>
> I think the static call is not strictly required at this moment.
>
> But the boost/refresh_freq_limits stuff and cpudata may still need some
> enhancement. Otherwise, it looks good to me right now.
>
> Thanks,
> Ray
Thank you for the quick review during this difficult time.
I will rework the patch according to your suggestions in V9.
Perry.
>
> > Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
> > ---
> > drivers/cpufreq/amd-pstate.c | 447
> ++++++++++++++++++++++++++++++++++-
> > include/linux/amd-pstate.h | 10 +
> > 2 files changed, 451 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/cpufreq/amd-pstate.c
> > b/drivers/cpufreq/amd-pstate.c index 861a905f9324..66b39457a312 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -59,7 +59,10 @@
> > * we disable it by default to go acpi-cpufreq on these processors and add
> a
> > * module parameter to be able to enable it manually for debugging.
> > */
> > +static struct cpufreq_driver *default_pstate_driver;
> > static struct cpufreq_driver amd_pstate_driver;
> > +static struct cpufreq_driver amd_pstate_epp_driver; static struct
> > +amd_cpudata **all_cpu_data;
> > static int cppc_state = AMD_PSTATE_DISABLE;
> >
> > static inline int get_mode_idx_from_str(const char *str, size_t size)
> > @@ -70,9 +73,128 @@ static inline int get_mode_idx_from_str(const char
> *str, size_t size)
> > if (!strncmp(str, amd_pstate_mode_string[i], size))
> > return i;
> > }
> > +
> > return -EINVAL;
> > }
> >
> > +/**
> > + * struct amd_pstate_params - global parameters for the performance
> > +control
> > + * @ cppc_boost_disabled wheher the core performance boost disabled
> > +*/ struct amd_pstate_params {
> > + bool cppc_boost_disabled;
> > +};
> > +
> > +static struct amd_pstate_params global_params;
> > +
> > +static DEFINE_MUTEX(amd_pstate_limits_lock);
> > +static DEFINE_MUTEX(amd_pstate_driver_lock);
> > +
> > +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64
> > +cppc_req_cached) {
> > + u64 epp;
> > + int ret;
> > +
> > + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > + if (!cppc_req_cached) {
> > + epp = rdmsrl_on_cpu(cpudata->cpu,
> MSR_AMD_CPPC_REQ,
> > + &cppc_req_cached);
> > + if (epp)
> > + return epp;
> > + }
> > + epp = (cppc_req_cached >> 24) & 0xFF;
> > + } else {
> > + ret = cppc_get_epp_perf(cpudata->cpu, &epp);
> > + if (ret < 0) {
> > + pr_debug("Could not retrieve energy perf value
> (%d)\n", ret);
> > + return -EIO;
> > + }
> > + }
> > +
> > + return (s16)(epp & 0xff);
> > +}
> > +
> > +static int amd_pstate_get_energy_pref_index(struct amd_cpudata
> > +*cpudata) {
> > + s16 epp;
> > + int index = -EINVAL;
> > +
> > + epp = amd_pstate_get_epp(cpudata, 0);
> > + if (epp < 0)
> > + return epp;
> > +
> > + switch (epp) {
> > + case HWP_EPP_PERFORMANCE:
> > + index = EPP_INDEX_PERFORMANCE;
> > + break;
> > + case HWP_EPP_BALANCE_PERFORMANCE:
> > + index = EPP_INDEX_BALANCE_PERFORMANCE;
> > + break;
> > + case HWP_EPP_BALANCE_POWERSAVE:
> > + index = EPP_INDEX_BALANCE_POWERSAVE;
> > + break;
> > + case HWP_EPP_POWERSAVE:
> > + index = EPP_INDEX_POWERSAVE;
> > + break;
> > + default:
> > + break;
> > + }
> > +
> > + return index;
> > +}
> > +
> > +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) {
> > + int ret;
> > + struct cppc_perf_ctrls perf_ctrls;
> > +
> > + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > + u64 value = READ_ONCE(cpudata->cppc_req_cached);
> > +
> > + value &= ~GENMASK_ULL(31, 24);
> > + value |= (u64)epp << 24;
> > + WRITE_ONCE(cpudata->cppc_req_cached, value);
> > +
> > + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> value);
> > + if (!ret)
> > + cpudata->epp_cached = epp;
> > + } else {
> > + perf_ctrls.energy_perf = epp;
> > + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
> > + if (ret) {
> > + pr_debug("failed to set energy perf value (%d)\n",
> ret);
> > + return ret;
> > + }
> > + cpudata->epp_cached = epp;
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +static int amd_pstate_set_energy_pref_index(struct amd_cpudata
> *cpudata,
> > + int pref_index)
> > +{
> > + int epp = -EINVAL;
> > + int ret;
> > +
> > + if (!pref_index) {
> > + pr_debug("EPP pref_index is invalid\n");
> > + return -EINVAL;
> > + }
> > +
> > + if (epp == -EINVAL)
> > + epp = epp_values[pref_index];
> > +
> > + if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> {
> > + pr_debug("EPP cannot be set under performance policy\n");
> > + return -EBUSY;
> > + }
> > +
> > + ret = amd_pstate_set_epp(cpudata, epp);
> > +
> > + return ret;
> > +}
> > +
> > static inline int pstate_enable(bool enable) {
> > return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); @@ -81,11
> +203,21
> > @@ static inline int pstate_enable(bool enable) static int
> > cppc_enable(bool enable) {
> > int cpu, ret = 0;
> > + struct cppc_perf_ctrls perf_ctrls;
> >
> > for_each_present_cpu(cpu) {
> > ret = cppc_set_enable(cpu, enable);
> > if (ret)
> > return ret;
> > +
> > + /* Enable autonomous mode for EPP */
> > + if (cppc_state == AMD_PSTATE_ACTIVE) {
> > + /* Set desired perf as zero to allow EPP firmware
> control */
> > + perf_ctrls.desired_perf = 0;
> > + ret = cppc_set_perf(cpu, &perf_ctrls);
> > + if (ret)
> > + return ret;
> > + }
> > }
> >
> > return ret;
> > @@ -429,7 +561,7 @@ static void amd_pstate_boost_init(struct
> amd_cpudata *cpudata)
> > return;
> >
> > cpudata->boost_supported = true;
> > - amd_pstate_driver.boost_enabled = true;
> > + default_pstate_driver->boost_enabled = true;
> > }
> >
> > static void amd_perf_ctl_reset(unsigned int cpu) @@ -603,10 +735,61
> > @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy
> *policy,
> > return sprintf(&buf[0], "%u\n", perf); }
> >
> > +static ssize_t show_energy_performance_available_preferences(
> > + struct cpufreq_policy *policy, char *buf) {
> > + int i = 0;
> > + int offset = 0;
> > +
> > + while (energy_perf_strings[i] != NULL)
> > + offset += sysfs_emit_at(buf, offset, "%s ",
> > +energy_perf_strings[i++]);
> > +
> > + sysfs_emit_at(buf, offset, "\n");
> > +
> > + return offset;
> > +}
> > +
> > +static ssize_t store_energy_performance_preference(
> > + struct cpufreq_policy *policy, const char *buf, size_t count) {
> > + struct amd_cpudata *cpudata = policy->driver_data;
> > + char str_preference[21];
> > + ssize_t ret;
> > +
> > + ret = sscanf(buf, "%20s", str_preference);
> > + if (ret != 1)
> > + return -EINVAL;
> > +
> > + ret = match_string(energy_perf_strings, -1, str_preference);
> > + if (ret < 0)
> > + return -EINVAL;
> > +
> > + mutex_lock(&amd_pstate_limits_lock);
> > + ret = amd_pstate_set_energy_pref_index(cpudata, ret);
> > + mutex_unlock(&amd_pstate_limits_lock);
> > +
> > + return ret ?: count;
> > +}
> > +
> > +static ssize_t show_energy_performance_preference(
> > + struct cpufreq_policy *policy, char *buf) {
> > + struct amd_cpudata *cpudata = policy->driver_data;
> > + int preference;
> > +
> > + preference = amd_pstate_get_energy_pref_index(cpudata);
> > + if (preference < 0)
> > + return preference;
> > +
> > + return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); }
> > +
> > cpufreq_freq_attr_ro(amd_pstate_max_freq);
> > cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
> >
> > cpufreq_freq_attr_ro(amd_pstate_highest_perf);
> > +cpufreq_freq_attr_rw(energy_performance_preference);
> > +cpufreq_freq_attr_ro(energy_performance_available_preferences);
> >
> > static struct freq_attr *amd_pstate_attr[] = {
> > &amd_pstate_max_freq,
> > @@ -615,6 +798,235 @@ static struct freq_attr *amd_pstate_attr[] = {
> > NULL,
> > };
> >
> > +static struct freq_attr *amd_pstate_epp_attr[] = {
> > + &amd_pstate_max_freq,
> > + &amd_pstate_lowest_nonlinear_freq,
> > + &amd_pstate_highest_perf,
> > + &energy_performance_preference,
> > + &energy_performance_available_preferences,
> > + NULL,
> > +};
> > +
> > +static inline void update_boost_state(void) {
> > + u64 misc_en;
> > + struct amd_cpudata *cpudata;
> > +
> > + cpudata = all_cpu_data[0];
> > + rdmsrl(MSR_K7_HWCR, misc_en);
> > + global_params.cppc_boost_disabled = misc_en & BIT_ULL(25); }
> > +
> > +static int amd_pstate_init_cpu(unsigned int cpunum) {
> > + struct amd_cpudata *cpudata;
> > +
> > + cpudata = all_cpu_data[cpunum];
> > + if (!cpudata) {
> > + cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
> > + if (!cpudata)
> > + return -ENOMEM;
> > + WRITE_ONCE(all_cpu_data[cpunum], cpudata);
> > +
> > + cpudata->cpu = cpunum;
> > + }
> > +
> > + cpudata->epp_policy = 0;
> > + pr_debug("controlling: cpu %d\n", cpunum);
> > + return 0;
> > +}
> > +
> > +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) {
> > + int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> > + struct amd_cpudata *cpudata;
> > + struct device *dev;
> > + int rc;
> > + u64 value;
> > +
> > + rc = amd_pstate_init_cpu(policy->cpu);
> > + if (rc)
> > + return rc;
> > +
> > + cpudata = all_cpu_data[policy->cpu];
> > +
> > + dev = get_cpu_device(policy->cpu);
> > + if (!dev)
> > + goto free_cpudata1;
> > +
> > + rc = amd_pstate_init_perf(cpudata);
> > + if (rc)
> > + goto free_cpudata1;
> > +
> > + min_freq = amd_get_min_freq(cpudata);
> > + max_freq = amd_get_max_freq(cpudata);
> > + nominal_freq = amd_get_nominal_freq(cpudata);
> > + lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
> > + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
> > + dev_err(dev, "min_freq(%d) or max_freq(%d) value is
> incorrect\n",
> > + min_freq, max_freq);
> > + ret = -EINVAL;
> > + goto free_cpudata1;
> > + }
> > +
> > + policy->min = min_freq;
> > + policy->max = max_freq;
> > +
> > + policy->cpuinfo.min_freq = min_freq;
> > + policy->cpuinfo.max_freq = max_freq;
> > + /* It will be updated by governor */
> > + policy->cur = policy->cpuinfo.min_freq;
> > +
> > + /* Initial processor data capability frequencies */
> > + cpudata->max_freq = max_freq;
> > + cpudata->min_freq = min_freq;
> > + cpudata->nominal_freq = nominal_freq;
> > + cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
> > +
> > + policy->driver_data = cpudata;
> > +
> > + cpudata->epp_cached = amd_pstate_get_epp(cpudata, value);
> > +
> > + policy->min = policy->cpuinfo.min_freq;
> > + policy->max = policy->cpuinfo.max_freq;
> > +
> > + /*
> > + * Set the policy to powersave to provide a valid fallback value in case
> > + * the default cpufreq governor is neither powersave nor
> performance.
> > + */
> > + policy->policy = CPUFREQ_POLICY_POWERSAVE;
> > +
> > + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > + policy->fast_switch_possible = true;
> > + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> &value);
> > + if (ret)
> > + return ret;
> > + WRITE_ONCE(cpudata->cppc_req_cached, value);
> > +
> > + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
> &value);
> > + if (ret)
> > + return ret;
> > + WRITE_ONCE(cpudata->cppc_cap1_cached, value);
> > + }
> > + amd_pstate_boost_init(cpudata);
> > +
> > + return 0;
> > +
> > +free_cpudata1:
> > + kfree(cpudata);
> > + return ret;
> > +}
> > +
> > +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) {
> > + pr_debug("CPU %d exiting\n", policy->cpu);
> > + policy->fast_switch_possible = false;
> > + return 0;
> > +}
> > +
> > +static void amd_pstate_update_max_freq(unsigned int cpu) {
> > + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
> > +
> > + if (!policy)
> > + return;
> > +
> > + refresh_frequency_limits(policy);
> > + cpufreq_cpu_put(policy);
> > +}
> > +
> > +static void amd_pstate_epp_update_limits(unsigned int cpu) {
> > + mutex_lock(&amd_pstate_driver_lock);
> > + update_boost_state();
> > + if (global_params.cppc_boost_disabled) {
> > + for_each_possible_cpu(cpu)
> > + amd_pstate_update_max_freq(cpu);
> > + } else {
> > + cpufreq_update_policy(cpu);
> > + }
> > + mutex_unlock(&amd_pstate_driver_lock);
> > +}
> > +
> > +static void amd_pstate_epp_init(unsigned int cpu) {
> > + struct amd_cpudata *cpudata = all_cpu_data[cpu];
> > + u32 max_perf, min_perf;
> > + u64 value;
> > + s16 epp;
> > +
> > + max_perf = READ_ONCE(cpudata->highest_perf);
> > + min_perf = READ_ONCE(cpudata->lowest_perf);
> > +
> > + value = READ_ONCE(cpudata->cppc_req_cached);
> > +
> > + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> > + min_perf = max_perf;
> > +
> > + /* Initial min/max values for CPPC Performance Controls Register */
> > + value &= ~AMD_CPPC_MIN_PERF(~0L);
> > + value |= AMD_CPPC_MIN_PERF(min_perf);
> > +
> > + value &= ~AMD_CPPC_MAX_PERF(~0L);
> > + value |= AMD_CPPC_MAX_PERF(max_perf);
> > +
> > + /* CPPC EPP feature require to set zero to the desire perf bit */
> > + value &= ~AMD_CPPC_DES_PERF(~0L);
> > + value |= AMD_CPPC_DES_PERF(0);
> > +
> > + if (cpudata->epp_policy == cpudata->policy)
> > + goto skip_epp;
> > +
> > + cpudata->epp_policy = cpudata->policy;
> > +
> > + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> > + epp = amd_pstate_get_epp(cpudata, value);
> > + if (epp < 0)
> > + goto skip_epp;
> > + /* force the epp value to be zero for performance policy */
> > + epp = 0;
> > + } else {
> > + /* Get BIOS pre-defined epp value */
> > + epp = amd_pstate_get_epp(cpudata, value);
> > + if (epp)
> > + goto skip_epp;
> > + }
> > + /* Set initial EPP value */
> > + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > + value &= ~GENMASK_ULL(31, 24);
> > + value |= (u64)epp << 24;
> > + }
> > +
> > +skip_epp:
> > + WRITE_ONCE(cpudata->cppc_req_cached, value);
> > + amd_pstate_set_epp(cpudata, epp);
> > +}
> > +
> > +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) {
> > + struct amd_cpudata *cpudata;
> > +
> > + if (!policy->cpuinfo.max_freq)
> > + return -ENODEV;
> > +
> > + pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
> > + policy->cpuinfo.max_freq, policy->max);
> > +
> > + cpudata = all_cpu_data[policy->cpu];
> > + cpudata->policy = policy->policy;
> > +
> > + amd_pstate_epp_init(policy->cpu);
> > +
> > + return 0;
> > +}
> > +
> > +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data
> > +*policy) {
> > + cpufreq_verify_within_cpu_limits(policy);
> > + pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy-
> >min);
> > + return 0;
> > +}
> > +
> > static struct cpufreq_driver amd_pstate_driver = {
> > .flags = CPUFREQ_CONST_LOOPS |
> CPUFREQ_NEED_UPDATE_LIMITS,
> > .verify = amd_pstate_verify,
> > @@ -628,8 +1040,20 @@ static struct cpufreq_driver amd_pstate_driver =
> {
> > .attr = amd_pstate_attr,
> > };
> >
> > +static struct cpufreq_driver amd_pstate_epp_driver = {
> > + .flags = CPUFREQ_CONST_LOOPS,
> > + .verify = amd_pstate_epp_verify_policy,
> > + .setpolicy = amd_pstate_epp_set_policy,
> > + .init = amd_pstate_epp_cpu_init,
> > + .exit = amd_pstate_epp_cpu_exit,
> > + .update_limits = amd_pstate_epp_update_limits,
> > + .name = "amd_pstate_epp",
> > + .attr = amd_pstate_epp_attr,
> > +};
> > +
> > static int __init amd_pstate_init(void) {
> > + static struct amd_cpudata **cpudata;
> > int ret;
> >
> > if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) @@ -656,7
> +1080,8 @@
> > static int __init amd_pstate_init(void)
> > /* capability check */
> > if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > pr_debug("AMD CPPC MSR based functionality is
> supported\n");
> > - amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
> > + if (cppc_state == AMD_PSTATE_PASSIVE)
> > + default_pstate_driver->adjust_perf =
> amd_pstate_adjust_perf;
> > } else {
> > pr_debug("AMD CPPC shared memory based functionality is
> supported\n");
> > static_call_update(amd_pstate_enable, cppc_enable); @@ -
> 664,17
> > +1089,21 @@ static int __init amd_pstate_init(void)
> > static_call_update(amd_pstate_update_perf,
> cppc_update_perf);
> > }
> >
> > + cpudata = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
> > + if (!cpudata)
> > + return -ENOMEM;
> > + WRITE_ONCE(all_cpu_data, cpudata);
> > +
> > /* enable amd pstate feature */
> > ret = amd_pstate_enable(true);
> > if (ret) {
> > - pr_err("failed to enable amd-pstate with return %d\n", ret);
> > + pr_err("failed to enable with return %d\n", ret);
> > return ret;
> > }
> >
> > - ret = cpufreq_register_driver(&amd_pstate_driver);
> > + ret = cpufreq_register_driver(default_pstate_driver);
> > if (ret)
> > - pr_err("failed to register amd_pstate_driver with
> return %d\n",
> > - ret);
> > + pr_err("failed to register with return %d\n", ret);
> >
> > return ret;
> > }
> > @@ -696,6 +1125,12 @@ static int __init amd_pstate_param(char *str)
> > if (cppc_state == AMD_PSTATE_DISABLE)
> > pr_info("driver is explicitly disabled\n");
> >
> > + if (cppc_state == AMD_PSTATE_ACTIVE)
> > + default_pstate_driver = &amd_pstate_epp_driver;
> > +
> > + if (cppc_state == AMD_PSTATE_PASSIVE)
> > + default_pstate_driver = &amd_pstate_driver;
> > +
> > return 0;
> > }
> >
> > diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> > index 922d05a13902..fe1aef743c09 100644
> > --- a/include/linux/amd-pstate.h
> > +++ b/include/linux/amd-pstate.h
> > @@ -47,6 +47,10 @@ struct amd_aperf_mperf {
> > * @prev: Last Aperf/Mperf/tsc count value read from register
> > * @freq: current cpu frequency value
> > * @boost_supported: check whether the Processor or SBIOS supports
> > boost mode
> > + * @epp_policy: Last saved policy used to set energy-performance
> > + preference
> > + * @epp_cached: Cached CPPC energy-performance preference value
> > + * @policy: Cpufreq policy value
> > + * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value
> > *
> > * The amd_cpudata is key private data for each CPU thread in AMD P-
> State, and
> > * represents all the attributes and goals that AMD P-State requests at
> runtime.
> > @@ -72,6 +76,12 @@ struct amd_cpudata {
> >
> > u64 freq;
> > bool boost_supported;
> > +
> > + /* EPP feature related attributes*/
> > + s16 epp_policy;
> > + s16 epp_cached;
> > + u32 policy;
> > + u64 cppc_cap1_cached;
> > };
> >
> > /**
> > --
> > 2.34.1
> >
@@ -59,7 +59,10 @@
* we disable it by default to go acpi-cpufreq on these processors and add a
* module parameter to be able to enable it manually for debugging.
*/
+static struct cpufreq_driver *default_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
+static struct cpufreq_driver amd_pstate_epp_driver;
+static struct amd_cpudata **all_cpu_data;
static int cppc_state = AMD_PSTATE_DISABLE;
static inline int get_mode_idx_from_str(const char *str, size_t size)
@@ -70,9 +73,128 @@ static inline int get_mode_idx_from_str(const char *str, size_t size)
if (!strncmp(str, amd_pstate_mode_string[i], size))
return i;
}
+
return -EINVAL;
}
+/**
+ * struct amd_pstate_params - global parameters for the performance control
+ * @cppc_boost_disabled: whether the core performance boost is disabled
+ */
+struct amd_pstate_params {
+ bool cppc_boost_disabled;
+};
+
+static struct amd_pstate_params global_params;
+
+static DEFINE_MUTEX(amd_pstate_limits_lock);
+static DEFINE_MUTEX(amd_pstate_driver_lock);
+
+static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
+{
+ u64 epp;
+ int ret;
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ if (!cppc_req_cached) {
+ epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
+ &cppc_req_cached);
+ if (epp)
+ return epp;
+ }
+ epp = (cppc_req_cached >> 24) & 0xFF;
+ } else {
+ ret = cppc_get_epp_perf(cpudata->cpu, &epp);
+ if (ret < 0) {
+ pr_debug("Could not retrieve energy perf value (%d)\n", ret);
+ return -EIO;
+ }
+ }
+
+ return (s16)(epp & 0xff);
+}
+
+static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
+{
+ s16 epp;
+ int index = -EINVAL;
+
+ epp = amd_pstate_get_epp(cpudata, 0);
+ if (epp < 0)
+ return epp;
+
+ switch (epp) {
+ case HWP_EPP_PERFORMANCE:
+ index = EPP_INDEX_PERFORMANCE;
+ break;
+ case HWP_EPP_BALANCE_PERFORMANCE:
+ index = EPP_INDEX_BALANCE_PERFORMANCE;
+ break;
+ case HWP_EPP_BALANCE_POWERSAVE:
+ index = EPP_INDEX_BALANCE_POWERSAVE;
+ break;
+ case HWP_EPP_POWERSAVE:
+ index = EPP_INDEX_POWERSAVE;
+ break;
+ default:
+ break;
+ }
+
+ return index;
+}
+
+static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
+{
+ int ret;
+ struct cppc_perf_ctrls perf_ctrls;
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ u64 value = READ_ONCE(cpudata->cppc_req_cached);
+
+ value &= ~GENMASK_ULL(31, 24);
+ value |= (u64)epp << 24;
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+ ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+ if (!ret)
+ cpudata->epp_cached = epp;
+ } else {
+ perf_ctrls.energy_perf = epp;
+ ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
+ if (ret) {
+ pr_debug("failed to set energy perf value (%d)\n", ret);
+ return ret;
+ }
+ cpudata->epp_cached = epp;
+ }
+
+ return ret;
+}
+
+static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
+ int pref_index)
+{
+ int epp = -EINVAL;
+ int ret;
+
+ if (!pref_index) {
+ pr_debug("EPP pref_index is invalid\n");
+ return -EINVAL;
+ }
+
+ if (epp == -EINVAL)
+ epp = epp_values[pref_index];
+
+ if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ pr_debug("EPP cannot be set under performance policy\n");
+ return -EBUSY;
+ }
+
+ ret = amd_pstate_set_epp(cpudata, epp);
+
+ return ret;
+}
+
static inline int pstate_enable(bool enable)
{
return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
@@ -81,11 +203,21 @@ static inline int pstate_enable(bool enable)
static int cppc_enable(bool enable)
{
int cpu, ret = 0;
+ struct cppc_perf_ctrls perf_ctrls;
for_each_present_cpu(cpu) {
ret = cppc_set_enable(cpu, enable);
if (ret)
return ret;
+
+ /* Enable autonomous mode for EPP */
+ if (cppc_state == AMD_PSTATE_ACTIVE) {
+ /* Set desired perf as zero to allow EPP firmware control */
+ perf_ctrls.desired_perf = 0;
+ ret = cppc_set_perf(cpu, &perf_ctrls);
+ if (ret)
+ return ret;
+ }
}
return ret;
@@ -429,7 +561,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
return;
cpudata->boost_supported = true;
- amd_pstate_driver.boost_enabled = true;
+ default_pstate_driver->boost_enabled = true;
}
static void amd_perf_ctl_reset(unsigned int cpu)
@@ -603,10 +735,61 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
return sprintf(&buf[0], "%u\n", perf);
}
+static ssize_t show_energy_performance_available_preferences(
+ struct cpufreq_policy *policy, char *buf)
+{
+ int i = 0;
+ int offset = 0;
+
+ while (energy_perf_strings[i] != NULL)
+ offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
+
+ sysfs_emit_at(buf, offset, "\n");
+
+ return offset;
+}
+
+static ssize_t store_energy_performance_preference(
+ struct cpufreq_policy *policy, const char *buf, size_t count)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ char str_preference[21];
+ ssize_t ret;
+
+ ret = sscanf(buf, "%20s", str_preference);
+ if (ret != 1)
+ return -EINVAL;
+
+ ret = match_string(energy_perf_strings, -1, str_preference);
+ if (ret < 0)
+ return -EINVAL;
+
+ mutex_lock(&amd_pstate_limits_lock);
+ ret = amd_pstate_set_energy_pref_index(cpudata, ret);
+ mutex_unlock(&amd_pstate_limits_lock);
+
+ return ret ?: count;
+}
+
+static ssize_t show_energy_performance_preference(
+ struct cpufreq_policy *policy, char *buf)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ int preference;
+
+ preference = amd_pstate_get_energy_pref_index(cpudata);
+ if (preference < 0)
+ return preference;
+
+ return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
+}
+
cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
+cpufreq_freq_attr_rw(energy_performance_preference);
+cpufreq_freq_attr_ro(energy_performance_available_preferences);
static struct freq_attr *amd_pstate_attr[] = {
&amd_pstate_max_freq,
@@ -615,6 +798,235 @@ static struct freq_attr *amd_pstate_attr[] = {
NULL,
};
+static struct freq_attr *amd_pstate_epp_attr[] = { /* attribute list referenced by amd_pstate_epp_driver.attr */
+ &amd_pstate_max_freq,
+ &amd_pstate_lowest_nonlinear_freq,
+ &amd_pstate_highest_perf,
+ &energy_performance_preference, /* read-write: currently selected preference name */
+ &energy_performance_available_preferences, /* read-only: all accepted preference names */
+ NULL, /* sentinel terminating the list */
+};
+
+/*
+ * Refresh global_params.cppc_boost_disabled from bit 25 of MSR_K7_HWCR,
+ * read with rdmsrl().
+ */
+static inline void update_boost_state(void)
+{
+	u64 misc_en;
+
+	rdmsrl(MSR_K7_HWCR, misc_en);
+	global_params.cppc_boost_disabled = misc_en & BIT_ULL(25);
+}
+
+/*
+ * Make sure all_cpu_data[@cpunum] holds a per-CPU data object, allocating a
+ * zeroed one on first use, and reset its remembered EPP policy.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation fails.
+ */
+static int amd_pstate_init_cpu(unsigned int cpunum)
+{
+	struct amd_cpudata *data = all_cpu_data[cpunum];
+
+	if (data == NULL) {
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+		if (data == NULL)
+			return -ENOMEM;
+
+		WRITE_ONCE(all_cpu_data[cpunum], data);
+		data->cpu = cpunum;
+	}
+
+	/* An existing object is reused; only the saved policy is cleared. */
+	data->epp_policy = 0;
+	pr_debug("controlling: cpu %d\n", cpunum);
+
+	return 0;
+}
+
+/*
+ * ->init() hook of amd_pstate_epp_driver.
+ *
+ * Obtains the per-CPU data object, reads the performance capabilities,
+ * fills in the frequency limits of @policy and caches the current EPP and,
+ * on MSR based systems, the CPPC request/capability registers.
+ *
+ * Return: 0 on success, a negative errno otherwise.
+ */
+static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
+{
+	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
+	struct amd_cpudata *cpudata;
+	struct device *dev;
+	/*
+	 * Zero-initialised so that no indeterminate value is handed to
+	 * amd_pstate_get_epp() below.
+	 * NOTE(review): confirm that the helper treats a zero request value
+	 * as "read the register from hardware".
+	 */
+	u64 value = 0;
+
+	ret = amd_pstate_init_cpu(policy->cpu);
+	if (ret)
+		return ret;
+
+	cpudata = all_cpu_data[policy->cpu];
+
+	dev = get_cpu_device(policy->cpu);
+	if (!dev) {
+		ret = -ENODEV;
+		goto free_cpudata1;
+	}
+
+	ret = amd_pstate_init_perf(cpudata);
+	if (ret)
+		goto free_cpudata1;
+
+	min_freq = amd_get_min_freq(cpudata);
+	max_freq = amd_get_max_freq(cpudata);
+	nominal_freq = amd_get_nominal_freq(cpudata);
+	lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
+	if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
+		dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
+			min_freq, max_freq);
+		ret = -EINVAL;
+		goto free_cpudata1;
+	}
+
+	policy->cpuinfo.min_freq = min_freq;
+	policy->cpuinfo.max_freq = max_freq;
+
+	policy->min = policy->cpuinfo.min_freq;
+	policy->max = policy->cpuinfo.max_freq;
+	/* It will be updated by governor */
+	policy->cur = policy->cpuinfo.min_freq;
+
+	/* Initial processor data capability frequencies */
+	cpudata->max_freq = max_freq;
+	cpudata->min_freq = min_freq;
+	cpudata->nominal_freq = nominal_freq;
+	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
+
+	policy->driver_data = cpudata;
+
+	cpudata->epp_cached = amd_pstate_get_epp(cpudata, value);
+
+	/*
+	 * Set the policy to powersave to provide a valid fallback value in case
+	 * the default cpufreq governor is neither powersave nor performance.
+	 */
+	policy->policy = CPUFREQ_POLICY_POWERSAVE;
+
+	if (boot_cpu_has(X86_FEATURE_CPPC)) {
+		policy->fast_switch_possible = true;
+		/*
+		 * On a failed MSR read the data object stays registered in
+		 * all_cpu_data[] and is reused by the next init attempt.
+		 */
+		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
+		if (ret)
+			return ret;
+		WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
+		if (ret)
+			return ret;
+		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
+	}
+	amd_pstate_boost_init(cpudata);
+
+	return 0;
+
+free_cpudata1:
+	/*
+	 * Clear the slot before freeing so that a later
+	 * amd_pstate_init_cpu() cannot pick up the stale pointer.
+	 */
+	WRITE_ONCE(all_cpu_data[policy->cpu], NULL);
+	kfree(cpudata);
+	return ret;
+}
+
+static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) /* ->exit() hook of amd_pstate_epp_driver */
+{
+ pr_debug("CPU %d exiting\n", policy->cpu);
+ policy->fast_switch_possible = false; /* clears the flag that CPU init may have set */
+ return 0; /* always succeeds; the per-CPU data object is not freed here */
+}
+
+/*
+ * Re-evaluate the frequency limits of the policy that owns @cpu.
+ * Does nothing when no policy can be obtained for that CPU.
+ */
+static void amd_pstate_update_max_freq(unsigned int cpu)
+{
+	struct cpufreq_policy *pol;
+
+	pol = cpufreq_cpu_get(cpu);
+	if (pol) {
+		refresh_frequency_limits(pol);
+		/* Balance the reference taken by cpufreq_cpu_get(). */
+		cpufreq_cpu_put(pol);
+	}
+}
+
+/*
+ * ->update_limits() hook of amd_pstate_epp_driver.
+ *
+ * Re-reads the boost state under amd_pstate_driver_lock. With boost
+ * disabled the limits of every possible CPU are refreshed; otherwise only
+ * the policy of @cpu is updated.
+ */
+static void amd_pstate_epp_update_limits(unsigned int cpu)
+{
+	unsigned int i;
+
+	mutex_lock(&amd_pstate_driver_lock);
+	update_boost_state();
+
+	if (!global_params.cppc_boost_disabled) {
+		cpufreq_update_policy(cpu);
+	} else {
+		for_each_possible_cpu(i)
+			amd_pstate_update_max_freq(i);
+	}
+
+	mutex_unlock(&amd_pstate_driver_lock);
+}
+
+/*
+ * Build the CPPC request value for @cpu from the cached request, the
+ * highest/lowest perf levels and the current cpufreq policy, then store it
+ * in cppc_req_cached and hand the chosen EPP to amd_pstate_set_epp().
+ *
+ * With the performance policy the minimum perf is raised to the maximum and
+ * the EPP is forced to zero. The EPP selection is skipped when the policy
+ * has not changed since the previous call.
+ */
+static void amd_pstate_epp_init(unsigned int cpu)
+{
+	struct amd_cpudata *cpudata = all_cpu_data[cpu];
+	u32 max_perf, min_perf;
+	u64 value;
+	s16 epp;
+
+	max_perf = READ_ONCE(cpudata->highest_perf);
+	min_perf = READ_ONCE(cpudata->lowest_perf);
+
+	value = READ_ONCE(cpudata->cppc_req_cached);
+
+	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+		min_perf = max_perf;
+
+	/* Initial min/max values for CPPC Performance Controls Register */
+	value &= ~AMD_CPPC_MIN_PERF(~0L);
+	value |= AMD_CPPC_MIN_PERF(min_perf);
+
+	value &= ~AMD_CPPC_MAX_PERF(~0L);
+	value |= AMD_CPPC_MAX_PERF(max_perf);
+
+	/* The CPPC EPP feature requires the desired perf field to be zero */
+	value &= ~AMD_CPPC_DES_PERF(~0L);
+	value |= AMD_CPPC_DES_PERF(0);
+
+	if (cpudata->epp_policy == cpudata->policy) {
+		/*
+		 * Policy unchanged: fall back to the cached preference so
+		 * that the code below never sees an uninitialised epp.
+		 */
+		epp = READ_ONCE(cpudata->epp_cached);
+		goto skip_epp;
+	}
+
+	cpudata->epp_policy = cpudata->policy;
+
+	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
+		epp = amd_pstate_get_epp(cpudata, value);
+		if (epp < 0)
+			goto skip_epp;
+		/* force the epp value to be zero for performance policy */
+		epp = 0;
+	} else {
+		/* Get BIOS pre-defined epp value */
+		epp = amd_pstate_get_epp(cpudata, value);
+		if (epp)
+			goto skip_epp;
+	}
+	/* Set initial EPP value */
+	if (boot_cpu_has(X86_FEATURE_CPPC)) {
+		value &= ~GENMASK_ULL(31, 24);
+		value |= (u64)epp << 24;
+	}
+
+skip_epp:
+	WRITE_ONCE(cpudata->cppc_req_cached, value);
+	/*
+	 * A negative epp signals a failed read (see the check above), not a
+	 * preference, so it must not be programmed.
+	 */
+	if (epp >= 0)
+		amd_pstate_set_epp(cpudata, epp);
+}
+
+/*
+ * ->setpolicy() hook of amd_pstate_epp_driver: remember the requested
+ * policy in the per-CPU data and re-run the EPP initialisation for that CPU.
+ *
+ * Return: 0 on success, -ENODEV if the policy has no maximum frequency.
+ */
+static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
+{
+	struct amd_cpudata *data;
+
+	if (policy->cpuinfo.max_freq == 0)
+		return -ENODEV;
+
+	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
+		 policy->cpuinfo.max_freq, policy->max);
+
+	data = all_cpu_data[policy->cpu];
+	data->policy = policy->policy;
+	amd_pstate_epp_init(policy->cpu);
+
+	return 0;
+}
+
+/*
+ * ->verify() hook of amd_pstate_epp_driver: clamp the requested limits to
+ * the CPU limits and log the result.
+ *
+ * Return: always 0.
+ */
+static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
+{
+	cpufreq_verify_within_cpu_limits(policy);
+	/* The limits are unsigned, so print them with %u. */
+	pr_debug("policy_max =%u, policy_min=%u\n", policy->max, policy->min);
+	return 0;
+}
+
static struct cpufreq_driver amd_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
.verify = amd_pstate_verify,
@@ -628,8 +1040,20 @@ static struct cpufreq_driver amd_pstate_driver = {
.attr = amd_pstate_attr,
};
+static struct cpufreq_driver amd_pstate_epp_driver = { /* selected as default_pstate_driver when cppc_state is AMD_PSTATE_ACTIVE */
+ .flags = CPUFREQ_CONST_LOOPS,
+ .verify = amd_pstate_epp_verify_policy, /* clamps requested limits to the CPU limits */
+ .setpolicy = amd_pstate_epp_set_policy, /* stores the policy and re-runs the EPP setup */
+ .init = amd_pstate_epp_cpu_init, /* per-policy setup of limits and cached registers */
+ .exit = amd_pstate_epp_cpu_exit, /* clears fast_switch_possible */
+ .update_limits = amd_pstate_epp_update_limits, /* re-reads boost state, then refreshes limits */
+ .name = "amd_pstate_epp",
+ .attr = amd_pstate_epp_attr, /* sysfs attributes, including the EPP ones */
+};
+
static int __init amd_pstate_init(void)
{
+ static struct amd_cpudata **cpudata;
int ret;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
@@ -656,7 +1080,8 @@ static int __init amd_pstate_init(void)
/* capability check */
if (boot_cpu_has(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n");
- amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
+ if (cppc_state == AMD_PSTATE_PASSIVE)
+ default_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
} else {
pr_debug("AMD CPPC shared memory based functionality is supported\n");
static_call_update(amd_pstate_enable, cppc_enable);
@@ -664,17 +1089,21 @@ static int __init amd_pstate_init(void)
static_call_update(amd_pstate_update_perf, cppc_update_perf);
}
+ cpudata = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
+ if (!cpudata)
+ return -ENOMEM;
+ WRITE_ONCE(all_cpu_data, cpudata);
+
/* enable amd pstate feature */
ret = amd_pstate_enable(true);
if (ret) {
- pr_err("failed to enable amd-pstate with return %d\n", ret);
+ pr_err("failed to enable with return %d\n", ret);
return ret;
}
- ret = cpufreq_register_driver(&amd_pstate_driver);
+ ret = cpufreq_register_driver(default_pstate_driver);
if (ret)
- pr_err("failed to register amd_pstate_driver with return %d\n",
- ret);
+ pr_err("failed to register with return %d\n", ret);
return ret;
}
@@ -696,6 +1125,12 @@ static int __init amd_pstate_param(char *str)
if (cppc_state == AMD_PSTATE_DISABLE)
pr_info("driver is explicitly disabled\n");
+ if (cppc_state == AMD_PSTATE_ACTIVE)
+ default_pstate_driver = &amd_pstate_epp_driver;
+
+ if (cppc_state == AMD_PSTATE_PASSIVE)
+ default_pstate_driver = &amd_pstate_driver;
+
return 0;
}
@@ -47,6 +47,10 @@ struct amd_aperf_mperf {
* @prev: Last Aperf/Mperf/tsc count value read from register
* @freq: current cpu frequency value
* @boost_supported: check whether the Processor or SBIOS supports boost mode
+ * @epp_policy: Last saved policy used to set energy-performance preference
+ * @epp_cached: Cached CPPC energy-performance preference value
+ * @policy: Cpufreq policy value
+ * @cppc_cap1_cached: Cached MSR_AMD_CPPC_CAP1 register value
*
* The amd_cpudata is key private data for each CPU thread in AMD P-State, and
* represents all the attributes and goals that AMD P-State requests at runtime.
@@ -72,6 +76,12 @@ struct amd_cpudata {
u64 freq;
bool boost_supported;
+
+ /* EPP feature related attributes */
+ s16 epp_policy;
+ s16 epp_cached;
+ u32 policy;
+ u64 cppc_cap1_cached;
};
/**