[v6,04/11] cpufreq: amd_pstate: implement Pstate EPP support for the AMD processors

Message ID 20221202074719.623673-5-perry.yuan@amd.com
State New
Headers
Series Implement AMD Pstate EPP Driver |

Commit Message

Yuan, Perry Dec. 2, 2022, 7:47 a.m. UTC
  From: Perry Yuan <Perry.Yuan@amd.com>

Add EPP driver support for AMD SoCs which support a dedicated MSR for
CPPC.  EPP is used by the DPM controller to configure the frequency that
a core operates at during short periods of activity.

The SoC EPP targets are configured on a scale from 0 to 255 where 0
represents maximum performance and 255 represents maximum efficiency.

The amd-pstate driver exports profile string names to userspace that are
tied to specific EPP values.

The balance_performance string (0x80) provides the best balance for
efficiency versus power on most systems, but users can choose other
strings to meet their needs as well.

$ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences
default performance balance_performance balance_power power

$ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
balance_performance

To enable the driver,it needs to add `amd_pstate=active` to kernel
command line and kernel will load the active mode epp driver

Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
---
 drivers/cpufreq/amd-pstate.c | 643 ++++++++++++++++++++++++++++++++++-
 include/linux/amd-pstate.h   |  35 ++
 2 files changed, 672 insertions(+), 6 deletions(-)
  

Comments

Mario Limonciello Dec. 2, 2022, 5:36 p.m. UTC | #1
On 12/2/2022 01:47, Perry Yuan wrote:
> From: Perry Yuan <Perry.Yuan@amd.com>
> 
> Add EPP driver support for AMD SoCs which support a dedicated MSR for
> CPPC.  EPP is used by the DPM controller to configure the frequency that
> a core operates at during short periods of activity.
> 
> The SoC EPP targets are configured on a scale from 0 to 255 where 0
> represents maximum performance and 255 represents maximum efficiency.
> 
> The amd-pstate driver exports profile string names to userspace that are
> tied to specific EPP values.
> 
> The balance_performance string (0x80) provides the best balance for
> efficiency versus power on most systems, but users can choose other
> strings to meet their needs as well.
> 
> $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences
> default performance balance_performance balance_power power
> 
> $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
> balance_performance
> 
> To enable the driver,it needs to add `amd_pstate=active` to kernel
> command line and kernel will load the active mode epp driver
> 
> Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
> ---
>   drivers/cpufreq/amd-pstate.c | 643 ++++++++++++++++++++++++++++++++++-
>   include/linux/amd-pstate.h   |  35 ++
>   2 files changed, 672 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 204e39006dda..5989d4d25d0f 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -37,9 +37,12 @@
>   #include <linux/uaccess.h>
>   #include <linux/static_call.h>
>   #include <linux/amd-pstate.h>
> +#include <linux/cpufreq_common.h>
>   
> +#ifdef CONFIG_ACPI
>   #include <acpi/processor.h>
>   #include <acpi/cppc_acpi.h>
> +#endif
>   
>   #include <asm/msr.h>
>   #include <asm/processor.h>
> @@ -59,9 +62,130 @@
>    * we disable it by default to go acpi-cpufreq on these processors and add a
>    * module parameter to be able to enable it manually for debugging.
>    */
> -static struct cpufreq_driver amd_pstate_driver;
> +static bool cppc_active;
>   static int cppc_load __initdata;
>   
> +static struct cpufreq_driver *default_pstate_driver;
> +static struct amd_cpudata **all_cpu_data;
> +static struct amd_pstate_params global_params;
> +
> +static DEFINE_MUTEX(amd_pstate_limits_lock);
> +static DEFINE_MUTEX(amd_pstate_driver_lock);
> +
> +static bool cppc_boost __read_mostly;
> +struct kobject *amd_pstate_kobj;
> +
> +#ifdef CONFIG_ACPI_CPPC_LIB
Rather than making this conditional I would think that amd-pstate should 
probably depend on or select CONFIG_ACPI_CPPC_LIB, no?

> +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
> +{
> +	s16 epp;
> +	struct cppc_perf_caps perf_caps;
> +	int ret;
> +
> +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> +		if (!cppc_req_cached) {
> +			epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> +					&cppc_req_cached);
> +			if (epp)
> +				return epp;
> +		}
> +		epp = (cppc_req_cached >> 24) & 0xFF;
> +	} else {
> +		ret = cppc_get_epp_caps(cpudata->cpu, &perf_caps);
> +		if (ret < 0) {
> +			pr_debug("Could not retrieve energy perf value (%d)\n", ret);
> +			return -EIO;
> +		}
> +		epp = (s16) perf_caps.energy_perf;
> +	}
> +
> +	return epp;
> +}
> +#endif
> +
> +static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
> +{
> +	s16 epp;
> +	int index = -EINVAL;
> +
> +	epp = amd_pstate_get_epp(cpudata, 0);
> +	if (epp < 0)
> +		return epp;
> +
> +	switch (epp) {
> +	case HWP_EPP_PERFORMANCE:
> +		index = EPP_INDEX_PERFORMANCE;
> +		break;
> +	case HWP_EPP_BALANCE_PERFORMANCE:
> +		index = EPP_INDEX_BALANCE_PERFORMANCE;
> +		break;
> +	case HWP_EPP_BALANCE_POWERSAVE:
> +		index = EPP_INDEX_BALANCE_POWERSAVE;
> +		break;
> +	case HWP_EPP_POWERSAVE:
> +		index = EPP_INDEX_POWERSAVE;
> +		break;
> +	default:
> +			break;
> +	}
> +
> +	return index;
> +}
> +
> +#ifdef CONFIG_ACPI_CPPC_LIB
> +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
> +{
> +	int ret;
> +	struct cppc_perf_ctrls perf_ctrls;
> +
> +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> +		u64 value = READ_ONCE(cpudata->cppc_req_cached);
> +
> +		value &= ~GENMASK_ULL(31, 24);
> +		value |= (u64)epp << 24;
> +		WRITE_ONCE(cpudata->cppc_req_cached, value);
> +
> +		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> +		if (!ret)
> +			cpudata->epp_cached = epp;
> +	} else {
> +		perf_ctrls.energy_perf = epp;
> +		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
> +		if (ret) {
> +			pr_debug("failed to set energy perf value (%d)\n", ret);
> +			return ret;
> +		}
> +		cpudata->epp_cached = epp;
> +	}
> +
> +	return ret;
> +}
> +
> +static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
> +		int pref_index)
> +{
> +	int epp = -EINVAL;
> +	int ret;
> +
> +	if (!pref_index) {
> +		pr_debug("EPP pref_index is invalid\n");
> +		return -EINVAL;
> +	}
> +
> +	if (epp == -EINVAL)
> +		epp = epp_values[pref_index];
> +
> +	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> +		pr_debug("EPP cannot be set under performance policy\n");
> +		return -EBUSY;
> +	}
> +
> +	ret = amd_pstate_set_epp(cpudata, epp);
> +
> +	return ret;
> +}
> +#endif
> +
>   static inline int pstate_enable(bool enable)
>   {
>   	return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
> @@ -70,11 +194,21 @@ static inline int pstate_enable(bool enable)
>   static int cppc_enable(bool enable)
>   {
>   	int cpu, ret = 0;
> +	struct cppc_perf_ctrls perf_ctrls;
>   
>   	for_each_present_cpu(cpu) {
>   		ret = cppc_set_enable(cpu, enable);
>   		if (ret)
>   			return ret;
> +
> +		/* Enable autonomous mode for EPP */
> +		if (!cppc_active) {
> +			/* Set desired perf as zero to allow EPP firmware control */
> +			perf_ctrls.desired_perf = 0;
> +			ret = cppc_set_perf(cpu, &perf_ctrls);
> +			if (ret)
> +				return ret;
> +		}
>   	}
>   
>   	return ret;
> @@ -417,7 +551,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
>   		return;
>   
>   	cpudata->boost_supported = true;
> -	amd_pstate_driver.boost_enabled = true;
> +	default_pstate_driver->boost_enabled = true;
>   }
>   
>   static void amd_perf_ctl_reset(unsigned int cpu)
> @@ -591,10 +725,61 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
>   	return sprintf(&buf[0], "%u\n", perf);
>   }
>   
> +static ssize_t show_energy_performance_available_preferences(
> +				struct cpufreq_policy *policy, char *buf)
> +{
> +	int i = 0;
> +	int ret = 0;
> +
> +	while (energy_perf_strings[i] != NULL)
> +		ret += sysfs_emit(buf, "%s", energy_perf_strings[i++]);
> +
> +	ret += sysfs_emit(buf, "\n");
> +
> +	return ret;
> +}
> +
> +static ssize_t store_energy_performance_preference(
> +		struct cpufreq_policy *policy, const char *buf, size_t count)
> +{
> +	struct amd_cpudata *cpudata = policy->driver_data;
> +	char str_preference[21];
> +	ssize_t ret;
> +
> +	ret = sscanf(buf, "%20s", str_preference);
> +	if (ret != 1)
> +		return -EINVAL;
> +
> +	ret = match_string(energy_perf_strings, -1, str_preference);
> +	if (ret < 0)
> +		return -EINVAL;
> +
> +	mutex_lock(&amd_pstate_limits_lock);
> +	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
> +	mutex_unlock(&amd_pstate_limits_lock);
> +
> +	return ret ?: count;
> +}
> +
> +static ssize_t show_energy_performance_preference(
> +				struct cpufreq_policy *policy, char *buf)
> +{
> +	struct amd_cpudata *cpudata = policy->driver_data;
> +	int preference;
> +
> +	preference = amd_pstate_get_energy_pref_index(cpudata);
> +	if (preference < 0)
> +		return preference;
> +
> +	return  sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);

You've got an extra space after "return".

> +}
> +
>   cpufreq_freq_attr_ro(amd_pstate_max_freq);
>   cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
>   
>   cpufreq_freq_attr_ro(amd_pstate_highest_perf);
> +cpufreq_freq_attr_rw(energy_performance_preference);
> +cpufreq_freq_attr_ro(energy_performance_available_preferences);
>   
>   static struct freq_attr *amd_pstate_attr[] = {
>   	&amd_pstate_max_freq,
> @@ -603,6 +788,429 @@ static struct freq_attr *amd_pstate_attr[] = {
>   	NULL,
>   };
>   
> +static struct freq_attr *amd_pstate_epp_attr[] = {
> +	&amd_pstate_max_freq,
> +	&amd_pstate_lowest_nonlinear_freq,
> +	&amd_pstate_highest_perf,
> +	&energy_performance_preference,
> +	&energy_performance_available_preferences,
> +	NULL,
> +};
> +
> +static inline void update_boost_state(void)
> +{
> +	u64 misc_en;
> +	struct amd_cpudata *cpudata;
> +
> +	cpudata = all_cpu_data[0];
> +	rdmsrl(MSR_K7_HWCR, misc_en);
> +	global_params.cppc_boost_disabled = misc_en & BIT_ULL(25);
> +}
> +
> +static bool amd_pstate_acpi_pm_profile_server(void)
> +{
> +	if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
> +	    acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
> +		return true;
> +
> +	return false;
> +}

Right now this is only used to control boost, but I wonder if this 
should instad also control the default EPP policy as all?

> +
> +static int amd_pstate_init_cpu(unsigned int cpunum)
> +{
> +	struct amd_cpudata *cpudata;
> +
> +	cpudata = all_cpu_data[cpunum];
> +	if (!cpudata) {
> +		cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
> +		if (!cpudata)
> +			return -ENOMEM;
> +		WRITE_ONCE(all_cpu_data[cpunum], cpudata);
> +
> +		cpudata->cpu = cpunum;
> +
> +		if (cppc_active) {
> +			if (amd_pstate_acpi_pm_profile_server())
> +				cppc_boost = true;
> +		}
> +
> +	}
> +	cpudata->epp_powersave = -EINVAL;
> +	cpudata->epp_policy = 0; > +	pr_debug("controlling: cpu %d\n", cpunum);
> +	return 0;
> +}
> +
> +static int __amd_pstate_cpu_init(struct cpufreq_policy *policy)
> +{
> +	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> +	struct amd_cpudata *cpudata;
> +	struct device *dev;
> +	int rc;
> +	u64 value;
> +
> +	rc = amd_pstate_init_cpu(policy->cpu);
> +	if (rc)
> +		return rc;
> +
> +	cpudata = all_cpu_data[policy->cpu];
> +
> +	dev = get_cpu_device(policy->cpu);
> +	if (!dev)
> +		goto free_cpudata1;
> +
> +	rc = amd_pstate_init_perf(cpudata);
> +	if (rc)
> +		goto free_cpudata1;
> +
> +	min_freq = amd_get_min_freq(cpudata);
> +	max_freq = amd_get_max_freq(cpudata);
> +	nominal_freq = amd_get_nominal_freq(cpudata);
> +	lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
> +	if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
> +		dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
> +				min_freq, max_freq);
> +		ret = -EINVAL;
> +		goto free_cpudata1;
> +	}
> +
> +	policy->min = min_freq;
> +	policy->max = max_freq;
> +
> +	policy->cpuinfo.min_freq = min_freq;
> +	policy->cpuinfo.max_freq = max_freq;
> +	/* It will be updated by governor */
> +	policy->cur = policy->cpuinfo.min_freq;
> +
> +	/* Initial processor data capability frequencies */
> +	cpudata->max_freq = max_freq;
> +	cpudata->min_freq = min_freq;
> +	cpudata->nominal_freq = nominal_freq;
> +	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
> +
> +	policy->driver_data = cpudata;
> +
> +	update_boost_state();
> +	cpudata->epp_cached = amd_pstate_get_epp(cpudata, value);
> +
> +	policy->min = policy->cpuinfo.min_freq;
> +	policy->max = policy->cpuinfo.max_freq;
> +
> +	if (boot_cpu_has(X86_FEATURE_CPPC))
> +		policy->fast_switch_possible = true;
> +
> +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> +		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
> +		if (ret)
> +			return ret;
> +		WRITE_ONCE(cpudata->cppc_req_cached, value);
> +
> +		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
> +		if (ret)
> +			return ret;
> +		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
> +	}
> +	amd_pstate_boost_init(cpudata);
> +
> +	return 0;
> +
> +free_cpudata1:
> +	kfree(cpudata);
> +	return ret;
> +}
> +
> +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
> +{
> +	int ret;
> +
> +	ret = __amd_pstate_cpu_init(policy);
> +	if (ret)
> +		return ret;
> +	/*
> +	 * Set the policy to powersave to provide a valid fallback value in case
> +	 * the default cpufreq governor is neither powersave nor performance.
> +	 */
> +	policy->policy = CPUFREQ_POLICY_POWERSAVE;
> +
> +	return 0;
> +}
> +
> +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
> +{
> +	pr_debug("CPU %d exiting\n", policy->cpu);
> +	policy->fast_switch_possible = false;
> +	return 0;
> +}
> +
> +static void amd_pstate_update_max_freq(unsigned int cpu)
> +{
> +	struct cpufreq_policy *policy = policy = cpufreq_cpu_get(cpu);
> +
> +	if (!policy)
> +		return;
> +
> +	refresh_frequency_limits(policy);
> +	cpufreq_cpu_put(policy);
> +}
> +
> +static void amd_pstate_epp_update_limits(unsigned int cpu)
> +{
> +	mutex_lock(&amd_pstate_driver_lock);
> +	update_boost_state();
> +	if (global_params.cppc_boost_disabled) {
> +		for_each_possible_cpu(cpu)
> +			amd_pstate_update_max_freq(cpu);
> +	} else {
> +		cpufreq_update_policy(cpu);
> +	}
> +	mutex_unlock(&amd_pstate_driver_lock);
> +}
> +
> +static int cppc_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
> +
> +static inline void amd_pstate_boost_up(struct amd_cpudata *cpudata)
> +{
> +	u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
> +	u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
> +	u32 max_limit = (hwp_req & 0xff);
> +	u32 min_limit = (hwp_req & 0xff00) >> 8;
> +	u32 boost_level1;
> +
> +	/* If max and min are equal or already at max, nothing to boost */
> +	if (max_limit == min_limit)
> +		return;
> +
> +	/* Set boost max and min to initial value */
> +	if (!cpudata->cppc_boost_min)
> +		cpudata->cppc_boost_min = min_limit;
> +
> +	boost_level1 = ((AMD_CPPC_NOMINAL_PERF(hwp_cap) + min_limit) >> 1);
> +
> +	if (cpudata->cppc_boost_min < boost_level1)
> +		cpudata->cppc_boost_min = boost_level1;
> +	else if (cpudata->cppc_boost_min < AMD_CPPC_NOMINAL_PERF(hwp_cap))
> +		cpudata->cppc_boost_min = AMD_CPPC_NOMINAL_PERF(hwp_cap);
> +	else if (cpudata->cppc_boost_min == AMD_CPPC_NOMINAL_PERF(hwp_cap))
> +		cpudata->cppc_boost_min = max_limit;
> +	else
> +		return;
> +
> +	hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
> +	hwp_req |= AMD_CPPC_MIN_PERF(cpudata->cppc_boost_min);
> +	wrmsrl(MSR_AMD_CPPC_REQ, hwp_req);
> +	cpudata->last_update = cpudata->sample.time;
> +}
> +
> +static inline void amd_pstate_boost_down(struct amd_cpudata *cpudata)
> +{
> +	bool expired;
> +
> +	if (cpudata->cppc_boost_min) {
> +		expired = time_after64(cpudata->sample.time, cpudata->last_update +
> +					cppc_boost_hold_time_ns);
> +
> +		if (expired) {
> +			wrmsrl(MSR_AMD_CPPC_REQ, cpudata->cppc_req_cached);
> +			cpudata->cppc_boost_min = 0;
> +		}
> +	}
> +
> +	cpudata->last_update = cpudata->sample.time;
> +}
> +
> +static inline void amd_pstate_boost_update_util(struct amd_cpudata *cpudata,
> +						      u64 time)
> +{
> +	cpudata->sample.time = time;
> +	if (smp_processor_id() != cpudata->cpu)
> +		return;
> +
> +	if (cpudata->sched_flags & SCHED_CPUFREQ_IOWAIT) {
> +		bool do_io = false;
> +
> +		cpudata->sched_flags = 0;
> +		/*
> +		 * Set iowait_boost flag and update time. Since IO WAIT flag
> +		 * is set all the time, we can't just conclude that there is
> +		 * some IO bound activity is scheduled on this CPU with just
> +		 * one occurrence. If we receive at least two in two
> +		 * consecutive ticks, then we treat as boost candidate.
> +		 * This is leveraged from Intel Pstate driver.
> +		 */
> +		if (time_before64(time, cpudata->last_io_update + 2 * TICK_NSEC))
> +			do_io = true;
> +
> +		cpudata->last_io_update = time;
> +
> +		if (do_io)
> +			amd_pstate_boost_up(cpudata);
> +
> +	} else {
> +		amd_pstate_boost_down(cpudata);
> +	}
> +}
> +
> +static inline void amd_pstate_cppc_update_hook(struct update_util_data *data,
> +						u64 time, unsigned int flags)
> +{
> +	struct amd_cpudata *cpudata = container_of(data,
> +				struct amd_cpudata, update_util);
> +
> +	cpudata->sched_flags |= flags;
> +
> +	if (smp_processor_id() == cpudata->cpu)
> +		amd_pstate_boost_update_util(cpudata, time);
> +}
> +
> +static void amd_pstate_clear_update_util_hook(unsigned int cpu)
> +{
> +	struct amd_cpudata *cpudata = all_cpu_data[cpu];
> +
> +	if (!cpudata->update_util_set)
> +		return;
> +
> +	cpufreq_remove_update_util_hook(cpu);
> +	cpudata->update_util_set = false;
> +	synchronize_rcu();
> +}
> +
> +static void amd_pstate_set_update_util_hook(unsigned int cpu_num)
> +{
> +	struct amd_cpudata *cpudata = all_cpu_data[cpu_num];
> +
> +	if (!cppc_boost) {
> +		if (cpudata->update_util_set)
> +			amd_pstate_clear_update_util_hook(cpudata->cpu);
> +		return;
> +	}
> +
> +	if (cpudata->update_util_set)
> +		return;
> +
> +	cpudata->sample.time = 0;
> +	cpufreq_add_update_util_hook(cpu_num, &cpudata->update_util,
> +						amd_pstate_cppc_update_hook);
> +	cpudata->update_util_set = true;
> +}
> +
> +static void amd_pstate_epp_init(unsigned int cpu)
> +{
> +	struct amd_cpudata *cpudata = all_cpu_data[cpu];
> +	u32 max_perf, min_perf;
> +	u64 value;
> +	s16 epp;
> +	int ret;
> +
> +	max_perf = READ_ONCE(cpudata->highest_perf);
> +	min_perf = READ_ONCE(cpudata->lowest_perf);
> +
> +	value = READ_ONCE(cpudata->cppc_req_cached);
> +
> +	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> +		min_perf = max_perf;
> +
> +	/* Initial min/max values for CPPC Performance Controls Register */
> +	value &= ~AMD_CPPC_MIN_PERF(~0L);
> +	value |= AMD_CPPC_MIN_PERF(min_perf);
> +
> +	value &= ~AMD_CPPC_MAX_PERF(~0L);
> +	value |= AMD_CPPC_MAX_PERF(max_perf);
> +
> +	/* CPPC EPP feature require to set zero to the desire perf bit */
> +	value &= ~AMD_CPPC_DES_PERF(~0L);
> +	value |= AMD_CPPC_DES_PERF(0);
> +
> +	if (cpudata->epp_policy == cpudata->policy)
> +		goto skip_epp;
> +
> +	cpudata->epp_policy = cpudata->policy;
> +
> +	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> +		epp = amd_pstate_get_epp(cpudata, value);
> +		cpudata->epp_powersave = epp;
> +		if (epp < 0)
> +			goto skip_epp;
> +		/* force the epp value to be zero for performance policy */
> +		epp = 0;
> +	} else {
> +		if (cpudata->epp_powersave < 0)
> +			goto skip_epp;
> +		/* Get BIOS pre-defined epp value */
> +		epp = amd_pstate_get_epp(cpudata, value);
> +		if (epp)
> +			goto skip_epp;
> +		epp = cpudata->epp_powersave;
> +	}
> +	/* Set initial EPP value */
> +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> +		value &= ~GENMASK_ULL(31, 24);
> +		value |= (u64)epp << 24;
> +	}
> +
> +skip_epp:
> +	WRITE_ONCE(cpudata->cppc_req_cached, value);
> +	ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> +	if (!ret)
> +		cpudata->epp_cached = epp;
> +}
> +
> +static void amd_pstate_set_max_limits(struct amd_cpudata *cpudata)
> +{
> +	u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
> +	u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
> +	u32 max_limit = (hwp_cap >> 24) & 0xff;
> +
> +	hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
> +	hwp_req |= AMD_CPPC_MIN_PERF(max_limit);
> +	wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, hwp_req);
> +}
> +
> +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
> +{
> +	struct amd_cpudata *cpudata;
> +
> +	if (!policy->cpuinfo.max_freq)
> +		return -ENODEV;
> +
> +	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
> +				policy->cpuinfo.max_freq, policy->max);
> +
> +	cpudata = all_cpu_data[policy->cpu];
> +	cpudata->policy = policy->policy;
> +
> +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> +		mutex_lock(&amd_pstate_limits_lock);
> +
> +		if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> +			amd_pstate_clear_update_util_hook(policy->cpu);
> +			amd_pstate_set_max_limits(cpudata);
> +		} else {
> +			amd_pstate_set_update_util_hook(policy->cpu);
> +		}
> +
> +		if (boot_cpu_has(X86_FEATURE_CPPC))
> +			amd_pstate_epp_init(policy->cpu);
> +
> +		mutex_unlock(&amd_pstate_limits_lock);
> +	}
> +
> +	return 0;
> +}
> +
> +static void amd_pstate_verify_cpu_policy(struct amd_cpudata *cpudata,
> +					   struct cpufreq_policy_data *policy)
> +{
> +	update_boost_state();
> +	cpufreq_verify_within_cpu_limits(policy);
> +}
> +
> +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
> +{
> +	amd_pstate_verify_cpu_policy(all_cpu_data[policy->cpu], policy);
> +	pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
> +	return 0;
> +}
> +
>   static struct cpufreq_driver amd_pstate_driver = {
>   	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
>   	.verify		= amd_pstate_verify,
> @@ -616,8 +1224,20 @@ static struct cpufreq_driver amd_pstate_driver = {
>   	.attr		= amd_pstate_attr,
>   };
>   
> +static struct cpufreq_driver amd_pstate_epp_driver = {
> +	.flags		= CPUFREQ_CONST_LOOPS,
> +	.verify		= amd_pstate_epp_verify_policy,
> +	.setpolicy	= amd_pstate_epp_set_policy,
> +	.init		= amd_pstate_epp_cpu_init,
> +	.exit		= amd_pstate_epp_cpu_exit,
> +	.update_limits	= amd_pstate_epp_update_limits,
> +	.name		= "amd_pstate_epp",
> +	.attr		= amd_pstate_epp_attr,
> +};
> +
>   static int __init amd_pstate_init(void)
>   {
> +	static struct amd_cpudata **cpudata;
>   	int ret;
>   
>   	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
> @@ -644,7 +1264,8 @@ static int __init amd_pstate_init(void)
>   	/* capability check */
>   	if (boot_cpu_has(X86_FEATURE_CPPC)) {
>   		pr_debug("AMD CPPC MSR based functionality is supported\n");
> -		amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
> +		if (!cppc_active)
> +			default_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
>   	} else {
>   		pr_debug("AMD CPPC shared memory based functionality is supported\n");
>   		static_call_update(amd_pstate_enable, cppc_enable);
> @@ -652,6 +1273,10 @@ static int __init amd_pstate_init(void)
>   		static_call_update(amd_pstate_update_perf, cppc_update_perf);
>   	}
>   
> +	cpudata = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
> +	if (!cpudata)
> +		return -ENOMEM;
> +	WRITE_ONCE(all_cpu_data, cpudata);
>   	/* enable amd pstate feature */
>   	ret = amd_pstate_enable(true);
>   	if (ret) {
> @@ -659,9 +1284,9 @@ static int __init amd_pstate_init(void)
>   		return ret;
>   	}
>   
> -	ret = cpufreq_register_driver(&amd_pstate_driver);
> +	ret = cpufreq_register_driver(default_pstate_driver);
>   	if (ret)
> -		pr_err("failed to register amd_pstate_driver with return %d\n",
> +		pr_err("failed to register amd pstate driver with return %d\n",
>   		       ret);
>   
>   	return ret;
> @@ -676,8 +1301,14 @@ static int __init amd_pstate_param(char *str)
>   	if (!strcmp(str, "disable")) {
>   		cppc_load = 0;
>   		pr_info("driver is explicitly disabled\n");
> -	} else if (!strcmp(str, "passive"))
> +	} else if (!strcmp(str, "passive")) {
>   		cppc_load = 1;
> +		default_pstate_driver = &amd_pstate_driver;
> +	} else if (!strcmp(str, "active")) {
> +		cppc_active = 1;
> +		cppc_load = 1;
> +		default_pstate_driver = &amd_pstate_epp_driver;
> +	}
>   
>   	return 0;
>   }
> diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> index 1c4b8659f171..888af62040f1 100644
> --- a/include/linux/amd-pstate.h
> +++ b/include/linux/amd-pstate.h
> @@ -25,6 +25,7 @@ struct amd_aperf_mperf {
>   	u64 aperf;
>   	u64 mperf;
>   	u64 tsc;
> +	u64 time;
>   };
>   
>   /**
> @@ -47,6 +48,18 @@ struct amd_aperf_mperf {
>    * @prev: Last Aperf/Mperf/tsc count value read from register
>    * @freq: current cpu frequency value
>    * @boost_supported: check whether the Processor or SBIOS supports boost mode
> + * @epp_powersave: Last saved CPPC energy performance preference
> +				when policy switched to performance
> + * @epp_policy: Last saved policy used to set energy-performance preference
> + * @epp_cached: Cached CPPC energy-performance preference value
> + * @policy: Cpufreq policy value
> + * @sched_flags: Store scheduler flags for possible cross CPU update
> + * @update_util_set: CPUFreq utility callback is set
> + * @last_update: Time stamp of the last performance state update
> + * @cppc_boost_min: Last CPPC boosted min performance state
> + * @cppc_cap1_cached: Cached value of the last CPPC Capabilities MSR
> + * @update_util: Cpufreq utility callback information
> + * @sample: the stored performance sample
>    *
>    * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
>    * represents all the attributes and goals that AMD P-State requests at runtime.
> @@ -72,6 +85,28 @@ struct amd_cpudata {
>   
>   	u64	freq;
>   	bool	boost_supported;
> +
> +	/* EPP feature related attributes*/
> +	s16	epp_powersave;
> +	s16	epp_policy;
> +	s16	epp_cached;
> +	u32	policy;
> +	u32	sched_flags;
> +	bool	update_util_set;
> +	u64	last_update;
> +	u64	last_io_update;
> +	u32	cppc_boost_min;
> +	u64	cppc_cap1_cached;
> +	struct	update_util_data update_util;
> +	struct	amd_aperf_mperf sample;
> +};
> +
> +/**
> + * struct amd_pstate_params - global parameters for the performance control
> + * @ cppc_boost_disabled wheher the core performance boost disabled
> + */
> +struct amd_pstate_params {
> +	bool cppc_boost_disabled;
>   };
>   
>   #endif /* _LINUX_AMD_PSTATE_H */
  
kernel test robot Dec. 3, 2022, 7:52 p.m. UTC | #2
Hi Perry,

I love your patch! Yet something to improve:

[auto build test ERROR on rafael-pm/linux-next]
[also build test ERROR on linus/master v6.1-rc7 next-20221202]
[cannot apply to tip/x86/core]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Perry-Yuan/Implement-AMD-Pstate-EPP-Driver/20221202-155515
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link:    https://lore.kernel.org/r/20221202074719.623673-5-perry.yuan%40amd.com
patch subject: [PATCH v6 04/11] cpufreq: amd_pstate: implement Pstate EPP support for the AMD processors
config: i386-allyesconfig
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/6f2da26e9fe72400ed922b14ea494beb5e38dc59
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Perry-Yuan/Implement-AMD-Pstate-EPP-Driver/20221202-155515
        git checkout 6f2da26e9fe72400ed922b14ea494beb5e38dc59
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   drivers/cpufreq/amd-pstate.c: In function 'amd_pstate_get_energy_pref_index':
>> drivers/cpufreq/amd-pstate.c:111:15: error: implicit declaration of function 'amd_pstate_get_epp' [-Werror=implicit-function-declaration]
     111 |         epp = amd_pstate_get_epp(cpudata, 0);
         |               ^~~~~~~~~~~~~~~~~~
   drivers/cpufreq/amd-pstate.c: In function 'store_energy_performance_preference':
>> drivers/cpufreq/amd-pstate.c:758:15: error: implicit declaration of function 'amd_pstate_set_energy_pref_index'; did you mean 'amd_pstate_get_energy_pref_index'? [-Werror=implicit-function-declaration]
     758 |         ret = amd_pstate_set_energy_pref_index(cpudata, ret);
         |               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |               amd_pstate_get_energy_pref_index
   drivers/cpufreq/amd-pstate.c: In function 'update_boost_state':
   drivers/cpufreq/amd-pstate.c:803:29: warning: variable 'cpudata' set but not used [-Wunused-but-set-variable]
     803 |         struct amd_cpudata *cpudata;
         |                             ^~~~~~~
   In file included from drivers/cpufreq/amd-pstate.c:40:
   At top level:
   include/linux/cpufreq_common.h:48:21: warning: 'epp_values' defined but not used [-Wunused-variable]
      48 | static unsigned int epp_values[] = {
         |                     ^~~~~~~~~~
   cc1: some warnings being treated as errors


vim +/amd_pstate_get_epp +111 drivers/cpufreq/amd-pstate.c

   105	
   106	static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
   107	{
   108		s16 epp;
   109		int index = -EINVAL;
   110	
 > 111		epp = amd_pstate_get_epp(cpudata, 0);
   112		if (epp < 0)
   113			return epp;
   114	
   115		switch (epp) {
   116		case HWP_EPP_PERFORMANCE:
   117			index = EPP_INDEX_PERFORMANCE;
   118			break;
   119		case HWP_EPP_BALANCE_PERFORMANCE:
   120			index = EPP_INDEX_BALANCE_PERFORMANCE;
   121			break;
   122		case HWP_EPP_BALANCE_POWERSAVE:
   123			index = EPP_INDEX_BALANCE_POWERSAVE;
   124			break;
   125		case HWP_EPP_POWERSAVE:
   126			index = EPP_INDEX_POWERSAVE;
   127			break;
   128		default:
   129				break;
   130		}
   131	
   132		return index;
   133	}
   134
  
Huang Rui Dec. 5, 2022, 12:22 p.m. UTC | #3
On Fri, Dec 02, 2022 at 03:47:12PM +0800, Yuan, Perry wrote:
> From: Perry Yuan <Perry.Yuan@amd.com>
> 

> cpufreq: amd_pstate: implement Pstate EPP support for the AMD processors

I suggest that we align the format like "cpufreq: amd-pstate:" as the
prefix of the subject like previous patches. Because it's there anywhere
even in the documentation file. I think it's not worth to change such as
minor update like amd-pstate to amd_pstate for all the files and comments.
So let's align the previous way.

> Add EPP driver support for AMD SoCs which support a dedicated MSR for
> CPPC.  EPP is used by the DPM controller to configure the frequency that
> a core operates at during short periods of activity.
> 
> The SoC EPP targets are configured on a scale from 0 to 255 where 0
> represents maximum performance and 255 represents maximum efficiency.
> 
> The amd-pstate driver exports profile string names to userspace that are
> tied to specific EPP values.
> 
> The balance_performance string (0x80) provides the best balance for
> efficiency versus power on most systems, but users can choose other
> strings to meet their needs as well.
> 
> $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences
> default performance balance_performance balance_power power
> 
> $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
> balance_performance
> 
> To enable the driver,it needs to add `amd_pstate=active` to kernel
> command line and kernel will load the active mode epp driver
> 
> Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
> ---
>  drivers/cpufreq/amd-pstate.c | 643 ++++++++++++++++++++++++++++++++++-
>  include/linux/amd-pstate.h   |  35 ++
>  2 files changed, 672 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 204e39006dda..5989d4d25d0f 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -37,9 +37,12 @@
>  #include <linux/uaccess.h>
>  #include <linux/static_call.h>
>  #include <linux/amd-pstate.h>
> +#include <linux/cpufreq_common.h>
>  
> +#ifdef CONFIG_ACPI
>  #include <acpi/processor.h>
>  #include <acpi/cppc_acpi.h>
> +#endif

The amd-pstate module depends on ACPI in the kconfig, so we won't need add
CONFIG_ACPI here.

>  
>  #include <asm/msr.h>
>  #include <asm/processor.h>
> @@ -59,9 +62,130 @@
>   * we disable it by default to go acpi-cpufreq on these processors and add a
>   * module parameter to be able to enable it manually for debugging.
>   */
> -static struct cpufreq_driver amd_pstate_driver;
> +static bool cppc_active;
>  static int cppc_load __initdata;
>  
> +static struct cpufreq_driver *default_pstate_driver;
> +static struct amd_cpudata **all_cpu_data;
> +static struct amd_pstate_params global_params;
> +
> +static DEFINE_MUTEX(amd_pstate_limits_lock);
> +static DEFINE_MUTEX(amd_pstate_driver_lock);
> +
> +static bool cppc_boost __read_mostly;
> +struct kobject *amd_pstate_kobj;

Where is it using amd_pstate_kobj?

> +
> +#ifdef CONFIG_ACPI_CPPC_LIB
> +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
> +{
> +	s16 epp;
> +	struct cppc_perf_caps perf_caps;
> +	int ret;
> +
> +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> +		if (!cppc_req_cached) {
> +			epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> +					&cppc_req_cached);
> +			if (epp)
> +				return epp;
> +		}
> +		epp = (cppc_req_cached >> 24) & 0xFF;
> +	} else {
> +		ret = cppc_get_epp_caps(cpudata->cpu, &perf_caps);
> +		if (ret < 0) {
> +			pr_debug("Could not retrieve energy perf value (%d)\n", ret);
> +			return -EIO;
> +		}
> +		epp = (s16) perf_caps.energy_perf;
> +	}
> +
> +	return epp;
> +}
> +#endif
> +
> +static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
> +{
> +	s16 epp;
> +	int index = -EINVAL;
> +
> +	epp = amd_pstate_get_epp(cpudata, 0);
> +	if (epp < 0)
> +		return epp;
> +
> +	switch (epp) {
> +	case HWP_EPP_PERFORMANCE:
> +		index = EPP_INDEX_PERFORMANCE;
> +		break;
> +	case HWP_EPP_BALANCE_PERFORMANCE:
> +		index = EPP_INDEX_BALANCE_PERFORMANCE;
> +		break;
> +	case HWP_EPP_BALANCE_POWERSAVE:
> +		index = EPP_INDEX_BALANCE_POWERSAVE;
> +		break;
> +	case HWP_EPP_POWERSAVE:
> +		index = EPP_INDEX_POWERSAVE;
> +		break;
> +	default:
> +			break;
> +	}
> +
> +	return index;
> +}
> +
> +#ifdef CONFIG_ACPI_CPPC_LIB
> +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
> +{
> +	int ret;
> +	struct cppc_perf_ctrls perf_ctrls;
> +
> +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> +		u64 value = READ_ONCE(cpudata->cppc_req_cached);
> +
> +		value &= ~GENMASK_ULL(31, 24);
> +		value |= (u64)epp << 24;
> +		WRITE_ONCE(cpudata->cppc_req_cached, value);
> +
> +		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> +		if (!ret)
> +			cpudata->epp_cached = epp;
> +	} else {
> +		perf_ctrls.energy_perf = epp;
> +		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
> +		if (ret) {
> +			pr_debug("failed to set energy perf value (%d)\n", ret);
> +			return ret;
> +		}
> +		cpudata->epp_cached = epp;
> +	}
> +
> +	return ret;
> +}
> +
> +static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
> +		int pref_index)
> +{
> +	int epp = -EINVAL;
> +	int ret;
> +
> +	if (!pref_index) {
> +		pr_debug("EPP pref_index is invalid\n");
> +		return -EINVAL;
> +	}
> +
> +	if (epp == -EINVAL)
> +		epp = epp_values[pref_index];
> +
> +	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> +		pr_debug("EPP cannot be set under performance policy\n");
> +		return -EBUSY;
> +	}
> +
> +	ret = amd_pstate_set_epp(cpudata, epp);
> +
> +	return ret;
> +}
> +#endif
> +
>  static inline int pstate_enable(bool enable)
>  {
>  	return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
> @@ -70,11 +194,21 @@ static inline int pstate_enable(bool enable)
>  static int cppc_enable(bool enable)
>  {
>  	int cpu, ret = 0;
> +	struct cppc_perf_ctrls perf_ctrls;
>  
>  	for_each_present_cpu(cpu) {
>  		ret = cppc_set_enable(cpu, enable);
>  		if (ret)
>  			return ret;
> +
> +		/* Enable autonomous mode for EPP */
> +		if (!cppc_active) {
> +			/* Set desired perf as zero to allow EPP firmware control */
> +			perf_ctrls.desired_perf = 0;
> +			ret = cppc_set_perf(cpu, &perf_ctrls);
> +			if (ret)
> +				return ret;
> +		}
>  	}
>  
>  	return ret;
> @@ -417,7 +551,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
>  		return;
>  
>  	cpudata->boost_supported = true;
> -	amd_pstate_driver.boost_enabled = true;
> +	default_pstate_driver->boost_enabled = true;
>  }
>  
>  static void amd_perf_ctl_reset(unsigned int cpu)
> @@ -591,10 +725,61 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
>  	return sprintf(&buf[0], "%u\n", perf);
>  }
>  
> +static ssize_t show_energy_performance_available_preferences(
> +				struct cpufreq_policy *policy, char *buf)
> +{
> +	int i = 0;
> +	int ret = 0;
> +
> +	while (energy_perf_strings[i] != NULL)
> +		ret += sysfs_emit(buf, "%s", energy_perf_strings[i++]);
> +
> +	ret += sysfs_emit(buf, "\n");
> +
> +	return ret;
> +}
> +
> +static ssize_t store_energy_performance_preference(
> +		struct cpufreq_policy *policy, const char *buf, size_t count)
> +{
> +	struct amd_cpudata *cpudata = policy->driver_data;
> +	char str_preference[21];
> +	ssize_t ret;
> +
> +	ret = sscanf(buf, "%20s", str_preference);
> +	if (ret != 1)
> +		return -EINVAL;
> +
> +	ret = match_string(energy_perf_strings, -1, str_preference);
> +	if (ret < 0)
> +		return -EINVAL;
> +
> +	mutex_lock(&amd_pstate_limits_lock);
> +	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
> +	mutex_unlock(&amd_pstate_limits_lock);
> +
> +	return ret ?: count;
> +}
> +
> +static ssize_t show_energy_performance_preference(
> +				struct cpufreq_policy *policy, char *buf)
> +{
> +	struct amd_cpudata *cpudata = policy->driver_data;
> +	int preference;
> +
> +	preference = amd_pstate_get_energy_pref_index(cpudata);
> +	if (preference < 0)
> +		return preference;
> +
> +	return  sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
> +}
> +
>  cpufreq_freq_attr_ro(amd_pstate_max_freq);
>  cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
>  
>  cpufreq_freq_attr_ro(amd_pstate_highest_perf);
> +cpufreq_freq_attr_rw(energy_performance_preference);
> +cpufreq_freq_attr_ro(energy_performance_available_preferences);
>  
>  static struct freq_attr *amd_pstate_attr[] = {
>  	&amd_pstate_max_freq,
> @@ -603,6 +788,429 @@ static struct freq_attr *amd_pstate_attr[] = {
>  	NULL,
>  };
>  
> +static struct freq_attr *amd_pstate_epp_attr[] = {
> +	&amd_pstate_max_freq,
> +	&amd_pstate_lowest_nonlinear_freq,
> +	&amd_pstate_highest_perf,
> +	&energy_performance_preference,
> +	&energy_performance_available_preferences,
> +	NULL,
> +};
> +
> +static inline void update_boost_state(void)
> +{
> +	u64 misc_en;
> +	struct amd_cpudata *cpudata;
> +
> +	cpudata = all_cpu_data[0];
> +	rdmsrl(MSR_K7_HWCR, misc_en);
> +	global_params.cppc_boost_disabled = misc_en & BIT_ULL(25);
> +}
> +
> +static bool amd_pstate_acpi_pm_profile_server(void)
> +{
> +	if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
> +	    acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
> +		return true;
> +
> +	return false;
> +}
> +
> +static int amd_pstate_init_cpu(unsigned int cpunum)
> +{
> +	struct amd_cpudata *cpudata;
> +
> +	cpudata = all_cpu_data[cpunum];
> +	if (!cpudata) {
> +		cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
> +		if (!cpudata)
> +			return -ENOMEM;
> +		WRITE_ONCE(all_cpu_data[cpunum], cpudata);
> +
> +		cpudata->cpu = cpunum;
> +
> +		if (cppc_active) {
> +			if (amd_pstate_acpi_pm_profile_server())
> +				cppc_boost = true;
> +		}
> +
> +	}
> +	cpudata->epp_powersave = -EINVAL;
> +	cpudata->epp_policy = 0;
> +	pr_debug("controlling: cpu %d\n", cpunum);
> +	return 0;
> +}
> +
> +static int __amd_pstate_cpu_init(struct cpufreq_policy *policy)
> +{
> +	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> +	struct amd_cpudata *cpudata;
> +	struct device *dev;
> +	int rc;
> +	u64 value;
> +
> +	rc = amd_pstate_init_cpu(policy->cpu);
> +	if (rc)
> +		return rc;
> +
> +	cpudata = all_cpu_data[policy->cpu];
> +
> +	dev = get_cpu_device(policy->cpu);
> +	if (!dev)
> +		goto free_cpudata1;
> +
> +	rc = amd_pstate_init_perf(cpudata);
> +	if (rc)
> +		goto free_cpudata1;
> +
> +	min_freq = amd_get_min_freq(cpudata);
> +	max_freq = amd_get_max_freq(cpudata);
> +	nominal_freq = amd_get_nominal_freq(cpudata);
> +	lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
> +	if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
> +		dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
> +				min_freq, max_freq);
> +		ret = -EINVAL;
> +		goto free_cpudata1;
> +	}
> +
> +	policy->min = min_freq;
> +	policy->max = max_freq;
> +
> +	policy->cpuinfo.min_freq = min_freq;
> +	policy->cpuinfo.max_freq = max_freq;
> +	/* It will be updated by governor */
> +	policy->cur = policy->cpuinfo.min_freq;
> +
> +	/* Initial processor data capability frequencies */
> +	cpudata->max_freq = max_freq;
> +	cpudata->min_freq = min_freq;
> +	cpudata->nominal_freq = nominal_freq;
> +	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
> +
> +	policy->driver_data = cpudata;
> +
> +	update_boost_state();
> +	cpudata->epp_cached = amd_pstate_get_epp(cpudata, value);
> +
> +	policy->min = policy->cpuinfo.min_freq;
> +	policy->max = policy->cpuinfo.max_freq;
> +
> +	if (boot_cpu_has(X86_FEATURE_CPPC))
> +		policy->fast_switch_possible = true;
> +
> +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> +		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
> +		if (ret)
> +			return ret;
> +		WRITE_ONCE(cpudata->cppc_req_cached, value);
> +
> +		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
> +		if (ret)
> +			return ret;
> +		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
> +	}
> +	amd_pstate_boost_init(cpudata);
> +
> +	return 0;
> +
> +free_cpudata1:
> +	kfree(cpudata);
> +	return ret;
> +}
> +
> +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
> +{
> +	int ret;
> +
> +	ret = __amd_pstate_cpu_init(policy);
> +	if (ret)
> +		return ret;
> +	/*
> +	 * Set the policy to powersave to provide a valid fallback value in case
> +	 * the default cpufreq governor is neither powersave nor performance.
> +	 */
> +	policy->policy = CPUFREQ_POLICY_POWERSAVE;
> +
> +	return 0;
> +}
> +
> +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
> +{
> +	pr_debug("CPU %d exiting\n", policy->cpu);
> +	policy->fast_switch_possible = false;
> +	return 0;
> +}
> +
> +static void amd_pstate_update_max_freq(unsigned int cpu)
> +{
> +	struct cpufreq_policy *policy = policy = cpufreq_cpu_get(cpu);
> +
> +	if (!policy)
> +		return;
> +
> +	refresh_frequency_limits(policy);
> +	cpufreq_cpu_put(policy);
> +}
> +
> +static void amd_pstate_epp_update_limits(unsigned int cpu)
> +{
> +	mutex_lock(&amd_pstate_driver_lock);
> +	update_boost_state();
> +	if (global_params.cppc_boost_disabled) {
> +		for_each_possible_cpu(cpu)
> +			amd_pstate_update_max_freq(cpu);
> +	} else {
> +		cpufreq_update_policy(cpu);
> +	}
> +	mutex_unlock(&amd_pstate_driver_lock);
> +}
> +
> +static int cppc_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
> +
> +static inline void amd_pstate_boost_up(struct amd_cpudata *cpudata)
> +{
> +	u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
> +	u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
> +	u32 max_limit = (hwp_req & 0xff);
> +	u32 min_limit = (hwp_req & 0xff00) >> 8;
> +	u32 boost_level1;
> +
> +	/* If max and min are equal or already at max, nothing to boost */
> +	if (max_limit == min_limit)
> +		return;
> +
> +	/* Set boost max and min to initial value */
> +	if (!cpudata->cppc_boost_min)
> +		cpudata->cppc_boost_min = min_limit;
> +
> +	boost_level1 = ((AMD_CPPC_NOMINAL_PERF(hwp_cap) + min_limit) >> 1);
> +
> +	if (cpudata->cppc_boost_min < boost_level1)
> +		cpudata->cppc_boost_min = boost_level1;
> +	else if (cpudata->cppc_boost_min < AMD_CPPC_NOMINAL_PERF(hwp_cap))
> +		cpudata->cppc_boost_min = AMD_CPPC_NOMINAL_PERF(hwp_cap);
> +	else if (cpudata->cppc_boost_min == AMD_CPPC_NOMINAL_PERF(hwp_cap))
> +		cpudata->cppc_boost_min = max_limit;
> +	else
> +		return;
> +
> +	hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
> +	hwp_req |= AMD_CPPC_MIN_PERF(cpudata->cppc_boost_min);
> +	wrmsrl(MSR_AMD_CPPC_REQ, hwp_req);
> +	cpudata->last_update = cpudata->sample.time;
> +}
> +
> +static inline void amd_pstate_boost_down(struct amd_cpudata *cpudata)
> +{
> +	bool expired;
> +
> +	if (cpudata->cppc_boost_min) {
> +		expired = time_after64(cpudata->sample.time, cpudata->last_update +
> +					cppc_boost_hold_time_ns);
> +
> +		if (expired) {
> +			wrmsrl(MSR_AMD_CPPC_REQ, cpudata->cppc_req_cached);
> +			cpudata->cppc_boost_min = 0;
> +		}
> +	}
> +
> +	cpudata->last_update = cpudata->sample.time;
> +}
> +
> +static inline void amd_pstate_boost_update_util(struct amd_cpudata *cpudata,
> +						      u64 time)
> +{
> +	cpudata->sample.time = time;
> +	if (smp_processor_id() != cpudata->cpu)
> +		return;
> +
> +	if (cpudata->sched_flags & SCHED_CPUFREQ_IOWAIT) {
> +		bool do_io = false;
> +
> +		cpudata->sched_flags = 0;
> +		/*
> +		 * Set iowait_boost flag and update time. Since IO WAIT flag
> +		 * is set all the time, we can't just conclude that there is
> +		 * some IO bound activity is scheduled on this CPU with just
> +		 * one occurrence. If we receive at least two in two
> +		 * consecutive ticks, then we treat as boost candidate.
> +		 * This is leveraged from Intel Pstate driver.
> +		 */
> +		if (time_before64(time, cpudata->last_io_update + 2 * TICK_NSEC))
> +			do_io = true;
> +
> +		cpudata->last_io_update = time;
> +
> +		if (do_io)
> +			amd_pstate_boost_up(cpudata);
> +
> +	} else {
> +		amd_pstate_boost_down(cpudata);
> +	}
> +}
> +
> +static inline void amd_pstate_cppc_update_hook(struct update_util_data *data,
> +						u64 time, unsigned int flags)
> +{
> +	struct amd_cpudata *cpudata = container_of(data,
> +				struct amd_cpudata, update_util);
> +
> +	cpudata->sched_flags |= flags;
> +
> +	if (smp_processor_id() == cpudata->cpu)
> +		amd_pstate_boost_update_util(cpudata, time);
> +}
> +
> +static void amd_pstate_clear_update_util_hook(unsigned int cpu)
> +{
> +	struct amd_cpudata *cpudata = all_cpu_data[cpu];
> +
> +	if (!cpudata->update_util_set)
> +		return;
> +
> +	cpufreq_remove_update_util_hook(cpu);
> +	cpudata->update_util_set = false;
> +	synchronize_rcu();
> +}
> +
> +static void amd_pstate_set_update_util_hook(unsigned int cpu_num)
> +{
> +	struct amd_cpudata *cpudata = all_cpu_data[cpu_num];
> +
> +	if (!cppc_boost) {
> +		if (cpudata->update_util_set)
> +			amd_pstate_clear_update_util_hook(cpudata->cpu);
> +		return;
> +	}
> +
> +	if (cpudata->update_util_set)
> +		return;
> +
> +	cpudata->sample.time = 0;
> +	cpufreq_add_update_util_hook(cpu_num, &cpudata->update_util,
> +						amd_pstate_cppc_update_hook);
> +	cpudata->update_util_set = true;
> +}
> +
> +static void amd_pstate_epp_init(unsigned int cpu)
> +{
> +	struct amd_cpudata *cpudata = all_cpu_data[cpu];
> +	u32 max_perf, min_perf;
> +	u64 value;
> +	s16 epp;
> +	int ret;
> +
> +	max_perf = READ_ONCE(cpudata->highest_perf);
> +	min_perf = READ_ONCE(cpudata->lowest_perf);
> +
> +	value = READ_ONCE(cpudata->cppc_req_cached);
> +
> +	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> +		min_perf = max_perf;
> +
> +	/* Initial min/max values for CPPC Performance Controls Register */
> +	value &= ~AMD_CPPC_MIN_PERF(~0L);
> +	value |= AMD_CPPC_MIN_PERF(min_perf);
> +
> +	value &= ~AMD_CPPC_MAX_PERF(~0L);
> +	value |= AMD_CPPC_MAX_PERF(max_perf);
> +
> +	/* CPPC EPP feature require to set zero to the desire perf bit */
> +	value &= ~AMD_CPPC_DES_PERF(~0L);
> +	value |= AMD_CPPC_DES_PERF(0);
> +
> +	if (cpudata->epp_policy == cpudata->policy)
> +		goto skip_epp;
> +
> +	cpudata->epp_policy = cpudata->policy;
> +
> +	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> +		epp = amd_pstate_get_epp(cpudata, value);
> +		cpudata->epp_powersave = epp;
> +		if (epp < 0)
> +			goto skip_epp;
> +		/* force the epp value to be zero for performance policy */
> +		epp = 0;
> +	} else {
> +		if (cpudata->epp_powersave < 0)
> +			goto skip_epp;
> +		/* Get BIOS pre-defined epp value */
> +		epp = amd_pstate_get_epp(cpudata, value);
> +		if (epp)
> +			goto skip_epp;
> +		epp = cpudata->epp_powersave;
> +	}
> +	/* Set initial EPP value */
> +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> +		value &= ~GENMASK_ULL(31, 24);
> +		value |= (u64)epp << 24;
> +	}
> +
> +skip_epp:
> +	WRITE_ONCE(cpudata->cppc_req_cached, value);
> +	ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> +	if (!ret)
> +		cpudata->epp_cached = epp;
> +}
> +
> +static void amd_pstate_set_max_limits(struct amd_cpudata *cpudata)
> +{
> +	u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
> +	u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
> +	u32 max_limit = (hwp_cap >> 24) & 0xff;
> +
> +	hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
> +	hwp_req |= AMD_CPPC_MIN_PERF(max_limit);
> +	wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, hwp_req);
> +}
> +
> +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
> +{
> +	struct amd_cpudata *cpudata;
> +
> +	if (!policy->cpuinfo.max_freq)
> +		return -ENODEV;
> +
> +	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
> +				policy->cpuinfo.max_freq, policy->max);
> +
> +	cpudata = all_cpu_data[policy->cpu];
> +	cpudata->policy = policy->policy;
> +
> +	if (boot_cpu_has(X86_FEATURE_CPPC)) {

X86_FEATURE_CPPC indicated the MSR support, but I believe the share memory
CPU also has the EPP, right?

> +		mutex_lock(&amd_pstate_limits_lock);
> +
> +		if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> +			amd_pstate_clear_update_util_hook(policy->cpu);
> +			amd_pstate_set_max_limits(cpudata);
> +		} else {
> +			amd_pstate_set_update_util_hook(policy->cpu);

May I know why amd processors also needs to set and update util hook?

> +		}
> +
> +		if (boot_cpu_has(X86_FEATURE_CPPC))

I am confused why if (boot_cpu_has(X86_FEATURE_CPPC) is still inside of a
if (boot_cpu_has(X86_FEATURE_CPPC)).

> +			amd_pstate_epp_init(policy->cpu);
> +
> +		mutex_unlock(&amd_pstate_limits_lock);
> +	}
> +
> +	return 0;
> +}
> +
> +static void amd_pstate_verify_cpu_policy(struct amd_cpudata *cpudata,
> +					   struct cpufreq_policy_data *policy)
> +{
> +	update_boost_state();
> +	cpufreq_verify_within_cpu_limits(policy);
> +}
> +
> +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
> +{
> +	amd_pstate_verify_cpu_policy(all_cpu_data[policy->cpu], policy);
> +	pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
> +	return 0;
> +}
> +
>  static struct cpufreq_driver amd_pstate_driver = {
>  	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
>  	.verify		= amd_pstate_verify,
> @@ -616,8 +1224,20 @@ static struct cpufreq_driver amd_pstate_driver = {
>  	.attr		= amd_pstate_attr,
>  };
>  
> +static struct cpufreq_driver amd_pstate_epp_driver = {
> +	.flags		= CPUFREQ_CONST_LOOPS,
> +	.verify		= amd_pstate_epp_verify_policy,
> +	.setpolicy	= amd_pstate_epp_set_policy,
> +	.init		= amd_pstate_epp_cpu_init,
> +	.exit		= amd_pstate_epp_cpu_exit,
> +	.update_limits	= amd_pstate_epp_update_limits,
> +	.name		= "amd_pstate_epp",
> +	.attr		= amd_pstate_epp_attr,
> +};
> +
>  static int __init amd_pstate_init(void)
>  {
> +	static struct amd_cpudata **cpudata;
>  	int ret;
>  
>  	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
> @@ -644,7 +1264,8 @@ static int __init amd_pstate_init(void)
>  	/* capability check */
>  	if (boot_cpu_has(X86_FEATURE_CPPC)) {
>  		pr_debug("AMD CPPC MSR based functionality is supported\n");
> -		amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
> +		if (!cppc_active)
> +			default_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
>  	} else {
>  		pr_debug("AMD CPPC shared memory based functionality is supported\n");
>  		static_call_update(amd_pstate_enable, cppc_enable);
> @@ -652,6 +1273,10 @@ static int __init amd_pstate_init(void)
>  		static_call_update(amd_pstate_update_perf, cppc_update_perf);
>  	}
>  
> +	cpudata = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
> +	if (!cpudata)
> +		return -ENOMEM;
> +	WRITE_ONCE(all_cpu_data, cpudata);
>  	/* enable amd pstate feature */
>  	ret = amd_pstate_enable(true);
>  	if (ret) {
> @@ -659,9 +1284,9 @@ static int __init amd_pstate_init(void)
>  		return ret;
>  	}
>  
> -	ret = cpufreq_register_driver(&amd_pstate_driver);
> +	ret = cpufreq_register_driver(default_pstate_driver);
>  	if (ret)
> -		pr_err("failed to register amd_pstate_driver with return %d\n",
> +		pr_err("failed to register amd pstate driver with return %d\n",
>  		       ret);
>  
>  	return ret;
> @@ -676,8 +1301,14 @@ static int __init amd_pstate_param(char *str)
>  	if (!strcmp(str, "disable")) {
>  		cppc_load = 0;
>  		pr_info("driver is explicitly disabled\n");
> -	} else if (!strcmp(str, "passive"))
> +	} else if (!strcmp(str, "passive")) {
>  		cppc_load = 1;
> +		default_pstate_driver = &amd_pstate_driver;
> +	} else if (!strcmp(str, "active")) {
> +		cppc_active = 1;
> +		cppc_load = 1;
> +		default_pstate_driver = &amd_pstate_epp_driver;
> +	}
>  
>  	return 0;
>  }
> diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> index 1c4b8659f171..888af62040f1 100644
> --- a/include/linux/amd-pstate.h
> +++ b/include/linux/amd-pstate.h
> @@ -25,6 +25,7 @@ struct amd_aperf_mperf {
>  	u64 aperf;
>  	u64 mperf;
>  	u64 tsc;
> +	u64 time;
>  };
>  
>  /**
> @@ -47,6 +48,18 @@ struct amd_aperf_mperf {
>   * @prev: Last Aperf/Mperf/tsc count value read from register
>   * @freq: current cpu frequency value
>   * @boost_supported: check whether the Processor or SBIOS supports boost mode
> + * @epp_powersave: Last saved CPPC energy performance preference
> +				when policy switched to performance
> + * @epp_policy: Last saved policy used to set energy-performance preference
> + * @epp_cached: Cached CPPC energy-performance preference value
> + * @policy: Cpufreq policy value
> + * @sched_flags: Store scheduler flags for possible cross CPU update
> + * @update_util_set: CPUFreq utility callback is set
> + * @last_update: Time stamp of the last performance state update
> + * @cppc_boost_min: Last CPPC boosted min performance state
> + * @cppc_cap1_cached: Cached value of the last CPPC Capabilities MSR
> + * @update_util: Cpufreq utility callback information
> + * @sample: the stored performance sample
>   *
>   * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
>   * represents all the attributes and goals that AMD P-State requests at runtime.
> @@ -72,6 +85,28 @@ struct amd_cpudata {
>  
>  	u64	freq;
>  	bool	boost_supported;
> +
> +	/* EPP feature related attributes*/
> +	s16	epp_powersave;
> +	s16	epp_policy;
> +	s16	epp_cached;
> +	u32	policy;
> +	u32	sched_flags;
> +	bool	update_util_set;
> +	u64	last_update;
> +	u64	last_io_update;
> +	u32	cppc_boost_min;
> +	u64	cppc_cap1_cached;
> +	struct	update_util_data update_util;
> +	struct	amd_aperf_mperf sample;
> +};
> +
> +/**
> + * struct amd_pstate_params - global parameters for the performance control
> + * @ cppc_boost_disabled wheher the core performance boost disabled
> + */
> +struct amd_pstate_params {
> +	bool cppc_boost_disabled;
>  };
>  
>  #endif /* _LINUX_AMD_PSTATE_H */
> -- 
> 2.34.1
>
  
Yuan, Perry Dec. 8, 2022, 3:43 p.m. UTC | #4
[AMD Official Use Only - General]

Hi Ray, 

> -----Original Message-----
> From: Huang, Ray <Ray.Huang@amd.com>
> Sent: Monday, December 5, 2022 8:22 PM
> To: Yuan, Perry <Perry.Yuan@amd.com>
> Cc: rafael.j.wysocki@intel.com; Limonciello, Mario
> <Mario.Limonciello@amd.com>; viresh.kumar@linaro.org; Sharma, Deepak
> <Deepak.Sharma@amd.com>; Fontenot, Nathan
> <Nathan.Fontenot@amd.com>; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Huang, Shimmer
> <Shimmer.Huang@amd.com>; Du, Xiaojian <Xiaojian.Du@amd.com>; Meng,
> Li (Jassmine) <Li.Meng@amd.com>; Karny, Wyes <Wyes.Karny@amd.com>;
> linux-pm@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v6 04/11] cpufreq: amd_pstate: implement Pstate EPP
> support for the AMD processors
> 
> On Fri, Dec 02, 2022 at 03:47:12PM +0800, Yuan, Perry wrote:
> > From: Perry Yuan <Perry.Yuan@amd.com>
> >
> 
> > cpufreq: amd_pstate: implement Pstate EPP support for the AMD
> > processors
> 
> I suggest that we align the format like "cpufreq: amd-pstate:" as the prefix of
> the subject like previous patches. Because it's there anywhere even in the
> documentation file. I think it's not worth to change such as minor update like
> amd-pstate to amd_pstate for all the files and comments.
> So let's align the previous way.

Agreed , I have revised the title  and part of the kernel docs update patches in V7.
Please take a look at the v7 if you have some other concerns. 


> 
> > Add EPP driver support for AMD SoCs which support a dedicated MSR for
> > CPPC.  EPP is used by the DPM controller to configure the frequency
> > that a core operates at during short periods of activity.
> >
> > The SoC EPP targets are configured on a scale from 0 to 255 where 0
> > represents maximum performance and 255 represents maximum
> efficiency.
> >
> > The amd-pstate driver exports profile string names to userspace that
> > are tied to specific EPP values.
> >
> > The balance_performance string (0x80) provides the best balance for
> > efficiency versus power on most systems, but users can choose other
> > strings to meet their needs as well.
> >
> > $ cat
> >
> /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_
> p
> > references default performance balance_performance balance_power
> power
> >
> > $ cat
> >
> /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preferenc
> e
> > balance_performance
> >
> > To enable the driver,it needs to add `amd_pstate=active` to kernel
> > command line and kernel will load the active mode epp driver
> >
> > Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
> > ---
> >  drivers/cpufreq/amd-pstate.c | 643
> ++++++++++++++++++++++++++++++++++-
> >  include/linux/amd-pstate.h   |  35 ++
> >  2 files changed, 672 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/cpufreq/amd-pstate.c
> > b/drivers/cpufreq/amd-pstate.c index 204e39006dda..5989d4d25d0f
> 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -37,9 +37,12 @@
> >  #include <linux/uaccess.h>
> >  #include <linux/static_call.h>
> >  #include <linux/amd-pstate.h>
> > +#include <linux/cpufreq_common.h>
> >
> > +#ifdef CONFIG_ACPI
> >  #include <acpi/processor.h>
> >  #include <acpi/cppc_acpi.h>
> > +#endif
> 
> The amd-pstate module depends on ACPI in the kconfig, so we won't need
> add CONFIG_ACPI here.

Removed in V7.


> 
> >
> >  #include <asm/msr.h>
> >  #include <asm/processor.h>
> > @@ -59,9 +62,130 @@
> >   * we disable it by default to go acpi-cpufreq on these processors and add
> a
> >   * module parameter to be able to enable it manually for debugging.
> >   */
> > -static struct cpufreq_driver amd_pstate_driver;
> > +static bool cppc_active;
> >  static int cppc_load __initdata;
> >
> > +static struct cpufreq_driver *default_pstate_driver; static struct
> > +amd_cpudata **all_cpu_data; static struct amd_pstate_params
> > +global_params;
> > +
> > +static DEFINE_MUTEX(amd_pstate_limits_lock);
> > +static DEFINE_MUTEX(amd_pstate_driver_lock);
> > +
> > +static bool cppc_boost __read_mostly; struct kobject
> > +*amd_pstate_kobj;
> 
> Where is it using amd_pstate_kobj?

Moved the amd_pstate_kobj definition to the patch where it is actually used in v7. 

> 
> > +
> > +#ifdef CONFIG_ACPI_CPPC_LIB
> > +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64
> > +cppc_req_cached) {
> > +	s16 epp;
> > +	struct cppc_perf_caps perf_caps;
> > +	int ret;
> > +
> > +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > +		if (!cppc_req_cached) {
> > +			epp = rdmsrl_on_cpu(cpudata->cpu,
> MSR_AMD_CPPC_REQ,
> > +					&cppc_req_cached);
> > +			if (epp)
> > +				return epp;
> > +		}
> > +		epp = (cppc_req_cached >> 24) & 0xFF;
> > +	} else {
> > +		ret = cppc_get_epp_caps(cpudata->cpu, &perf_caps);
> > +		if (ret < 0) {
> > +			pr_debug("Could not retrieve energy perf value
> (%d)\n", ret);
> > +			return -EIO;
> > +		}
> > +		epp = (s16) perf_caps.energy_perf;
> > +	}
> > +
> > +	return epp;
> > +}
> > +#endif
> > +
> > +static int amd_pstate_get_energy_pref_index(struct amd_cpudata
> > +*cpudata) {
> > +	s16 epp;
> > +	int index = -EINVAL;
> > +
> > +	epp = amd_pstate_get_epp(cpudata, 0);
> > +	if (epp < 0)
> > +		return epp;
> > +
> > +	switch (epp) {
> > +	case HWP_EPP_PERFORMANCE:
> > +		index = EPP_INDEX_PERFORMANCE;
> > +		break;
> > +	case HWP_EPP_BALANCE_PERFORMANCE:
> > +		index = EPP_INDEX_BALANCE_PERFORMANCE;
> > +		break;
> > +	case HWP_EPP_BALANCE_POWERSAVE:
> > +		index = EPP_INDEX_BALANCE_POWERSAVE;
> > +		break;
> > +	case HWP_EPP_POWERSAVE:
> > +		index = EPP_INDEX_POWERSAVE;
> > +		break;
> > +	default:
> > +			break;
> > +	}
> > +
> > +	return index;
> > +}
> > +
> > +#ifdef CONFIG_ACPI_CPPC_LIB
> > +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) {
> > +	int ret;
> > +	struct cppc_perf_ctrls perf_ctrls;
> > +
> > +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > +		u64 value = READ_ONCE(cpudata->cppc_req_cached);
> > +
> > +		value &= ~GENMASK_ULL(31, 24);
> > +		value |= (u64)epp << 24;
> > +		WRITE_ONCE(cpudata->cppc_req_cached, value);
> > +
> > +		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> value);
> > +		if (!ret)
> > +			cpudata->epp_cached = epp;
> > +	} else {
> > +		perf_ctrls.energy_perf = epp;
> > +		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
> > +		if (ret) {
> > +			pr_debug("failed to set energy perf value (%d)\n",
> ret);
> > +			return ret;
> > +		}
> > +		cpudata->epp_cached = epp;
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static int amd_pstate_set_energy_pref_index(struct amd_cpudata
> *cpudata,
> > +		int pref_index)
> > +{
> > +	int epp = -EINVAL;
> > +	int ret;
> > +
> > +	if (!pref_index) {
> > +		pr_debug("EPP pref_index is invalid\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (epp == -EINVAL)
> > +		epp = epp_values[pref_index];
> > +
> > +	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> {
> > +		pr_debug("EPP cannot be set under performance policy\n");
> > +		return -EBUSY;
> > +	}
> > +
> > +	ret = amd_pstate_set_epp(cpudata, epp);
> > +
> > +	return ret;
> > +}
> > +#endif
> > +
> >  static inline int pstate_enable(bool enable)  {
> >  	return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); @@ -70,11
> +194,21
> > @@ static inline int pstate_enable(bool enable)  static int
> > cppc_enable(bool enable)  {
> >  	int cpu, ret = 0;
> > +	struct cppc_perf_ctrls perf_ctrls;
> >
> >  	for_each_present_cpu(cpu) {
> >  		ret = cppc_set_enable(cpu, enable);
> >  		if (ret)
> >  			return ret;
> > +
> > +		/* Enable autonomous mode for EPP */
> > +		if (!cppc_active) {
> > +			/* Set desired perf as zero to allow EPP firmware
> control */
> > +			perf_ctrls.desired_perf = 0;
> > +			ret = cppc_set_perf(cpu, &perf_ctrls);
> > +			if (ret)
> > +				return ret;
> > +		}
> >  	}
> >
> >  	return ret;
> > @@ -417,7 +551,7 @@ static void amd_pstate_boost_init(struct
> amd_cpudata *cpudata)
> >  		return;
> >
> >  	cpudata->boost_supported = true;
> > -	amd_pstate_driver.boost_enabled = true;
> > +	default_pstate_driver->boost_enabled = true;
> >  }
> >
> >  static void amd_perf_ctl_reset(unsigned int cpu) @@ -591,10 +725,61
> > @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy
> *policy,
> >  	return sprintf(&buf[0], "%u\n", perf);  }
> >
> > +static ssize_t show_energy_performance_available_preferences(
> > +				struct cpufreq_policy *policy, char *buf) {
> > +	int i = 0;
> > +	int ret = 0;
> > +
> > +	while (energy_perf_strings[i] != NULL)
> > +		ret += sysfs_emit(buf, "%s", energy_perf_strings[i++]);
> > +
> > +	ret += sysfs_emit(buf, "\n");
> > +
> > +	return ret;
> > +}
> > +
> > +static ssize_t store_energy_performance_preference(
> > +		struct cpufreq_policy *policy, const char *buf, size_t count) {
> > +	struct amd_cpudata *cpudata = policy->driver_data;
> > +	char str_preference[21];
> > +	ssize_t ret;
> > +
> > +	ret = sscanf(buf, "%20s", str_preference);
> > +	if (ret != 1)
> > +		return -EINVAL;
> > +
> > +	ret = match_string(energy_perf_strings, -1, str_preference);
> > +	if (ret < 0)
> > +		return -EINVAL;
> > +
> > +	mutex_lock(&amd_pstate_limits_lock);
> > +	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
> > +	mutex_unlock(&amd_pstate_limits_lock);
> > +
> > +	return ret ?: count;
> > +}
> > +
> > +static ssize_t show_energy_performance_preference(
> > +				struct cpufreq_policy *policy, char *buf) {
> > +	struct amd_cpudata *cpudata = policy->driver_data;
> > +	int preference;
> > +
> > +	preference = amd_pstate_get_energy_pref_index(cpudata);
> > +	if (preference < 0)
> > +		return preference;
> > +
> > +	return  sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); }
> > +
> >  cpufreq_freq_attr_ro(amd_pstate_max_freq);
> >  cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
> >
> >  cpufreq_freq_attr_ro(amd_pstate_highest_perf);
> > +cpufreq_freq_attr_rw(energy_performance_preference);
> > +cpufreq_freq_attr_ro(energy_performance_available_preferences);
> >
> >  static struct freq_attr *amd_pstate_attr[] = {
> >  	&amd_pstate_max_freq,
> > @@ -603,6 +788,429 @@ static struct freq_attr *amd_pstate_attr[] = {
> >  	NULL,
> >  };
> >
> > +static struct freq_attr *amd_pstate_epp_attr[] = {
> > +	&amd_pstate_max_freq,
> > +	&amd_pstate_lowest_nonlinear_freq,
> > +	&amd_pstate_highest_perf,
> > +	&energy_performance_preference,
> > +	&energy_performance_available_preferences,
> > +	NULL,
> > +};
> > +
> > +static inline void update_boost_state(void) {
> > +	u64 misc_en;
> > +	struct amd_cpudata *cpudata;
> > +
> > +	cpudata = all_cpu_data[0];
> > +	rdmsrl(MSR_K7_HWCR, misc_en);
> > +	global_params.cppc_boost_disabled = misc_en & BIT_ULL(25); }
> > +
> > +static bool amd_pstate_acpi_pm_profile_server(void)
> > +{
> > +	if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
> > +	    acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
> > +		return true;
> > +
> > +	return false;
> > +}
> > +
> > +static int amd_pstate_init_cpu(unsigned int cpunum) {
> > +	struct amd_cpudata *cpudata;
> > +
> > +	cpudata = all_cpu_data[cpunum];
> > +	if (!cpudata) {
> > +		cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
> > +		if (!cpudata)
> > +			return -ENOMEM;
> > +		WRITE_ONCE(all_cpu_data[cpunum], cpudata);
> > +
> > +		cpudata->cpu = cpunum;
> > +
> > +		if (cppc_active) {
> > +			if (amd_pstate_acpi_pm_profile_server())
> > +				cppc_boost = true;
> > +		}
> > +
> > +	}
> > +	cpudata->epp_powersave = -EINVAL;
> > +	cpudata->epp_policy = 0;
> > +	pr_debug("controlling: cpu %d\n", cpunum);
> > +	return 0;
> > +}
> > +
> > +static int __amd_pstate_cpu_init(struct cpufreq_policy *policy) {
> > +	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> > +	struct amd_cpudata *cpudata;
> > +	struct device *dev;
> > +	int rc;
> > +	u64 value;
> > +
> > +	rc = amd_pstate_init_cpu(policy->cpu);
> > +	if (rc)
> > +		return rc;
> > +
> > +	cpudata = all_cpu_data[policy->cpu];
> > +
> > +	dev = get_cpu_device(policy->cpu);
> > +	if (!dev)
> > +		goto free_cpudata1;
> > +
> > +	rc = amd_pstate_init_perf(cpudata);
> > +	if (rc)
> > +		goto free_cpudata1;
> > +
> > +	min_freq = amd_get_min_freq(cpudata);
> > +	max_freq = amd_get_max_freq(cpudata);
> > +	nominal_freq = amd_get_nominal_freq(cpudata);
> > +	lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
> > +	if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
> > +		dev_err(dev, "min_freq(%d) or max_freq(%d) value is
> incorrect\n",
> > +				min_freq, max_freq);
> > +		ret = -EINVAL;
> > +		goto free_cpudata1;
> > +	}
> > +
> > +	policy->min = min_freq;
> > +	policy->max = max_freq;
> > +
> > +	policy->cpuinfo.min_freq = min_freq;
> > +	policy->cpuinfo.max_freq = max_freq;
> > +	/* It will be updated by governor */
> > +	policy->cur = policy->cpuinfo.min_freq;
> > +
> > +	/* Initial processor data capability frequencies */
> > +	cpudata->max_freq = max_freq;
> > +	cpudata->min_freq = min_freq;
> > +	cpudata->nominal_freq = nominal_freq;
> > +	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
> > +
> > +	policy->driver_data = cpudata;
> > +
> > +	update_boost_state();
> > +	cpudata->epp_cached = amd_pstate_get_epp(cpudata, value);
> > +
> > +	policy->min = policy->cpuinfo.min_freq;
> > +	policy->max = policy->cpuinfo.max_freq;
> > +
> > +	if (boot_cpu_has(X86_FEATURE_CPPC))
> > +		policy->fast_switch_possible = true;
> > +
> > +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > +		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> &value);
> > +		if (ret)
> > +			return ret;
> > +		WRITE_ONCE(cpudata->cppc_req_cached, value);
> > +
> > +		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
> &value);
> > +		if (ret)
> > +			return ret;
> > +		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
> > +	}
> > +	amd_pstate_boost_init(cpudata);
> > +
> > +	return 0;
> > +
> > +free_cpudata1:
> > +	kfree(cpudata);
> > +	return ret;
> > +}
> > +
> > +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) {
> > +	int ret;
> > +
> > +	ret = __amd_pstate_cpu_init(policy);
> > +	if (ret)
> > +		return ret;
> > +	/*
> > +	 * Set the policy to powersave to provide a valid fallback value in case
> > +	 * the default cpufreq governor is neither powersave nor
> performance.
> > +	 */
> > +	policy->policy = CPUFREQ_POLICY_POWERSAVE;
> > +
> > +	return 0;
> > +}
> > +
> > +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) {
> > +	pr_debug("CPU %d exiting\n", policy->cpu);
> > +	policy->fast_switch_possible = false;
> > +	return 0;
> > +}
> > +
> > +static void amd_pstate_update_max_freq(unsigned int cpu) {
> > +	struct cpufreq_policy *policy = policy = cpufreq_cpu_get(cpu);
> > +
> > +	if (!policy)
> > +		return;
> > +
> > +	refresh_frequency_limits(policy);
> > +	cpufreq_cpu_put(policy);
> > +}
> > +
> > +static void amd_pstate_epp_update_limits(unsigned int cpu) {
> > +	mutex_lock(&amd_pstate_driver_lock);
> > +	update_boost_state();
> > +	if (global_params.cppc_boost_disabled) {
> > +		for_each_possible_cpu(cpu)
> > +			amd_pstate_update_max_freq(cpu);
> > +	} else {
> > +		cpufreq_update_policy(cpu);
> > +	}
> > +	mutex_unlock(&amd_pstate_driver_lock);
> > +}
> > +
> > +static int cppc_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
> > +
> > +static inline void amd_pstate_boost_up(struct amd_cpudata *cpudata) {
> > +	u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
> > +	u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
> > +	u32 max_limit = (hwp_req & 0xff);
> > +	u32 min_limit = (hwp_req & 0xff00) >> 8;
> > +	u32 boost_level1;
> > +
> > +	/* If max and min are equal or already at max, nothing to boost */
> > +	if (max_limit == min_limit)
> > +		return;
> > +
> > +	/* Set boost max and min to initial value */
> > +	if (!cpudata->cppc_boost_min)
> > +		cpudata->cppc_boost_min = min_limit;
> > +
> > +	boost_level1 = ((AMD_CPPC_NOMINAL_PERF(hwp_cap) +
> min_limit) >> 1);
> > +
> > +	if (cpudata->cppc_boost_min < boost_level1)
> > +		cpudata->cppc_boost_min = boost_level1;
> > +	else if (cpudata->cppc_boost_min <
> AMD_CPPC_NOMINAL_PERF(hwp_cap))
> > +		cpudata->cppc_boost_min =
> AMD_CPPC_NOMINAL_PERF(hwp_cap);
> > +	else if (cpudata->cppc_boost_min ==
> AMD_CPPC_NOMINAL_PERF(hwp_cap))
> > +		cpudata->cppc_boost_min = max_limit;
> > +	else
> > +		return;
> > +
> > +	hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
> > +	hwp_req |= AMD_CPPC_MIN_PERF(cpudata->cppc_boost_min);
> > +	wrmsrl(MSR_AMD_CPPC_REQ, hwp_req);
> > +	cpudata->last_update = cpudata->sample.time; }
> > +
> > +static inline void amd_pstate_boost_down(struct amd_cpudata *cpudata)
> > +{
> > +	bool expired;
> > +
> > +	if (cpudata->cppc_boost_min) {
> > +		expired = time_after64(cpudata->sample.time, cpudata-
> >last_update +
> > +					cppc_boost_hold_time_ns);
> > +
> > +		if (expired) {
> > +			wrmsrl(MSR_AMD_CPPC_REQ, cpudata-
> >cppc_req_cached);
> > +			cpudata->cppc_boost_min = 0;
> > +		}
> > +	}
> > +
> > +	cpudata->last_update = cpudata->sample.time; }
> > +
> > +static inline void amd_pstate_boost_update_util(struct amd_cpudata
> *cpudata,
> > +						      u64 time)
> > +{
> > +	cpudata->sample.time = time;
> > +	if (smp_processor_id() != cpudata->cpu)
> > +		return;
> > +
> > +	if (cpudata->sched_flags & SCHED_CPUFREQ_IOWAIT) {
> > +		bool do_io = false;
> > +
> > +		cpudata->sched_flags = 0;
> > +		/*
> > +		 * Set iowait_boost flag and update time. Since IO WAIT flag
> > +		 * is set all the time, we can't just conclude that there is
> > +		 * some IO bound activity is scheduled on this CPU with just
> > +		 * one occurrence. If we receive at least two in two
> > +		 * consecutive ticks, then we treat as boost candidate.
> > +		 * This is leveraged from Intel Pstate driver.
> > +		 */
> > +		if (time_before64(time, cpudata->last_io_update + 2 *
> TICK_NSEC))
> > +			do_io = true;
> > +
> > +		cpudata->last_io_update = time;
> > +
> > +		if (do_io)
> > +			amd_pstate_boost_up(cpudata);
> > +
> > +	} else {
> > +		amd_pstate_boost_down(cpudata);
> > +	}
> > +}
> > +
> > +static inline void amd_pstate_cppc_update_hook(struct update_util_data
> *data,
> > +						u64 time, unsigned int flags)
> > +{
> > +	struct amd_cpudata *cpudata = container_of(data,
> > +				struct amd_cpudata, update_util);
> > +
> > +	cpudata->sched_flags |= flags;
> > +
> > +	if (smp_processor_id() == cpudata->cpu)
> > +		amd_pstate_boost_update_util(cpudata, time); }
> > +
> > +static void amd_pstate_clear_update_util_hook(unsigned int cpu) {
> > +	struct amd_cpudata *cpudata = all_cpu_data[cpu];
> > +
> > +	if (!cpudata->update_util_set)
> > +		return;
> > +
> > +	cpufreq_remove_update_util_hook(cpu);
> > +	cpudata->update_util_set = false;
> > +	synchronize_rcu();
> > +}
> > +
> > +static void amd_pstate_set_update_util_hook(unsigned int cpu_num) {
> > +	struct amd_cpudata *cpudata = all_cpu_data[cpu_num];
> > +
> > +	if (!cppc_boost) {
> > +		if (cpudata->update_util_set)
> > +			amd_pstate_clear_update_util_hook(cpudata->cpu);
> > +		return;
> > +	}
> > +
> > +	if (cpudata->update_util_set)
> > +		return;
> > +
> > +	cpudata->sample.time = 0;
> > +	cpufreq_add_update_util_hook(cpu_num, &cpudata->update_util,
> > +
> 	amd_pstate_cppc_update_hook);
> > +	cpudata->update_util_set = true;
> > +}
> > +
> > +static void amd_pstate_epp_init(unsigned int cpu) {
> > +	struct amd_cpudata *cpudata = all_cpu_data[cpu];
> > +	u32 max_perf, min_perf;
> > +	u64 value;
> > +	s16 epp;
> > +	int ret;
> > +
> > +	max_perf = READ_ONCE(cpudata->highest_perf);
> > +	min_perf = READ_ONCE(cpudata->lowest_perf);
> > +
> > +	value = READ_ONCE(cpudata->cppc_req_cached);
> > +
> > +	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> > +		min_perf = max_perf;
> > +
> > +	/* Initial min/max values for CPPC Performance Controls Register */
> > +	value &= ~AMD_CPPC_MIN_PERF(~0L);
> > +	value |= AMD_CPPC_MIN_PERF(min_perf);
> > +
> > +	value &= ~AMD_CPPC_MAX_PERF(~0L);
> > +	value |= AMD_CPPC_MAX_PERF(max_perf);
> > +
> > +	/* CPPC EPP feature require to set zero to the desire perf bit */
> > +	value &= ~AMD_CPPC_DES_PERF(~0L);
> > +	value |= AMD_CPPC_DES_PERF(0);
> > +
> > +	if (cpudata->epp_policy == cpudata->policy)
> > +		goto skip_epp;
> > +
> > +	cpudata->epp_policy = cpudata->policy;
> > +
> > +	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> > +		epp = amd_pstate_get_epp(cpudata, value);
> > +		cpudata->epp_powersave = epp;
> > +		if (epp < 0)
> > +			goto skip_epp;
> > +		/* force the epp value to be zero for performance policy */
> > +		epp = 0;
> > +	} else {
> > +		if (cpudata->epp_powersave < 0)
> > +			goto skip_epp;
> > +		/* Get BIOS pre-defined epp value */
> > +		epp = amd_pstate_get_epp(cpudata, value);
> > +		if (epp)
> > +			goto skip_epp;
> > +		epp = cpudata->epp_powersave;
> > +	}
> > +	/* Set initial EPP value */
> > +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > +		value &= ~GENMASK_ULL(31, 24);
> > +		value |= (u64)epp << 24;
> > +	}
> > +
> > +skip_epp:
> > +	WRITE_ONCE(cpudata->cppc_req_cached, value);
> > +	ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> > +	if (!ret)
> > +		cpudata->epp_cached = epp;
> > +}
> > +
> > +static void amd_pstate_set_max_limits(struct amd_cpudata *cpudata) {
> > +	u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
> > +	u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
> > +	u32 max_limit = (hwp_cap >> 24) & 0xff;
> > +
> > +	hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
> > +	hwp_req |= AMD_CPPC_MIN_PERF(max_limit);
> > +	wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, hwp_req); }
> > +
> > +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) {
> > +	struct amd_cpudata *cpudata;
> > +
> > +	if (!policy->cpuinfo.max_freq)
> > +		return -ENODEV;
> > +
> > +	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
> > +				policy->cpuinfo.max_freq, policy->max);
> > +
> > +	cpudata = all_cpu_data[policy->cpu];
> > +	cpudata->policy = policy->policy;
> > +
> > +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> 
> X86_FEATURE_CPPC indicated the MSR support, but I believe the share
> memory CPU also has the EPP, right?

Yes,  the shared memory system which are using below preferred profile will add the util hook with another patchset. 
Current I would like to add this to MSR systems in this patchset as tight schedule. 

        if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
            acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)


> 
> > +		mutex_lock(&amd_pstate_limits_lock);
> > +
> > +		if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> > +			amd_pstate_clear_update_util_hook(policy->cpu);
> > +			amd_pstate_set_max_limits(cpudata);
> > +		} else {
> > +			amd_pstate_set_update_util_hook(policy->cpu);
> 
> May I know why amd processors also needs to set and update util hook?

Like intel already implemented this scheduler hook, it helps to improve the I/O performance, it is not enabled by default for Client CPU.


> 
> > +		}
> > +
> > +		if (boot_cpu_has(X86_FEATURE_CPPC))
> 
> I am confused why if (boot_cpu_has(X86_FEATURE_CPPC) is still inside of a if
> (boot_cpu_has(X86_FEATURE_CPPC)).
> 

Fixed in V7. 

> > +			amd_pstate_epp_init(policy->cpu);
> > +
> > +		mutex_unlock(&amd_pstate_limits_lock);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void amd_pstate_verify_cpu_policy(struct amd_cpudata *cpudata,
> > +					   struct cpufreq_policy_data *policy)
> {
> > +	update_boost_state();
> > +	cpufreq_verify_within_cpu_limits(policy);
> > +}
> > +
> > +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data
> > +*policy) {
> > +	amd_pstate_verify_cpu_policy(all_cpu_data[policy->cpu], policy);
> > +	pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy-
> >min);
> > +	return 0;
> > +}
> > +
> >  static struct cpufreq_driver amd_pstate_driver = {
> >  	.flags		= CPUFREQ_CONST_LOOPS |
> CPUFREQ_NEED_UPDATE_LIMITS,
> >  	.verify		= amd_pstate_verify,
> > @@ -616,8 +1224,20 @@ static struct cpufreq_driver amd_pstate_driver =
> {
> >  	.attr		= amd_pstate_attr,
> >  };
> >
> > +static struct cpufreq_driver amd_pstate_epp_driver = {
> > +	.flags		= CPUFREQ_CONST_LOOPS,
> > +	.verify		= amd_pstate_epp_verify_policy,
> > +	.setpolicy	= amd_pstate_epp_set_policy,
> > +	.init		= amd_pstate_epp_cpu_init,
> > +	.exit		= amd_pstate_epp_cpu_exit,
> > +	.update_limits	= amd_pstate_epp_update_limits,
> > +	.name		= "amd_pstate_epp",
> > +	.attr		= amd_pstate_epp_attr,
> > +};
> > +
> >  static int __init amd_pstate_init(void)  {
> > +	static struct amd_cpudata **cpudata;
> >  	int ret;
> >
> >  	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) @@ -644,7
> +1264,8 @@
> > static int __init amd_pstate_init(void)
> >  	/* capability check */
> >  	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> >  		pr_debug("AMD CPPC MSR based functionality is
> supported\n");
> > -		amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
> > +		if (!cppc_active)
> > +			default_pstate_driver->adjust_perf =
> amd_pstate_adjust_perf;
> >  	} else {
> >  		pr_debug("AMD CPPC shared memory based functionality is
> supported\n");
> >  		static_call_update(amd_pstate_enable, cppc_enable); @@ -
> 652,6
> > +1273,10 @@ static int __init amd_pstate_init(void)
> >  		static_call_update(amd_pstate_update_perf,
> cppc_update_perf);
> >  	}
> >
> > +	cpudata = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
> > +	if (!cpudata)
> > +		return -ENOMEM;
> > +	WRITE_ONCE(all_cpu_data, cpudata);
> >  	/* enable amd pstate feature */
> >  	ret = amd_pstate_enable(true);
> >  	if (ret) {
> > @@ -659,9 +1284,9 @@ static int __init amd_pstate_init(void)
> >  		return ret;
> >  	}
> >
> > -	ret = cpufreq_register_driver(&amd_pstate_driver);
> > +	ret = cpufreq_register_driver(default_pstate_driver);
> >  	if (ret)
> > -		pr_err("failed to register amd_pstate_driver with
> return %d\n",
> > +		pr_err("failed to register amd pstate driver with
> return %d\n",
> >  		       ret);
> >
> >  	return ret;
> > @@ -676,8 +1301,14 @@ static int __init amd_pstate_param(char *str)
> >  	if (!strcmp(str, "disable")) {
> >  		cppc_load = 0;
> >  		pr_info("driver is explicitly disabled\n");
> > -	} else if (!strcmp(str, "passive"))
> > +	} else if (!strcmp(str, "passive")) {
> >  		cppc_load = 1;
> > +		default_pstate_driver = &amd_pstate_driver;
> > +	} else if (!strcmp(str, "active")) {
> > +		cppc_active = 1;
> > +		cppc_load = 1;
> > +		default_pstate_driver = &amd_pstate_epp_driver;
> > +	}
> >
> >  	return 0;
> >  }
> > diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> > index 1c4b8659f171..888af62040f1 100644
> > --- a/include/linux/amd-pstate.h
> > +++ b/include/linux/amd-pstate.h
> > @@ -25,6 +25,7 @@ struct amd_aperf_mperf {
> >  	u64 aperf;
> >  	u64 mperf;
> >  	u64 tsc;
> > +	u64 time;
> >  };
> >
> >  /**
> > @@ -47,6 +48,18 @@ struct amd_aperf_mperf {
> >   * @prev: Last Aperf/Mperf/tsc count value read from register
> >   * @freq: current cpu frequency value
> >   * @boost_supported: check whether the Processor or SBIOS supports
> > boost mode
> > + * @epp_powersave: Last saved CPPC energy performance preference
> > +				when policy switched to performance
> > + * @epp_policy: Last saved policy used to set energy-performance
> > +preference
> > + * @epp_cached: Cached CPPC energy-performance preference value
> > + * @policy: Cpufreq policy value
> > + * @sched_flags: Store scheduler flags for possible cross CPU update
> > + * @update_util_set: CPUFreq utility callback is set
> > + * @last_update: Time stamp of the last performance state update
> > + * @cppc_boost_min: Last CPPC boosted min performance state
> > + * @cppc_cap1_cached: Cached value of the last CPPC Capabilities MSR
> > + * @update_util: Cpufreq utility callback information
> > + * @sample: the stored performance sample
> >   *
> >   * The amd_cpudata is key private data for each CPU thread in AMD P-
> State, and
> >   * represents all the attributes and goals that AMD P-State requests at
> runtime.
> > @@ -72,6 +85,28 @@ struct amd_cpudata {
> >
> >  	u64	freq;
> >  	bool	boost_supported;
> > +
> > +	/* EPP feature related attributes*/
> > +	s16	epp_powersave;
> > +	s16	epp_policy;
> > +	s16	epp_cached;
> > +	u32	policy;
> > +	u32	sched_flags;
> > +	bool	update_util_set;
> > +	u64	last_update;
> > +	u64	last_io_update;
> > +	u32	cppc_boost_min;
> > +	u64	cppc_cap1_cached;
> > +	struct	update_util_data update_util;
> > +	struct	amd_aperf_mperf sample;
> > +};
> > +
> > +/**
> > + * struct amd_pstate_params - global parameters for the performance
> > +control
> > + * @ cppc_boost_disabled wheher the core performance boost disabled
> > +*/ struct amd_pstate_params {
> > +	bool cppc_boost_disabled;
> >  };
> >
> >  #endif /* _LINUX_AMD_PSTATE_H */
> > --
> > 2.34.1
> >
  
Yuan, Perry Dec. 8, 2022, 3:51 p.m. UTC | #5
[AMD Official Use Only - General]

Hi Mario. 

> -----Original Message-----
> From: Limonciello, Mario <Mario.Limonciello@amd.com>
> Sent: Saturday, December 3, 2022 1:36 AM
> To: Yuan, Perry <Perry.Yuan@amd.com>; rafael.j.wysocki@intel.com; Huang,
> Ray <Ray.Huang@amd.com>; viresh.kumar@linaro.org
> Cc: Sharma, Deepak <Deepak.Sharma@amd.com>; Fontenot, Nathan
> <Nathan.Fontenot@amd.com>; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Huang, Shimmer
> <Shimmer.Huang@amd.com>; Du, Xiaojian <Xiaojian.Du@amd.com>; Meng,
> Li (Jassmine) <Li.Meng@amd.com>; Karny, Wyes <Wyes.Karny@amd.com>;
> linux-pm@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v6 04/11] cpufreq: amd_pstate: implement Pstate EPP
> support for the AMD processors
> 
> On 12/2/2022 01:47, Perry Yuan wrote:
> > From: Perry Yuan <Perry.Yuan@amd.com>
> >
> > Add EPP driver support for AMD SoCs which support a dedicated MSR for
> > CPPC.  EPP is used by the DPM controller to configure the frequency
> > that a core operates at during short periods of activity.
> >
> > The SoC EPP targets are configured on a scale from 0 to 255 where 0
> > represents maximum performance and 255 represents maximum
> efficiency.
> >
> > The amd-pstate driver exports profile string names to userspace that
> > are tied to specific EPP values.
> >
> > The balance_performance string (0x80) provides the best balance for
> > efficiency versus power on most systems, but users can choose other
> > strings to meet their needs as well.
> >
> > $ cat
> >
> /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_
> p
> > references default performance balance_performance balance_power
> power
> >
> > $ cat
> >
> /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preferenc
> e
> > balance_performance
> >
> > To enable the driver,it needs to add `amd_pstate=active` to kernel
> > command line and kernel will load the active mode epp driver
> >
> > Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
> > ---
> >   drivers/cpufreq/amd-pstate.c | 643
> ++++++++++++++++++++++++++++++++++-
> >   include/linux/amd-pstate.h   |  35 ++
> >   2 files changed, 672 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/cpufreq/amd-pstate.c
> > b/drivers/cpufreq/amd-pstate.c index 204e39006dda..5989d4d25d0f
> 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -37,9 +37,12 @@
> >   #include <linux/uaccess.h>
> >   #include <linux/static_call.h>
> >   #include <linux/amd-pstate.h>
> > +#include <linux/cpufreq_common.h>
> >
> > +#ifdef CONFIG_ACPI
> >   #include <acpi/processor.h>
> >   #include <acpi/cppc_acpi.h>
> > +#endif
> >
> >   #include <asm/msr.h>
> >   #include <asm/processor.h>
> > @@ -59,9 +62,130 @@
> >    * we disable it by default to go acpi-cpufreq on these processors and add
> a
> >    * module parameter to be able to enable it manually for debugging.
> >    */
> > -static struct cpufreq_driver amd_pstate_driver;
> > +static bool cppc_active;
> >   static int cppc_load __initdata;
> >
> > +static struct cpufreq_driver *default_pstate_driver; static struct
> > +amd_cpudata **all_cpu_data; static struct amd_pstate_params
> > +global_params;
> > +
> > +static DEFINE_MUTEX(amd_pstate_limits_lock);
> > +static DEFINE_MUTEX(amd_pstate_driver_lock);
> > +
> > +static bool cppc_boost __read_mostly; struct kobject
> > +*amd_pstate_kobj;
> > +
> > +#ifdef CONFIG_ACPI_CPPC_LIB
> Rather than making this conditional I would think that amd-pstate should
> probably depend on or select CONFIG_ACPI_CPPC_LIB, no?

Yes, the Kconfig select CONFIG_ACPI_CPPC_LIB , I removed the checking in V7. 
Thanks for your review.

Perry.  

> 
> > +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64
> > +cppc_req_cached) {
> > +	s16 epp;
> > +	struct cppc_perf_caps perf_caps;
> > +	int ret;
> > +
> > +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > +		if (!cppc_req_cached) {
> > +			epp = rdmsrl_on_cpu(cpudata->cpu,
> MSR_AMD_CPPC_REQ,
> > +					&cppc_req_cached);
> > +			if (epp)
> > +				return epp;
> > +		}
> > +		epp = (cppc_req_cached >> 24) & 0xFF;
> > +	} else {
> > +		ret = cppc_get_epp_caps(cpudata->cpu, &perf_caps);
> > +		if (ret < 0) {
> > +			pr_debug("Could not retrieve energy perf value
> (%d)\n", ret);
> > +			return -EIO;
> > +		}
> > +		epp = (s16) perf_caps.energy_perf;
> > +	}
> > +
> > +	return epp;
> > +}
> > +#endif
> > +
> > +static int amd_pstate_get_energy_pref_index(struct amd_cpudata
> > +*cpudata) {
> > +	s16 epp;
> > +	int index = -EINVAL;
> > +
> > +	epp = amd_pstate_get_epp(cpudata, 0);
> > +	if (epp < 0)
> > +		return epp;
> > +
> > +	switch (epp) {
> > +	case HWP_EPP_PERFORMANCE:
> > +		index = EPP_INDEX_PERFORMANCE;
> > +		break;
> > +	case HWP_EPP_BALANCE_PERFORMANCE:
> > +		index = EPP_INDEX_BALANCE_PERFORMANCE;
> > +		break;
> > +	case HWP_EPP_BALANCE_POWERSAVE:
> > +		index = EPP_INDEX_BALANCE_POWERSAVE;
> > +		break;
> > +	case HWP_EPP_POWERSAVE:
> > +		index = EPP_INDEX_POWERSAVE;
> > +		break;
> > +	default:
> > +			break;
> > +	}
> > +
> > +	return index;
> > +}
> > +
> > +#ifdef CONFIG_ACPI_CPPC_LIB
> > +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) {
> > +	int ret;
> > +	struct cppc_perf_ctrls perf_ctrls;
> > +
> > +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > +		u64 value = READ_ONCE(cpudata->cppc_req_cached);
> > +
> > +		value &= ~GENMASK_ULL(31, 24);
> > +		value |= (u64)epp << 24;
> > +		WRITE_ONCE(cpudata->cppc_req_cached, value);
> > +
> > +		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> value);
> > +		if (!ret)
> > +			cpudata->epp_cached = epp;
> > +	} else {
> > +		perf_ctrls.energy_perf = epp;
> > +		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
> > +		if (ret) {
> > +			pr_debug("failed to set energy perf value (%d)\n",
> ret);
> > +			return ret;
> > +		}
> > +		cpudata->epp_cached = epp;
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static int amd_pstate_set_energy_pref_index(struct amd_cpudata
> *cpudata,
> > +		int pref_index)
> > +{
> > +	int epp = -EINVAL;
> > +	int ret;
> > +
> > +	if (!pref_index) {
> > +		pr_debug("EPP pref_index is invalid\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (epp == -EINVAL)
> > +		epp = epp_values[pref_index];
> > +
> > +	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> {
> > +		pr_debug("EPP cannot be set under performance policy\n");
> > +		return -EBUSY;
> > +	}
> > +
> > +	ret = amd_pstate_set_epp(cpudata, epp);
> > +
> > +	return ret;
> > +}
> > +#endif
> > +
> >   static inline int pstate_enable(bool enable)
> >   {
> >   	return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); @@ -70,11
> +194,21
> > @@ static inline int pstate_enable(bool enable)
> >   static int cppc_enable(bool enable)
> >   {
> >   	int cpu, ret = 0;
> > +	struct cppc_perf_ctrls perf_ctrls;
> >
> >   	for_each_present_cpu(cpu) {
> >   		ret = cppc_set_enable(cpu, enable);
> >   		if (ret)
> >   			return ret;
> > +
> > +		/* Enable autonomous mode for EPP */
> > +		if (!cppc_active) {
> > +			/* Set desired perf as zero to allow EPP firmware
> control */
> > +			perf_ctrls.desired_perf = 0;
> > +			ret = cppc_set_perf(cpu, &perf_ctrls);
> > +			if (ret)
> > +				return ret;
> > +		}
> >   	}
> >
> >   	return ret;
> > @@ -417,7 +551,7 @@ static void amd_pstate_boost_init(struct
> amd_cpudata *cpudata)
> >   		return;
> >
> >   	cpudata->boost_supported = true;
> > -	amd_pstate_driver.boost_enabled = true;
> > +	default_pstate_driver->boost_enabled = true;
> >   }
> >
> >   static void amd_perf_ctl_reset(unsigned int cpu) @@ -591,10 +725,61
> > @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy
> *policy,
> >   	return sprintf(&buf[0], "%u\n", perf);
> >   }
> >
> > +static ssize_t show_energy_performance_available_preferences(
> > +				struct cpufreq_policy *policy, char *buf) {
> > +	int i = 0;
> > +	int ret = 0;
> > +
> > +	while (energy_perf_strings[i] != NULL)
> > +		ret += sysfs_emit(buf, "%s", energy_perf_strings[i++]);
> > +
> > +	ret += sysfs_emit(buf, "\n");
> > +
> > +	return ret;
> > +}
> > +
> > +static ssize_t store_energy_performance_preference(
> > +		struct cpufreq_policy *policy, const char *buf, size_t count) {
> > +	struct amd_cpudata *cpudata = policy->driver_data;
> > +	char str_preference[21];
> > +	ssize_t ret;
> > +
> > +	ret = sscanf(buf, "%20s", str_preference);
> > +	if (ret != 1)
> > +		return -EINVAL;
> > +
> > +	ret = match_string(energy_perf_strings, -1, str_preference);
> > +	if (ret < 0)
> > +		return -EINVAL;
> > +
> > +	mutex_lock(&amd_pstate_limits_lock);
> > +	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
> > +	mutex_unlock(&amd_pstate_limits_lock);
> > +
> > +	return ret ?: count;
> > +}
> > +
> > +static ssize_t show_energy_performance_preference(
> > +				struct cpufreq_policy *policy, char *buf) {
> > +	struct amd_cpudata *cpudata = policy->driver_data;
> > +	int preference;
> > +
> > +	preference = amd_pstate_get_energy_pref_index(cpudata);
> > +	if (preference < 0)
> > +		return preference;
> > +
> > +	return  sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
> 
> You've got an extra space after "return".

Fixed in V7. 

> 
> > +}
> > +
> >   cpufreq_freq_attr_ro(amd_pstate_max_freq);
> >   cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
> >
> >   cpufreq_freq_attr_ro(amd_pstate_highest_perf);
> > +cpufreq_freq_attr_rw(energy_performance_preference);
> > +cpufreq_freq_attr_ro(energy_performance_available_preferences);
> >
> >   static struct freq_attr *amd_pstate_attr[] = {
> >   	&amd_pstate_max_freq,
> > @@ -603,6 +788,429 @@ static struct freq_attr *amd_pstate_attr[] = {
> >   	NULL,
> >   };
> >
> > +static struct freq_attr *amd_pstate_epp_attr[] = {
> > +	&amd_pstate_max_freq,
> > +	&amd_pstate_lowest_nonlinear_freq,
> > +	&amd_pstate_highest_perf,
> > +	&energy_performance_preference,
> > +	&energy_performance_available_preferences,
> > +	NULL,
> > +};
> > +
> > +static inline void update_boost_state(void) {
> > +	u64 misc_en;
> > +	struct amd_cpudata *cpudata;
> > +
> > +	cpudata = all_cpu_data[0];
> > +	rdmsrl(MSR_K7_HWCR, misc_en);
> > +	global_params.cppc_boost_disabled = misc_en & BIT_ULL(25); }
> > +
> > +static bool amd_pstate_acpi_pm_profile_server(void)
> > +{
> > +	if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
> > +	    acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
> > +		return true;
> > +
> > +	return false;
> > +}
> 
> Right now this is only used to control boost, but I wonder if this should instad
> also control the default EPP policy as all?

It should be able to control the EPP profile as well if needed.
Currently the default EPP profile is balance_performance for all CPUs.
If we want to set different profile for server CPU, there some are codes can be added here. 

> 
> > +
> > +static int amd_pstate_init_cpu(unsigned int cpunum) {
> > +	struct amd_cpudata *cpudata;
> > +
> > +	cpudata = all_cpu_data[cpunum];
> > +	if (!cpudata) {
> > +		cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
> > +		if (!cpudata)
> > +			return -ENOMEM;
> > +		WRITE_ONCE(all_cpu_data[cpunum], cpudata);
> > +
> > +		cpudata->cpu = cpunum;
> > +
> > +		if (cppc_active) {
> > +			if (amd_pstate_acpi_pm_profile_server())
> > +				cppc_boost = true;
> > +		}
> > +
> > +	}
> > +	cpudata->epp_powersave = -EINVAL;
> > +	cpudata->epp_policy = 0; > +	pr_debug("controlling: cpu %d\n",
> cpunum);
> > +	return 0;
> > +}
> > +
> > +static int __amd_pstate_cpu_init(struct cpufreq_policy *policy) {
> > +	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> > +	struct amd_cpudata *cpudata;
> > +	struct device *dev;
> > +	int rc;
> > +	u64 value;
> > +
> > +	rc = amd_pstate_init_cpu(policy->cpu);
> > +	if (rc)
> > +		return rc;
> > +
> > +	cpudata = all_cpu_data[policy->cpu];
> > +
> > +	dev = get_cpu_device(policy->cpu);
> > +	if (!dev)
> > +		goto free_cpudata1;
> > +
> > +	rc = amd_pstate_init_perf(cpudata);
> > +	if (rc)
> > +		goto free_cpudata1;
> > +
> > +	min_freq = amd_get_min_freq(cpudata);
> > +	max_freq = amd_get_max_freq(cpudata);
> > +	nominal_freq = amd_get_nominal_freq(cpudata);
> > +	lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
> > +	if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
> > +		dev_err(dev, "min_freq(%d) or max_freq(%d) value is
> incorrect\n",
> > +				min_freq, max_freq);
> > +		ret = -EINVAL;
> > +		goto free_cpudata1;
> > +	}
> > +
> > +	policy->min = min_freq;
> > +	policy->max = max_freq;
> > +
> > +	policy->cpuinfo.min_freq = min_freq;
> > +	policy->cpuinfo.max_freq = max_freq;
> > +	/* It will be updated by governor */
> > +	policy->cur = policy->cpuinfo.min_freq;
> > +
> > +	/* Initial processor data capability frequencies */
> > +	cpudata->max_freq = max_freq;
> > +	cpudata->min_freq = min_freq;
> > +	cpudata->nominal_freq = nominal_freq;
> > +	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
> > +
> > +	policy->driver_data = cpudata;
> > +
> > +	update_boost_state();
> > +	cpudata->epp_cached = amd_pstate_get_epp(cpudata, value);
> > +
> > +	policy->min = policy->cpuinfo.min_freq;
> > +	policy->max = policy->cpuinfo.max_freq;
> > +
> > +	if (boot_cpu_has(X86_FEATURE_CPPC))
> > +		policy->fast_switch_possible = true;
> > +
> > +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > +		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> &value);
> > +		if (ret)
> > +			return ret;
> > +		WRITE_ONCE(cpudata->cppc_req_cached, value);
> > +
> > +		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
> &value);
> > +		if (ret)
> > +			return ret;
> > +		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
> > +	}
> > +	amd_pstate_boost_init(cpudata);
> > +
> > +	return 0;
> > +
> > +free_cpudata1:
> > +	kfree(cpudata);
> > +	return ret;
> > +}
> > +
> > +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) {
> > +	int ret;
> > +
> > +	ret = __amd_pstate_cpu_init(policy);
> > +	if (ret)
> > +		return ret;
> > +	/*
> > +	 * Set the policy to powersave to provide a valid fallback value in case
> > +	 * the default cpufreq governor is neither powersave nor
> performance.
> > +	 */
> > +	policy->policy = CPUFREQ_POLICY_POWERSAVE;
> > +
> > +	return 0;
> > +}
> > +
> > +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) {
> > +	pr_debug("CPU %d exiting\n", policy->cpu);
> > +	policy->fast_switch_possible = false;
> > +	return 0;
> > +}
> > +
> > +static void amd_pstate_update_max_freq(unsigned int cpu) {
> > +	struct cpufreq_policy *policy = policy = cpufreq_cpu_get(cpu);
> > +
> > +	if (!policy)
> > +		return;
> > +
> > +	refresh_frequency_limits(policy);
> > +	cpufreq_cpu_put(policy);
> > +}
> > +
> > +static void amd_pstate_epp_update_limits(unsigned int cpu) {
> > +	mutex_lock(&amd_pstate_driver_lock);
> > +	update_boost_state();
> > +	if (global_params.cppc_boost_disabled) {
> > +		for_each_possible_cpu(cpu)
> > +			amd_pstate_update_max_freq(cpu);
> > +	} else {
> > +		cpufreq_update_policy(cpu);
> > +	}
> > +	mutex_unlock(&amd_pstate_driver_lock);
> > +}
> > +
> > +static int cppc_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
> > +
> > +static inline void amd_pstate_boost_up(struct amd_cpudata *cpudata) {
> > +	u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
> > +	u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
> > +	u32 max_limit = (hwp_req & 0xff);
> > +	u32 min_limit = (hwp_req & 0xff00) >> 8;
> > +	u32 boost_level1;
> > +
> > +	/* If max and min are equal or already at max, nothing to boost */
> > +	if (max_limit == min_limit)
> > +		return;
> > +
> > +	/* Set boost max and min to initial value */
> > +	if (!cpudata->cppc_boost_min)
> > +		cpudata->cppc_boost_min = min_limit;
> > +
> > +	boost_level1 = ((AMD_CPPC_NOMINAL_PERF(hwp_cap) +
> min_limit) >> 1);
> > +
> > +	if (cpudata->cppc_boost_min < boost_level1)
> > +		cpudata->cppc_boost_min = boost_level1;
> > +	else if (cpudata->cppc_boost_min <
> AMD_CPPC_NOMINAL_PERF(hwp_cap))
> > +		cpudata->cppc_boost_min =
> AMD_CPPC_NOMINAL_PERF(hwp_cap);
> > +	else if (cpudata->cppc_boost_min ==
> AMD_CPPC_NOMINAL_PERF(hwp_cap))
> > +		cpudata->cppc_boost_min = max_limit;
> > +	else
> > +		return;
> > +
> > +	hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
> > +	hwp_req |= AMD_CPPC_MIN_PERF(cpudata->cppc_boost_min);
> > +	wrmsrl(MSR_AMD_CPPC_REQ, hwp_req);
> > +	cpudata->last_update = cpudata->sample.time; }
> > +
> > +static inline void amd_pstate_boost_down(struct amd_cpudata *cpudata)
> > +{
> > +	bool expired;
> > +
> > +	if (cpudata->cppc_boost_min) {
> > +		expired = time_after64(cpudata->sample.time, cpudata-
> >last_update +
> > +					cppc_boost_hold_time_ns);
> > +
> > +		if (expired) {
> > +			wrmsrl(MSR_AMD_CPPC_REQ, cpudata-
> >cppc_req_cached);
> > +			cpudata->cppc_boost_min = 0;
> > +		}
> > +	}
> > +
> > +	cpudata->last_update = cpudata->sample.time; }
> > +
> > +static inline void amd_pstate_boost_update_util(struct amd_cpudata
> *cpudata,
> > +						      u64 time)
> > +{
> > +	cpudata->sample.time = time;
> > +	if (smp_processor_id() != cpudata->cpu)
> > +		return;
> > +
> > +	if (cpudata->sched_flags & SCHED_CPUFREQ_IOWAIT) {
> > +		bool do_io = false;
> > +
> > +		cpudata->sched_flags = 0;
> > +		/*
> > +		 * Set iowait_boost flag and update time. Since IO WAIT flag
> > +		 * is set all the time, we can't just conclude that there is
> > +		 * some IO bound activity is scheduled on this CPU with just
> > +		 * one occurrence. If we receive at least two in two
> > +		 * consecutive ticks, then we treat as boost candidate.
> > +		 * This is leveraged from Intel Pstate driver.
> > +		 */
> > +		if (time_before64(time, cpudata->last_io_update + 2 *
> TICK_NSEC))
> > +			do_io = true;
> > +
> > +		cpudata->last_io_update = time;
> > +
> > +		if (do_io)
> > +			amd_pstate_boost_up(cpudata);
> > +
> > +	} else {
> > +		amd_pstate_boost_down(cpudata);
> > +	}
> > +}
> > +
> > +static inline void amd_pstate_cppc_update_hook(struct update_util_data
> *data,
> > +						u64 time, unsigned int flags)
> > +{
> > +	struct amd_cpudata *cpudata = container_of(data,
> > +				struct amd_cpudata, update_util);
> > +
> > +	cpudata->sched_flags |= flags;
> > +
> > +	if (smp_processor_id() == cpudata->cpu)
> > +		amd_pstate_boost_update_util(cpudata, time); }
> > +
> > +static void amd_pstate_clear_update_util_hook(unsigned int cpu) {
> > +	struct amd_cpudata *cpudata = all_cpu_data[cpu];
> > +
> > +	if (!cpudata->update_util_set)
> > +		return;
> > +
> > +	cpufreq_remove_update_util_hook(cpu);
> > +	cpudata->update_util_set = false;
> > +	synchronize_rcu();
> > +}
> > +
> > +static void amd_pstate_set_update_util_hook(unsigned int cpu_num) {
> > +	struct amd_cpudata *cpudata = all_cpu_data[cpu_num];
> > +
> > +	if (!cppc_boost) {
> > +		if (cpudata->update_util_set)
> > +			amd_pstate_clear_update_util_hook(cpudata->cpu);
> > +		return;
> > +	}
> > +
> > +	if (cpudata->update_util_set)
> > +		return;
> > +
> > +	cpudata->sample.time = 0;
> > +	cpufreq_add_update_util_hook(cpu_num, &cpudata->update_util,
> > +
> 	amd_pstate_cppc_update_hook);
> > +	cpudata->update_util_set = true;
> > +}
> > +
> > +static void amd_pstate_epp_init(unsigned int cpu) {
> > +	struct amd_cpudata *cpudata = all_cpu_data[cpu];
> > +	u32 max_perf, min_perf;
> > +	u64 value;
> > +	s16 epp;
> > +	int ret;
> > +
> > +	max_perf = READ_ONCE(cpudata->highest_perf);
> > +	min_perf = READ_ONCE(cpudata->lowest_perf);
> > +
> > +	value = READ_ONCE(cpudata->cppc_req_cached);
> > +
> > +	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> > +		min_perf = max_perf;
> > +
> > +	/* Initial min/max values for CPPC Performance Controls Register */
> > +	value &= ~AMD_CPPC_MIN_PERF(~0L);
> > +	value |= AMD_CPPC_MIN_PERF(min_perf);
> > +
> > +	value &= ~AMD_CPPC_MAX_PERF(~0L);
> > +	value |= AMD_CPPC_MAX_PERF(max_perf);
> > +
> > +	/* CPPC EPP feature require to set zero to the desire perf bit */
> > +	value &= ~AMD_CPPC_DES_PERF(~0L);
> > +	value |= AMD_CPPC_DES_PERF(0);
> > +
> > +	if (cpudata->epp_policy == cpudata->policy)
> > +		goto skip_epp;
> > +
> > +	cpudata->epp_policy = cpudata->policy;
> > +
> > +	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> > +		epp = amd_pstate_get_epp(cpudata, value);
> > +		cpudata->epp_powersave = epp;
> > +		if (epp < 0)
> > +			goto skip_epp;
> > +		/* force the epp value to be zero for performance policy */
> > +		epp = 0;
> > +	} else {
> > +		if (cpudata->epp_powersave < 0)
> > +			goto skip_epp;
> > +		/* Get BIOS pre-defined epp value */
> > +		epp = amd_pstate_get_epp(cpudata, value);
> > +		if (epp)
> > +			goto skip_epp;
> > +		epp = cpudata->epp_powersave;
> > +	}
> > +	/* Set initial EPP value */
> > +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > +		value &= ~GENMASK_ULL(31, 24);
> > +		value |= (u64)epp << 24;
> > +	}
> > +
> > +skip_epp:
> > +	WRITE_ONCE(cpudata->cppc_req_cached, value);
> > +	ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> > +	if (!ret)
> > +		cpudata->epp_cached = epp;
> > +}
> > +
> > +static void amd_pstate_set_max_limits(struct amd_cpudata *cpudata) {
> > +	u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
> > +	u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
> > +	u32 max_limit = (hwp_cap >> 24) & 0xff;
> > +
> > +	hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
> > +	hwp_req |= AMD_CPPC_MIN_PERF(max_limit);
> > +	wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, hwp_req); }
> > +
> > +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) {
> > +	struct amd_cpudata *cpudata;
> > +
> > +	if (!policy->cpuinfo.max_freq)
> > +		return -ENODEV;
> > +
> > +	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
> > +				policy->cpuinfo.max_freq, policy->max);
> > +
> > +	cpudata = all_cpu_data[policy->cpu];
> > +	cpudata->policy = policy->policy;
> > +
> > +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> > +		mutex_lock(&amd_pstate_limits_lock);
> > +
> > +		if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> > +			amd_pstate_clear_update_util_hook(policy->cpu);
> > +			amd_pstate_set_max_limits(cpudata);
> > +		} else {
> > +			amd_pstate_set_update_util_hook(policy->cpu);
> > +		}
> > +
> > +		if (boot_cpu_has(X86_FEATURE_CPPC))
> > +			amd_pstate_epp_init(policy->cpu);
> > +
> > +		mutex_unlock(&amd_pstate_limits_lock);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void amd_pstate_verify_cpu_policy(struct amd_cpudata *cpudata,
> > +					   struct cpufreq_policy_data *policy)
> {
> > +	update_boost_state();
> > +	cpufreq_verify_within_cpu_limits(policy);
> > +}
> > +
> > +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data
> > +*policy) {
> > +	amd_pstate_verify_cpu_policy(all_cpu_data[policy->cpu], policy);
> > +	pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy-
> >min);
> > +	return 0;
> > +}
> > +
> >   static struct cpufreq_driver amd_pstate_driver = {
> >   	.flags		= CPUFREQ_CONST_LOOPS |
> CPUFREQ_NEED_UPDATE_LIMITS,
> >   	.verify		= amd_pstate_verify,
> > @@ -616,8 +1224,20 @@ static struct cpufreq_driver amd_pstate_driver =
> {
> >   	.attr		= amd_pstate_attr,
> >   };
> >
> > +static struct cpufreq_driver amd_pstate_epp_driver = {
> > +	.flags		= CPUFREQ_CONST_LOOPS,
> > +	.verify		= amd_pstate_epp_verify_policy,
> > +	.setpolicy	= amd_pstate_epp_set_policy,
> > +	.init		= amd_pstate_epp_cpu_init,
> > +	.exit		= amd_pstate_epp_cpu_exit,
> > +	.update_limits	= amd_pstate_epp_update_limits,
> > +	.name		= "amd_pstate_epp",
> > +	.attr		= amd_pstate_epp_attr,
> > +};
> > +
> >   static int __init amd_pstate_init(void)
> >   {
> > +	static struct amd_cpudata **cpudata;
> >   	int ret;
> >
> >   	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) @@ -644,7
> +1264,8
> > @@ static int __init amd_pstate_init(void)
> >   	/* capability check */
> >   	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> >   		pr_debug("AMD CPPC MSR based functionality is
> supported\n");
> > -		amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
> > +		if (!cppc_active)
> > +			default_pstate_driver->adjust_perf =
> amd_pstate_adjust_perf;
> >   	} else {
> >   		pr_debug("AMD CPPC shared memory based functionality is
> supported\n");
> >   		static_call_update(amd_pstate_enable, cppc_enable); @@ -
> 652,6
> > +1273,10 @@ static int __init amd_pstate_init(void)
> >   		static_call_update(amd_pstate_update_perf,
> cppc_update_perf);
> >   	}
> >
> > +	cpudata = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
> > +	if (!cpudata)
> > +		return -ENOMEM;
> > +	WRITE_ONCE(all_cpu_data, cpudata);
> >   	/* enable amd pstate feature */
> >   	ret = amd_pstate_enable(true);
> >   	if (ret) {
> > @@ -659,9 +1284,9 @@ static int __init amd_pstate_init(void)
> >   		return ret;
> >   	}
> >
> > -	ret = cpufreq_register_driver(&amd_pstate_driver);
> > +	ret = cpufreq_register_driver(default_pstate_driver);
> >   	if (ret)
> > -		pr_err("failed to register amd_pstate_driver with
> return %d\n",
> > +		pr_err("failed to register amd pstate driver with
> return %d\n",
> >   		       ret);
> >
> >   	return ret;
> > @@ -676,8 +1301,14 @@ static int __init amd_pstate_param(char *str)
> >   	if (!strcmp(str, "disable")) {
> >   		cppc_load = 0;
> >   		pr_info("driver is explicitly disabled\n");
> > -	} else if (!strcmp(str, "passive"))
> > +	} else if (!strcmp(str, "passive")) {
> >   		cppc_load = 1;
> > +		default_pstate_driver = &amd_pstate_driver;
> > +	} else if (!strcmp(str, "active")) {
> > +		cppc_active = 1;
> > +		cppc_load = 1;
> > +		default_pstate_driver = &amd_pstate_epp_driver;
> > +	}
> >
> >   	return 0;
> >   }
> > diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> > index 1c4b8659f171..888af62040f1 100644
> > --- a/include/linux/amd-pstate.h
> > +++ b/include/linux/amd-pstate.h
> > @@ -25,6 +25,7 @@ struct amd_aperf_mperf {
> >   	u64 aperf;
> >   	u64 mperf;
> >   	u64 tsc;
> > +	u64 time;
> >   };
> >
> >   /**
> > @@ -47,6 +48,18 @@ struct amd_aperf_mperf {
> >    * @prev: Last Aperf/Mperf/tsc count value read from register
> >    * @freq: current cpu frequency value
> >    * @boost_supported: check whether the Processor or SBIOS supports
> > boost mode
> > + * @epp_powersave: Last saved CPPC energy performance preference
> > +				when policy switched to performance
> > + * @epp_policy: Last saved policy used to set energy-performance
> > +preference
> > + * @epp_cached: Cached CPPC energy-performance preference value
> > + * @policy: Cpufreq policy value
> > + * @sched_flags: Store scheduler flags for possible cross CPU update
> > + * @update_util_set: CPUFreq utility callback is set
> > + * @last_update: Time stamp of the last performance state update
> > + * @cppc_boost_min: Last CPPC boosted min performance state
> > + * @cppc_cap1_cached: Cached value of the last CPPC Capabilities MSR
> > + * @update_util: Cpufreq utility callback information
> > + * @sample: the stored performance sample
> >    *
> >    * The amd_cpudata is key private data for each CPU thread in AMD P-
> State, and
> >    * represents all the attributes and goals that AMD P-State requests at
> runtime.
> > @@ -72,6 +85,28 @@ struct amd_cpudata {
> >
> >   	u64	freq;
> >   	bool	boost_supported;
> > +
> > +	/* EPP feature related attributes*/
> > +	s16	epp_powersave;
> > +	s16	epp_policy;
> > +	s16	epp_cached;
> > +	u32	policy;
> > +	u32	sched_flags;
> > +	bool	update_util_set;
> > +	u64	last_update;
> > +	u64	last_io_update;
> > +	u32	cppc_boost_min;
> > +	u64	cppc_cap1_cached;
> > +	struct	update_util_data update_util;
> > +	struct	amd_aperf_mperf sample;
> > +};
> > +
> > +/**
> > + * struct amd_pstate_params - global parameters for the performance
> > +control
> > + * @ cppc_boost_disabled wheher the core performance boost disabled
> > +*/ struct amd_pstate_params {
> > +	bool cppc_boost_disabled;
> >   };
> >
> >   #endif /* _LINUX_AMD_PSTATE_H */
  

Patch

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 204e39006dda..5989d4d25d0f 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -37,9 +37,12 @@ 
 #include <linux/uaccess.h>
 #include <linux/static_call.h>
 #include <linux/amd-pstate.h>
+#include <linux/cpufreq_common.h>
 
+#ifdef CONFIG_ACPI
 #include <acpi/processor.h>
 #include <acpi/cppc_acpi.h>
+#endif
 
 #include <asm/msr.h>
 #include <asm/processor.h>
@@ -59,9 +62,130 @@ 
  * we disable it by default to go acpi-cpufreq on these processors and add a
  * module parameter to be able to enable it manually for debugging.
  */
-static struct cpufreq_driver amd_pstate_driver;
+static bool cppc_active;
 static int cppc_load __initdata;
 
+static struct cpufreq_driver *default_pstate_driver;
+static struct amd_cpudata **all_cpu_data;
+static struct amd_pstate_params global_params;
+
+static DEFINE_MUTEX(amd_pstate_limits_lock);
+static DEFINE_MUTEX(amd_pstate_driver_lock);
+
+static bool cppc_boost __read_mostly;
+struct kobject *amd_pstate_kobj;
+
+#ifdef CONFIG_ACPI_CPPC_LIB
+static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
+{
+	s16 epp;
+	struct cppc_perf_caps perf_caps;
+	int ret;
+
+	if (boot_cpu_has(X86_FEATURE_CPPC)) {
+		if (!cppc_req_cached) {
+			epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
+					&cppc_req_cached);
+			if (epp)
+				return epp;
+		}
+		epp = (cppc_req_cached >> 24) & 0xFF;
+	} else {
+		ret = cppc_get_epp_caps(cpudata->cpu, &perf_caps);
+		if (ret < 0) {
+			pr_debug("Could not retrieve energy perf value (%d)\n", ret);
+			return -EIO;
+		}
+		epp = (s16) perf_caps.energy_perf;
+	}
+
+	return epp;
+}
+#endif
+
+static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
+{
+	s16 epp;
+	int index = -EINVAL;
+
+	epp = amd_pstate_get_epp(cpudata, 0);
+	if (epp < 0)
+		return epp;
+
+	switch (epp) {
+	case HWP_EPP_PERFORMANCE:
+		index = EPP_INDEX_PERFORMANCE;
+		break;
+	case HWP_EPP_BALANCE_PERFORMANCE:
+		index = EPP_INDEX_BALANCE_PERFORMANCE;
+		break;
+	case HWP_EPP_BALANCE_POWERSAVE:
+		index = EPP_INDEX_BALANCE_POWERSAVE;
+		break;
+	case HWP_EPP_POWERSAVE:
+		index = EPP_INDEX_POWERSAVE;
+		break;
+	default:
+			break;
+	}
+
+	return index;
+}
+
+#ifdef CONFIG_ACPI_CPPC_LIB
+static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
+{
+	int ret;
+	struct cppc_perf_ctrls perf_ctrls;
+
+	if (boot_cpu_has(X86_FEATURE_CPPC)) {
+		u64 value = READ_ONCE(cpudata->cppc_req_cached);
+
+		value &= ~GENMASK_ULL(31, 24);
+		value |= (u64)epp << 24;
+		WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+		if (!ret)
+			cpudata->epp_cached = epp;
+	} else {
+		perf_ctrls.energy_perf = epp;
+		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
+		if (ret) {
+			pr_debug("failed to set energy perf value (%d)\n", ret);
+			return ret;
+		}
+		cpudata->epp_cached = epp;
+	}
+
+	return ret;
+}
+
+static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
+		int pref_index)
+{
+	int epp = -EINVAL;
+	int ret;
+
+	if (!pref_index) {
+		pr_debug("EPP pref_index is invalid\n");
+		return -EINVAL;
+	}
+
+	if (epp == -EINVAL)
+		epp = epp_values[pref_index];
+
+	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
+		pr_debug("EPP cannot be set under performance policy\n");
+		return -EBUSY;
+	}
+
+	ret = amd_pstate_set_epp(cpudata, epp);
+
+	return ret;
+}
+#endif
+
 static inline int pstate_enable(bool enable)
 {
 	return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
@@ -70,11 +194,21 @@  static inline int pstate_enable(bool enable)
 static int cppc_enable(bool enable)
 {
 	int cpu, ret = 0;
+	struct cppc_perf_ctrls perf_ctrls;
 
 	for_each_present_cpu(cpu) {
 		ret = cppc_set_enable(cpu, enable);
 		if (ret)
 			return ret;
+
+		/* Enable autonomous mode for EPP */
+		if (!cppc_active) {
+			/* Set desired perf as zero to allow EPP firmware control */
+			perf_ctrls.desired_perf = 0;
+			ret = cppc_set_perf(cpu, &perf_ctrls);
+			if (ret)
+				return ret;
+		}
 	}
 
 	return ret;
@@ -417,7 +551,7 @@  static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
 		return;
 
 	cpudata->boost_supported = true;
-	amd_pstate_driver.boost_enabled = true;
+	default_pstate_driver->boost_enabled = true;
 }
 
 static void amd_perf_ctl_reset(unsigned int cpu)
@@ -591,10 +725,61 @@  static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
 	return sprintf(&buf[0], "%u\n", perf);
 }
 
+static ssize_t show_energy_performance_available_preferences(
+				struct cpufreq_policy *policy, char *buf)
+{
+	int i = 0;
+	int ret = 0;
+
+	while (energy_perf_strings[i] != NULL)
+		ret += sysfs_emit(buf, "%s", energy_perf_strings[i++]);
+
+	ret += sysfs_emit(buf, "\n");
+
+	return ret;
+}
+
+static ssize_t store_energy_performance_preference(
+		struct cpufreq_policy *policy, const char *buf, size_t count)
+{
+	struct amd_cpudata *cpudata = policy->driver_data;
+	char str_preference[21];
+	ssize_t ret;
+
+	ret = sscanf(buf, "%20s", str_preference);
+	if (ret != 1)
+		return -EINVAL;
+
+	ret = match_string(energy_perf_strings, -1, str_preference);
+	if (ret < 0)
+		return -EINVAL;
+
+	mutex_lock(&amd_pstate_limits_lock);
+	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
+	mutex_unlock(&amd_pstate_limits_lock);
+
+	return ret ?: count;
+}
+
+static ssize_t show_energy_performance_preference(
+				struct cpufreq_policy *policy, char *buf)
+{
+	struct amd_cpudata *cpudata = policy->driver_data;
+	int preference;
+
+	preference = amd_pstate_get_energy_pref_index(cpudata);
+	if (preference < 0)
+		return preference;
+
+	return  sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
+}
+
 cpufreq_freq_attr_ro(amd_pstate_max_freq);
 cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
 
 cpufreq_freq_attr_ro(amd_pstate_highest_perf);
+cpufreq_freq_attr_rw(energy_performance_preference);
+cpufreq_freq_attr_ro(energy_performance_available_preferences);
 
 static struct freq_attr *amd_pstate_attr[] = {
 	&amd_pstate_max_freq,
@@ -603,6 +788,429 @@  static struct freq_attr *amd_pstate_attr[] = {
 	NULL,
 };
 
+static struct freq_attr *amd_pstate_epp_attr[] = {
+	&amd_pstate_max_freq,
+	&amd_pstate_lowest_nonlinear_freq,
+	&amd_pstate_highest_perf,
+	&energy_performance_preference,
+	&energy_performance_available_preferences,
+	NULL,
+};
+
+static inline void update_boost_state(void)
+{
+	u64 misc_en;
+	struct amd_cpudata *cpudata;
+
+	cpudata = all_cpu_data[0];
+	rdmsrl(MSR_K7_HWCR, misc_en);
+	global_params.cppc_boost_disabled = misc_en & BIT_ULL(25);
+}
+
+static bool amd_pstate_acpi_pm_profile_server(void)
+{
+	if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
+	    acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
+		return true;
+
+	return false;
+}
+
+static int amd_pstate_init_cpu(unsigned int cpunum)
+{
+	struct amd_cpudata *cpudata;
+
+	cpudata = all_cpu_data[cpunum];
+	if (!cpudata) {
+		cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
+		if (!cpudata)
+			return -ENOMEM;
+		WRITE_ONCE(all_cpu_data[cpunum], cpudata);
+
+		cpudata->cpu = cpunum;
+
+		if (cppc_active) {
+			if (amd_pstate_acpi_pm_profile_server())
+				cppc_boost = true;
+		}
+
+	}
+	cpudata->epp_powersave = -EINVAL;
+	cpudata->epp_policy = 0;
+	pr_debug("controlling: cpu %d\n", cpunum);
+	return 0;
+}
+
+static int __amd_pstate_cpu_init(struct cpufreq_policy *policy)
+{
+	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
+	struct amd_cpudata *cpudata;
+	struct device *dev;
+	int rc;
+	u64 value;
+
+	rc = amd_pstate_init_cpu(policy->cpu);
+	if (rc)
+		return rc;
+
+	cpudata = all_cpu_data[policy->cpu];
+
+	dev = get_cpu_device(policy->cpu);
+	if (!dev)
+		goto free_cpudata1;
+
+	rc = amd_pstate_init_perf(cpudata);
+	if (rc)
+		goto free_cpudata1;
+
+	min_freq = amd_get_min_freq(cpudata);
+	max_freq = amd_get_max_freq(cpudata);
+	nominal_freq = amd_get_nominal_freq(cpudata);
+	lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
+	if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
+		dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
+				min_freq, max_freq);
+		ret = -EINVAL;
+		goto free_cpudata1;
+	}
+
+	policy->min = min_freq;
+	policy->max = max_freq;
+
+	policy->cpuinfo.min_freq = min_freq;
+	policy->cpuinfo.max_freq = max_freq;
+	/* It will be updated by governor */
+	policy->cur = policy->cpuinfo.min_freq;
+
+	/* Initial processor data capability frequencies */
+	cpudata->max_freq = max_freq;
+	cpudata->min_freq = min_freq;
+	cpudata->nominal_freq = nominal_freq;
+	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
+
+	policy->driver_data = cpudata;
+
+	update_boost_state();
+	cpudata->epp_cached = amd_pstate_get_epp(cpudata, value);
+
+	policy->min = policy->cpuinfo.min_freq;
+	policy->max = policy->cpuinfo.max_freq;
+
+	if (boot_cpu_has(X86_FEATURE_CPPC))
+		policy->fast_switch_possible = true;
+
+	if (boot_cpu_has(X86_FEATURE_CPPC)) {
+		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
+		if (ret)
+			return ret;
+		WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
+		if (ret)
+			return ret;
+		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
+	}
+	amd_pstate_boost_init(cpudata);
+
+	return 0;
+
+free_cpudata1:
+	kfree(cpudata);
+	return ret;
+}
+
+static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
+{
+	int ret;
+
+	ret = __amd_pstate_cpu_init(policy);
+	if (ret)
+		return ret;
+	/*
+	 * Set the policy to powersave to provide a valid fallback value in case
+	 * the default cpufreq governor is neither powersave nor performance.
+	 */
+	policy->policy = CPUFREQ_POLICY_POWERSAVE;
+
+	return 0;
+}
+
+static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
+{
+	pr_debug("CPU %d exiting\n", policy->cpu);
+	policy->fast_switch_possible = false;
+	return 0;
+}
+
+static void amd_pstate_update_max_freq(unsigned int cpu)
+{
+	struct cpufreq_policy *policy = policy = cpufreq_cpu_get(cpu);
+
+	if (!policy)
+		return;
+
+	refresh_frequency_limits(policy);
+	cpufreq_cpu_put(policy);
+}
+
+static void amd_pstate_epp_update_limits(unsigned int cpu)
+{
+	mutex_lock(&amd_pstate_driver_lock);
+	update_boost_state();
+	if (global_params.cppc_boost_disabled) {
+		for_each_possible_cpu(cpu)
+			amd_pstate_update_max_freq(cpu);
+	} else {
+		cpufreq_update_policy(cpu);
+	}
+	mutex_unlock(&amd_pstate_driver_lock);
+}
+
+static int cppc_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
+
+static inline void amd_pstate_boost_up(struct amd_cpudata *cpudata)
+{
+	u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
+	u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
+	u32 max_limit = (hwp_req & 0xff);
+	u32 min_limit = (hwp_req & 0xff00) >> 8;
+	u32 boost_level1;
+
+	/* If max and min are equal or already at max, nothing to boost */
+	if (max_limit == min_limit)
+		return;
+
+	/* Set boost max and min to initial value */
+	if (!cpudata->cppc_boost_min)
+		cpudata->cppc_boost_min = min_limit;
+
+	boost_level1 = ((AMD_CPPC_NOMINAL_PERF(hwp_cap) + min_limit) >> 1);
+
+	if (cpudata->cppc_boost_min < boost_level1)
+		cpudata->cppc_boost_min = boost_level1;
+	else if (cpudata->cppc_boost_min < AMD_CPPC_NOMINAL_PERF(hwp_cap))
+		cpudata->cppc_boost_min = AMD_CPPC_NOMINAL_PERF(hwp_cap);
+	else if (cpudata->cppc_boost_min == AMD_CPPC_NOMINAL_PERF(hwp_cap))
+		cpudata->cppc_boost_min = max_limit;
+	else
+		return;
+
+	hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
+	hwp_req |= AMD_CPPC_MIN_PERF(cpudata->cppc_boost_min);
+	wrmsrl(MSR_AMD_CPPC_REQ, hwp_req);
+	cpudata->last_update = cpudata->sample.time;
+}
+
+static inline void amd_pstate_boost_down(struct amd_cpudata *cpudata)
+{
+	bool expired;
+
+	if (cpudata->cppc_boost_min) {
+		expired = time_after64(cpudata->sample.time, cpudata->last_update +
+					cppc_boost_hold_time_ns);
+
+		if (expired) {
+			wrmsrl(MSR_AMD_CPPC_REQ, cpudata->cppc_req_cached);
+			cpudata->cppc_boost_min = 0;
+		}
+	}
+
+	cpudata->last_update = cpudata->sample.time;
+}
+
+static inline void amd_pstate_boost_update_util(struct amd_cpudata *cpudata,
+						      u64 time)
+{
+	cpudata->sample.time = time;
+	if (smp_processor_id() != cpudata->cpu)
+		return;
+
+	if (cpudata->sched_flags & SCHED_CPUFREQ_IOWAIT) {
+		bool do_io = false;
+
+		cpudata->sched_flags = 0;
+		/*
+		 * Set iowait_boost flag and update time. Since IO WAIT flag
+		 * is set all the time, we can't just conclude that there is
+		 * some IO bound activity is scheduled on this CPU with just
+		 * one occurrence. If we receive at least two in two
+		 * consecutive ticks, then we treat as boost candidate.
+		 * This is leveraged from Intel Pstate driver.
+		 */
+		if (time_before64(time, cpudata->last_io_update + 2 * TICK_NSEC))
+			do_io = true;
+
+		cpudata->last_io_update = time;
+
+		if (do_io)
+			amd_pstate_boost_up(cpudata);
+
+	} else {
+		amd_pstate_boost_down(cpudata);
+	}
+}
+
+static inline void amd_pstate_cppc_update_hook(struct update_util_data *data,
+						u64 time, unsigned int flags)
+{
+	struct amd_cpudata *cpudata = container_of(data,
+				struct amd_cpudata, update_util);
+
+	cpudata->sched_flags |= flags;
+
+	if (smp_processor_id() == cpudata->cpu)
+		amd_pstate_boost_update_util(cpudata, time);
+}
+
+static void amd_pstate_clear_update_util_hook(unsigned int cpu)
+{
+	struct amd_cpudata *cpudata = all_cpu_data[cpu];
+
+	if (!cpudata->update_util_set)
+		return;
+
+	cpufreq_remove_update_util_hook(cpu);
+	cpudata->update_util_set = false;
+	synchronize_rcu();
+}
+
+static void amd_pstate_set_update_util_hook(unsigned int cpu_num)
+{
+	struct amd_cpudata *cpudata = all_cpu_data[cpu_num];
+
+	if (!cppc_boost) {
+		if (cpudata->update_util_set)
+			amd_pstate_clear_update_util_hook(cpudata->cpu);
+		return;
+	}
+
+	if (cpudata->update_util_set)
+		return;
+
+	cpudata->sample.time = 0;
+	cpufreq_add_update_util_hook(cpu_num, &cpudata->update_util,
+						amd_pstate_cppc_update_hook);
+	cpudata->update_util_set = true;
+}
+
+static void amd_pstate_epp_init(unsigned int cpu)
+{
+	struct amd_cpudata *cpudata = all_cpu_data[cpu];
+	u32 max_perf, min_perf;
+	u64 value;
+	s16 epp;
+	int ret;
+
+	max_perf = READ_ONCE(cpudata->highest_perf);
+	min_perf = READ_ONCE(cpudata->lowest_perf);
+
+	value = READ_ONCE(cpudata->cppc_req_cached);
+
+	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+		min_perf = max_perf;
+
+	/* Initial min/max values for CPPC Performance Controls Register */
+	value &= ~AMD_CPPC_MIN_PERF(~0L);
+	value |= AMD_CPPC_MIN_PERF(min_perf);
+
+	value &= ~AMD_CPPC_MAX_PERF(~0L);
+	value |= AMD_CPPC_MAX_PERF(max_perf);
+
+	/* CPPC EPP feature require to set zero to the desire perf bit */
+	value &= ~AMD_CPPC_DES_PERF(~0L);
+	value |= AMD_CPPC_DES_PERF(0);
+
+	if (cpudata->epp_policy == cpudata->policy)
+		goto skip_epp;
+
+	cpudata->epp_policy = cpudata->policy;
+
+	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
+		epp = amd_pstate_get_epp(cpudata, value);
+		cpudata->epp_powersave = epp;
+		if (epp < 0)
+			goto skip_epp;
+		/* force the epp value to be zero for performance policy */
+		epp = 0;
+	} else {
+		if (cpudata->epp_powersave < 0)
+			goto skip_epp;
+		/* Get BIOS pre-defined epp value */
+		epp = amd_pstate_get_epp(cpudata, value);
+		if (epp)
+			goto skip_epp;
+		epp = cpudata->epp_powersave;
+	}
+	/* Set initial EPP value */
+	if (boot_cpu_has(X86_FEATURE_CPPC)) {
+		value &= ~GENMASK_ULL(31, 24);
+		value |= (u64)epp << 24;
+	}
+
+skip_epp:
+	WRITE_ONCE(cpudata->cppc_req_cached, value);
+	ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+	if (!ret)
+		cpudata->epp_cached = epp;
+}
+
+static void amd_pstate_set_max_limits(struct amd_cpudata *cpudata)
+{
+	u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
+	u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
+	u32 max_limit = (hwp_cap >> 24) & 0xff;
+
+	hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
+	hwp_req |= AMD_CPPC_MIN_PERF(max_limit);
+	wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, hwp_req);
+}
+
+static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
+{
+	struct amd_cpudata *cpudata;
+
+	if (!policy->cpuinfo.max_freq)
+		return -ENODEV;
+
+	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
+				policy->cpuinfo.max_freq, policy->max);
+
+	cpudata = all_cpu_data[policy->cpu];
+	cpudata->policy = policy->policy;
+
+	if (boot_cpu_has(X86_FEATURE_CPPC)) {
+		mutex_lock(&amd_pstate_limits_lock);
+
+		if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
+			amd_pstate_clear_update_util_hook(policy->cpu);
+			amd_pstate_set_max_limits(cpudata);
+		} else {
+			amd_pstate_set_update_util_hook(policy->cpu);
+		}
+
+		if (boot_cpu_has(X86_FEATURE_CPPC))
+			amd_pstate_epp_init(policy->cpu);
+
+		mutex_unlock(&amd_pstate_limits_lock);
+	}
+
+	return 0;
+}
+
+static void amd_pstate_verify_cpu_policy(struct amd_cpudata *cpudata,
+					   struct cpufreq_policy_data *policy)
+{
+	update_boost_state();
+	cpufreq_verify_within_cpu_limits(policy);
+}
+
+static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
+{
+	amd_pstate_verify_cpu_policy(all_cpu_data[policy->cpu], policy);
+	pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
+	return 0;
+}
+
 static struct cpufreq_driver amd_pstate_driver = {
 	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
 	.verify		= amd_pstate_verify,
@@ -616,8 +1224,20 @@  static struct cpufreq_driver amd_pstate_driver = {
 	.attr		= amd_pstate_attr,
 };
 
+static struct cpufreq_driver amd_pstate_epp_driver = {
+	.flags		= CPUFREQ_CONST_LOOPS,
+	.verify		= amd_pstate_epp_verify_policy,
+	.setpolicy	= amd_pstate_epp_set_policy,
+	.init		= amd_pstate_epp_cpu_init,
+	.exit		= amd_pstate_epp_cpu_exit,
+	.update_limits	= amd_pstate_epp_update_limits,
+	.name		= "amd_pstate_epp",
+	.attr		= amd_pstate_epp_attr,
+};
+
 static int __init amd_pstate_init(void)
 {
+	static struct amd_cpudata **cpudata;
 	int ret;
 
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
@@ -644,7 +1264,8 @@  static int __init amd_pstate_init(void)
 	/* capability check */
 	if (boot_cpu_has(X86_FEATURE_CPPC)) {
 		pr_debug("AMD CPPC MSR based functionality is supported\n");
-		amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
+		if (!cppc_active)
+			default_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
 	} else {
 		pr_debug("AMD CPPC shared memory based functionality is supported\n");
 		static_call_update(amd_pstate_enable, cppc_enable);
@@ -652,6 +1273,10 @@  static int __init amd_pstate_init(void)
 		static_call_update(amd_pstate_update_perf, cppc_update_perf);
 	}
 
+	cpudata = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
+	if (!cpudata)
+		return -ENOMEM;
+	WRITE_ONCE(all_cpu_data, cpudata);
 	/* enable amd pstate feature */
 	ret = amd_pstate_enable(true);
 	if (ret) {
@@ -659,9 +1284,9 @@  static int __init amd_pstate_init(void)
 		return ret;
 	}
 
-	ret = cpufreq_register_driver(&amd_pstate_driver);
+	ret = cpufreq_register_driver(default_pstate_driver);
 	if (ret)
-		pr_err("failed to register amd_pstate_driver with return %d\n",
+		pr_err("failed to register amd pstate driver with return %d\n",
 		       ret);
 
 	return ret;
@@ -676,8 +1301,14 @@  static int __init amd_pstate_param(char *str)
 	if (!strcmp(str, "disable")) {
 		cppc_load = 0;
 		pr_info("driver is explicitly disabled\n");
-	} else if (!strcmp(str, "passive"))
+	} else if (!strcmp(str, "passive")) {
 		cppc_load = 1;
+		default_pstate_driver = &amd_pstate_driver;
+	} else if (!strcmp(str, "active")) {
+		cppc_active = 1;
+		cppc_load = 1;
+		default_pstate_driver = &amd_pstate_epp_driver;
+	}
 
 	return 0;
 }
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index 1c4b8659f171..888af62040f1 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -25,6 +25,7 @@  struct amd_aperf_mperf {
 	u64 aperf;
 	u64 mperf;
 	u64 tsc;
+	u64 time;
 };
 
 /**
@@ -47,6 +48,18 @@  struct amd_aperf_mperf {
  * @prev: Last Aperf/Mperf/tsc count value read from register
  * @freq: current cpu frequency value
  * @boost_supported: check whether the Processor or SBIOS supports boost mode
+ * @epp_powersave: Last saved CPPC energy performance preference
+				when policy switched to performance
+ * @epp_policy: Last saved policy used to set energy-performance preference
+ * @epp_cached: Cached CPPC energy-performance preference value
+ * @policy: Cpufreq policy value
+ * @sched_flags: Store scheduler flags for possible cross CPU update
+ * @update_util_set: CPUFreq utility callback is set
+ * @last_update: Time stamp of the last performance state update
+ * @cppc_boost_min: Last CPPC boosted min performance state
+ * @cppc_cap1_cached: Cached value of the last CPPC Capabilities MSR
+ * @update_util: Cpufreq utility callback information
+ * @sample: the stored performance sample
  *
  * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
  * represents all the attributes and goals that AMD P-State requests at runtime.
@@ -72,6 +85,28 @@  struct amd_cpudata {
 
 	u64	freq;
 	bool	boost_supported;
+
+	/* EPP feature related attributes*/
+	s16	epp_powersave;
+	s16	epp_policy;
+	s16	epp_cached;
+	u32	policy;
+	u32	sched_flags;
+	bool	update_util_set;
+	u64	last_update;
+	u64	last_io_update;
+	u32	cppc_boost_min;
+	u64	cppc_cap1_cached;
+	struct	update_util_data update_util;
+	struct	amd_aperf_mperf sample;
+};
+
+/**
+ * struct amd_pstate_params - global parameters for the performance control
+ * @ cppc_boost_disabled wheher the core performance boost disabled
+ */
+struct amd_pstate_params {
+	bool cppc_boost_disabled;
 };
 
 #endif /* _LINUX_AMD_PSTATE_H */