[4/4] perf: arm_cspmu: ampere_cspmu: Add support for Ampere SoC PMU

Message ID 20230622011141.328029-5-ilkka@os.amperecomputing.com
State New
Headers
Series perf: ampere: Add support for Ampere SoC PMUs |

Commit Message

Ilkka Koskinen June 22, 2023, 1:11 a.m. UTC
  Ampere SoC PMU follows CoreSight PMU architecture. It uses implementation
specific registers to filter events rather than PMEVFILTnR registers.

Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
---
 .../admin-guide/perf/ampere_cspmu.rst         |  29 +++
 drivers/perf/arm_cspmu/Makefile               |   2 +-
 drivers/perf/arm_cspmu/ampere_cspmu.c         | 232 ++++++++++++++++++
 drivers/perf/arm_cspmu/ampere_cspmu.h         |  17 ++
 drivers/perf/arm_cspmu/arm_cspmu.c            |   7 +
 5 files changed, 286 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/admin-guide/perf/ampere_cspmu.rst
 create mode 100644 drivers/perf/arm_cspmu/ampere_cspmu.c
 create mode 100644 drivers/perf/arm_cspmu/ampere_cspmu.h
  

Comments

Jonathan Cameron June 22, 2023, 8:51 a.m. UTC | #1
On Wed, 21 Jun 2023 18:11:41 -0700
Ilkka Koskinen <ilkka@os.amperecomputing.com> wrote:

> Ampere SoC PMU follows CoreSight PMU architecture. It uses implementation
> specific registers to filter events rather than PMEVFILTnR registers.
> 
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Hi Ilkka,

Drive by review so not super detailed (I was curious) but a few questions/comments inline.

Jonathan

> ---
>  .../admin-guide/perf/ampere_cspmu.rst         |  29 +++
>  drivers/perf/arm_cspmu/Makefile               |   2 +-
>  drivers/perf/arm_cspmu/ampere_cspmu.c         | 232 ++++++++++++++++++
>  drivers/perf/arm_cspmu/ampere_cspmu.h         |  17 ++
>  drivers/perf/arm_cspmu/arm_cspmu.c            |   7 +
>  5 files changed, 286 insertions(+), 1 deletion(-)
>  create mode 100644 Documentation/admin-guide/perf/ampere_cspmu.rst
>  create mode 100644 drivers/perf/arm_cspmu/ampere_cspmu.c
>  create mode 100644 drivers/perf/arm_cspmu/ampere_cspmu.h
> 
> diff --git a/Documentation/admin-guide/perf/ampere_cspmu.rst b/Documentation/admin-guide/perf/ampere_cspmu.rst
> new file mode 100644
> index 000000000000..bf86bffeef63
> --- /dev/null
> +++ b/Documentation/admin-guide/perf/ampere_cspmu.rst
> @@ -0,0 +1,29 @@

> +
> +Example for perf tool use::
> +
> +  / # perf list ampere
> +
> +    ampere_mcu_pmu_0/act_sent/                         [Kernel PMU event]
> +    <...>
> +    ampere_mcu_pmu_1/rd_sent/                          [Kernel PMU event]
> +    <...>
> +
> +  / # perf stat -a -e ampere_mcu_pmu_0/act_sent,filter_enable=3,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \
> +        sleep 1

Why filter_enable=3?



> +static u32 ampere_cspmu_event_filter(const struct perf_event *event)
> +{

Whilst lots of other comments on this - perhaps add another one here to
why this is a noop.

> +	return 0;
> +}
> +
> +static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
> +				       struct hw_perf_event *hwc,
> +				       u32 filter)
> +{
> +	struct perf_event *event;
> +	unsigned int idx;
> +	u32 threshold, rank, bank;
> +
> +	/*
> +	 * At this point, all the events have the same filter settings.
> +	 * Therefore, take the first event and use its configuration.
> +	 */
> +	idx = find_first_bit(cspmu->hw_events.used_ctrs,
> +			     cspmu->cycle_counter_logical_idx);
> +
> +	event = cspmu->hw_events.events[idx];
> +
> +	threshold	= get_threshold(event);
> +	rank		= get_rank(event);
> +	bank		= get_bank(event);
> +
> +	writel(threshold, cspmu->base0 + PMAUXR0);
> +	writel(rank, cspmu->base0 + PMAUXR1);
> +	writel(bank, cspmu->base0 + PMAUXR2);
> +}
> +
> +static int ampere_cspmu_validate_configs(struct perf_event *event,
> +					 struct perf_event *event2)
> +{
> +	if (get_threshold(event) != get_threshold(event2) ||
> +	    get_rank(event) != get_rank(event2) ||
> +	    get_bank(event) != get_bank(event2))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu,
> +				       struct perf_event *new)
> +{
> +	struct perf_event *curr, *leader = new->group_leader;
> +	unsigned int idx;
> +	int ret;
> +
> +	ret = ampere_cspmu_validate_configs(new, leader);
> +	if (ret)
> +		return ret;
> +
> +	/* We compare the global filter settings to existing events */
> +	idx = find_first_bit(cspmu->hw_events.used_ctrs,
> +			     cspmu->cycle_counter_logical_idx);
> +
> +	/* This is the first event */

Maybe add why that matters to the comment?

> +	if (idx == cspmu->cycle_counter_logical_idx)
> +		return 0;
> +
> +	curr = cspmu->hw_events.events[idx];
> +
> +	return ampere_cspmu_validate_configs(curr, new);
> +}
> +
> +static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu,
> +				      const char *name_pattern)
> +{
> +	struct device *dev = cspmu->dev;
> +	static atomic_t pmu_generic_idx = {0};

Why not an ida?

If the pmu drivers ever become easy to unbind then you won't get ID
reusage like this an eventually you will run into overflow problems.

> +
> +	return devm_kasprintf(dev, GFP_KERNEL, name_pattern,
> +			      atomic_fetch_inc(&pmu_generic_idx));
> +}
> +
> +int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
> +{
> +	struct device *dev = cspmu->dev;
> +	struct ampere_cspmu_ctx *ctx;
> +	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
> +
> +	ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ctx->event_attr		= ampereone_mcu_pmu_event_attrs;
> +	ctx->format_attr	= ampereone_mcu_format_attrs;
> +	ctx->name		= ampere_cspmu_format_name(cspmu,
> +							   "ampere_mcu_pmu_%u");

Long line and need to break avoided if you don't bother trying to align the = signs...
Personally I don't like this style as it causes a lot of churn as drivers
evolve, but meh, it's up to you.

Given the result is confusing if the allocation fails (name not what is expected)
I would also check that allocation and error out if it fails.  Obviously it won't
under realistic circumstances, but a bit of paranoia never hurt anyone.

> +	cspmu->impl.ctx = ctx;
> +
> +	impl_ops->event_filter		= ampere_cspmu_event_filter;
> +	impl_ops->set_ev_filter		= ampere_cspmu_set_ev_filter;
> +	impl_ops->validate_event	= ampere_cspmu_validate_event;
> +	impl_ops->get_name		= ampere_cspmu_get_name;
> +	impl_ops->get_event_attrs	= ampere_cspmu_get_event_attrs;
> +	impl_ops->get_format_attrs	= ampere_cspmu_get_format_attrs;
> +
> +	return 0;
> +}
> +
> +MODULE_LICENSE("GPL v2");

...

> diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
> index 471d6d7ac81a..587515eea0b4 100644
> --- a/drivers/perf/arm_cspmu/arm_cspmu.c
> +++ b/drivers/perf/arm_cspmu/arm_cspmu.c
> @@ -29,6 +29,7 @@
>  #include <linux/perf_event.h>
>  #include <linux/platform_device.h>
>  
> +#include "ampere_cspmu.h"

I'd be tempted to keep the generic header in a separate block then
follow with the vendor ones.  Not particularly important though.

>  #include "arm_cspmu.h"
>  #include "nvidia_cspmu.h"
>  
> @@ -114,6 +115,7 @@
>  
>  /* JEDEC-assigned JEP106 identification code */
>  #define ARM_CSPMU_IMPL_ID_NVIDIA		0x36B
> +#define ARM_CSPMU_IMPL_ID_AMPERE		0xA16
>  
>  static unsigned long arm_cspmu_cpuhp_state;
>  
> @@ -388,6 +390,11 @@ static const struct impl_match impl_match[] = {
>  	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
>  	  .impl_init_ops = nv_cspmu_init_ops
>  	},
> +	{
> +	  .pmiidr = ARM_CSPMU_IMPL_ID_AMPERE,
> +	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
> +	  .impl_init_ops = ampere_cspmu_init_ops
> +	},
>  	{}
>  };
>
  
Ilkka Koskinen June 22, 2023, 11:21 p.m. UTC | #2
Hi Jonathan

On Thu, 22 Jun 2023, Jonathan Cameron wrote:
> On Wed, 21 Jun 2023 18:11:41 -0700
> Ilkka Koskinen <ilkka@os.amperecomputing.com> wrote:
>
>> Ampere SoC PMU follows CoreSight PMU architecture. It uses implementation
>> specific registers to filter events rather than PMEVFILTnR registers.
>>
>> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
> Hi Ilkka,
>
> Drive by review so not super detailed (I was curious) but a few questions/comments inline.
>
> Jonathan
>
>> ---
>>  .../admin-guide/perf/ampere_cspmu.rst         |  29 +++
>>  drivers/perf/arm_cspmu/Makefile               |   2 +-
>>  drivers/perf/arm_cspmu/ampere_cspmu.c         | 232 ++++++++++++++++++
>>  drivers/perf/arm_cspmu/ampere_cspmu.h         |  17 ++
>>  drivers/perf/arm_cspmu/arm_cspmu.c            |   7 +
>>  5 files changed, 286 insertions(+), 1 deletion(-)
>>  create mode 100644 Documentation/admin-guide/perf/ampere_cspmu.rst
>>  create mode 100644 drivers/perf/arm_cspmu/ampere_cspmu.c
>>  create mode 100644 drivers/perf/arm_cspmu/ampere_cspmu.h
>>
>> diff --git a/Documentation/admin-guide/perf/ampere_cspmu.rst b/Documentation/admin-guide/perf/ampere_cspmu.rst
>> new file mode 100644
>> index 000000000000..bf86bffeef63
>> --- /dev/null
>> +++ b/Documentation/admin-guide/perf/ampere_cspmu.rst
>> @@ -0,0 +1,29 @@
>
>> +
>> +Example for perf tool use::
>> +
>> +  / # perf list ampere
>> +
>> +    ampere_mcu_pmu_0/act_sent/                         [Kernel PMU event]
>> +    <...>
>> +    ampere_mcu_pmu_1/rd_sent/                          [Kernel PMU event]
>> +    <...>
>> +
>> +  / # perf stat -a -e ampere_mcu_pmu_0/act_sent,filter_enable=3,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \
>> +        sleep 1
>
> Why filter_enable=3?

Thanks for catching. That's from the first cspmu version and got 
accidentally back there. The rest of the patch looks like what it was 
supposed to be though.


>> +static u32 ampere_cspmu_event_filter(const struct perf_event *event)
>> +{
>
> Whilst lots of other comments on this - perhaps add another one here to
> why this is a noop.

Sure, makes sense.

>
>> +	return 0;
>> +}

...

>> +static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu,
>> +				       struct perf_event *new)
>> +{
>> +	struct perf_event *curr, *leader = new->group_leader;
>> +	unsigned int idx;
>> +	int ret;
>> +
>> +	ret = ampere_cspmu_validate_configs(new, leader);
>> +	if (ret)
>> +		return ret;
>> +
>> +	/* We compare the global filter settings to existing events */
>> +	idx = find_first_bit(cspmu->hw_events.used_ctrs,
>> +			     cspmu->cycle_counter_logical_idx);
>> +
>> +	/* This is the first event */
>
> Maybe add why that matters to the comment?

Sure, I'll do that.

>
>> +	if (idx == cspmu->cycle_counter_logical_idx)
>> +		return 0;
>> +
>> +	curr = cspmu->hw_events.events[idx];
>> +
>> +	return ampere_cspmu_validate_configs(curr, new);
>> +}
>> +
>> +static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu,
>> +				      const char *name_pattern)
>> +{
>> +	struct device *dev = cspmu->dev;
>> +	static atomic_t pmu_generic_idx = {0};
>
> Why not an ida?
>
> If the pmu drivers ever become easy to unbind then you won't get ID
> reusage like this an eventually you will run into overflow problems.

Didn't appear in my mind when I wrote the submodule. I'll change it.

Releasing devices would eventually require a release hook too to support 
reusing the IDs but we can add that later.

>
>> +
>> +	return devm_kasprintf(dev, GFP_KERNEL, name_pattern,
>> +			      atomic_fetch_inc(&pmu_generic_idx));
>> +}
>> +
>> +int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
>> +{
>> +	struct device *dev = cspmu->dev;
>> +	struct ampere_cspmu_ctx *ctx;
>> +	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
>> +
>> +	ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL);
>> +	if (!ctx)
>> +		return -ENOMEM;
>> +
>> +	ctx->event_attr		= ampereone_mcu_pmu_event_attrs;
>> +	ctx->format_attr	= ampereone_mcu_format_attrs;
>> +	ctx->name		= ampere_cspmu_format_name(cspmu,
>> +							   "ampere_mcu_pmu_%u");
>
> Long line and need to break avoided if you don't bother trying to align the = signs...
> Personally I don't like this style as it causes a lot of churn as drivers
> evolve, but meh, it's up to you.

I don't really have preference on either way. I remove the aligment and 
combine those two lines.

>
> Given the result is confusing if the allocation fails (name not what is expected)
> I would also check that allocation and error out if it fails.  Obviously it won't
> under realistic circumstances, but a bit of paranoia never hurt anyone.

I agree, I fix it.

>
>> +	cspmu->impl.ctx = ctx;
>> +
>> +	impl_ops->event_filter		= ampere_cspmu_event_filter;
>> +	impl_ops->set_ev_filter		= ampere_cspmu_set_ev_filter;
>> +	impl_ops->validate_event	= ampere_cspmu_validate_event;
>> +	impl_ops->get_name		= ampere_cspmu_get_name;
>> +	impl_ops->get_event_attrs	= ampere_cspmu_get_event_attrs;
>> +	impl_ops->get_format_attrs	= ampere_cspmu_get_format_attrs;
>> +
>> +	return 0;
>> +}
>> +
>> +MODULE_LICENSE("GPL v2");
>
> ...
>
>> diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
>> index 471d6d7ac81a..587515eea0b4 100644
>> --- a/drivers/perf/arm_cspmu/arm_cspmu.c
>> +++ b/drivers/perf/arm_cspmu/arm_cspmu.c
>> @@ -29,6 +29,7 @@
>>  #include <linux/perf_event.h>
>>  #include <linux/platform_device.h>
>>
>> +#include "ampere_cspmu.h"
>
> I'd be tempted to keep the generic header in a separate block then
> follow with the vendor ones.  Not particularly important though.

I like your idea

>
>>  #include "arm_cspmu.h"
>>  #include "nvidia_cspmu.h"
>>
>> @@ -114,6 +115,7 @@
>>
>>  /* JEDEC-assigned JEP106 identification code */
>>  #define ARM_CSPMU_IMPL_ID_NVIDIA		0x36B
>> +#define ARM_CSPMU_IMPL_ID_AMPERE		0xA16
>>
>>  static unsigned long arm_cspmu_cpuhp_state;
>>
>> @@ -388,6 +390,11 @@ static const struct impl_match impl_match[] = {
>>  	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
>>  	  .impl_init_ops = nv_cspmu_init_ops
>>  	},
>> +	{
>> +	  .pmiidr = ARM_CSPMU_IMPL_ID_AMPERE,
>> +	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
>> +	  .impl_init_ops = ampere_cspmu_init_ops
>> +	},
>>  	{}
>>  };
>>
>
>
  

Patch

diff --git a/Documentation/admin-guide/perf/ampere_cspmu.rst b/Documentation/admin-guide/perf/ampere_cspmu.rst
new file mode 100644
index 000000000000..bf86bffeef63
--- /dev/null
+++ b/Documentation/admin-guide/perf/ampere_cspmu.rst
@@ -0,0 +1,29 @@ 
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+Ampere SoC Performance Monitoring Unit (PMU)
+============================================
+
+Ampere SoC PMU is a generic PMU IP that follows Arm CoreSight PMU architecture.
+Therefore, the driver is implemented as a submodule of arm_cspmu driver. At the
+first phase it's used for counting MCU events on AmpereOne.
+
+
+MCU PMU events
+--------------
+
+The PMU driver supports setting filters for "rank", "bank", and "threshold".
+Note, that the filters are per PMU instance rather than per event.
+
+
+Example for perf tool use::
+
+  / # perf list ampere
+
+    ampere_mcu_pmu_0/act_sent/                         [Kernel PMU event]
+    <...>
+    ampere_mcu_pmu_1/rd_sent/                          [Kernel PMU event]
+    <...>
+
+  / # perf stat -a -e ampere_mcu_pmu_0/act_sent,filter_enable=3,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \
+        sleep 1
diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile
index fedb17df982d..b80a8bd8da54 100644
--- a/drivers/perf/arm_cspmu/Makefile
+++ b/drivers/perf/arm_cspmu/Makefile
@@ -3,4 +3,4 @@ 
 # SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
-arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
+arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o ampere_cspmu.o
diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c
new file mode 100644
index 000000000000..62dc893c94c1
--- /dev/null
+++ b/drivers/perf/arm_cspmu/ampere_cspmu.c
@@ -0,0 +1,232 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ampere SoC PMU (Performance Monitor Unit)
+ *
+ * Copyright (c) 2023, Ampere Computing LLC
+ */
+
+#include "ampere_cspmu.h"
+
+#define PMAUXR0		0xD80
+#define PMAUXR1		0xD84
+#define PMAUXR2		0xD88
+#define PMAUXR3		0xD8C
+
+#define to_ampere_cspmu_ctx(cspmu)	((struct ampere_cspmu_ctx *)(cspmu->impl.ctx))
+
+struct ampere_cspmu_ctx {
+	const char *name;
+	struct attribute **event_attr;
+	struct attribute **format_attr;
+};
+
+#define SOC_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end)        \
+	static inline u32 get_##_name(const struct perf_event *event)     \
+	{                                                                 \
+		return FIELD_GET(GENMASK_ULL(_end, _start),               \
+				 event->attr._config);                    \
+	}                                                                 \
+
+SOC_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 8);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(threshold, config1, 0, 7);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(rank, config1, 8, 23);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(bank, config2, 0, 31);
+
+static struct attribute *ampereone_mcu_pmu_event_attrs[] = {
+	ARM_CSPMU_EVENT_ATTR(cycle_count,		0x00),
+	ARM_CSPMU_EVENT_ATTR(act_sent,			0x01),
+	ARM_CSPMU_EVENT_ATTR(pre_sent,			0x02),
+	ARM_CSPMU_EVENT_ATTR(rd_sent,			0x03),
+	ARM_CSPMU_EVENT_ATTR(rda_sent,			0x04),
+	ARM_CSPMU_EVENT_ATTR(wr_sent,			0x05),
+	ARM_CSPMU_EVENT_ATTR(wra_sent,			0x06),
+	ARM_CSPMU_EVENT_ATTR(pd_entry_vld,		0x07),
+	ARM_CSPMU_EVENT_ATTR(sref_entry_vld,		0x08),
+	ARM_CSPMU_EVENT_ATTR(prea_sent,			0x09),
+	ARM_CSPMU_EVENT_ATTR(pre_sb_sent,		0x0a),
+	ARM_CSPMU_EVENT_ATTR(ref_sent,			0x0b),
+	ARM_CSPMU_EVENT_ATTR(rfm_sent,			0x0c),
+	ARM_CSPMU_EVENT_ATTR(ref_sb_sent,		0x0d),
+	ARM_CSPMU_EVENT_ATTR(rfm_sb_sent,		0x0e),
+	ARM_CSPMU_EVENT_ATTR(rd_rda_sent,		0x0f),
+	ARM_CSPMU_EVENT_ATTR(wr_wra_sent,		0x10),
+	ARM_CSPMU_EVENT_ATTR(raw_hazard,		0x11),
+	ARM_CSPMU_EVENT_ATTR(war_hazard,		0x12),
+	ARM_CSPMU_EVENT_ATTR(waw_hazard,		0x13),
+	ARM_CSPMU_EVENT_ATTR(rar_hazard,		0x14),
+	ARM_CSPMU_EVENT_ATTR(raw_war_waw_hazard,	0x15),
+	ARM_CSPMU_EVENT_ATTR(hprd_lprd_wr_req_vld,	0x16),
+	ARM_CSPMU_EVENT_ATTR(lprd_req_vld,		0x17),
+	ARM_CSPMU_EVENT_ATTR(hprd_req_vld,		0x18),
+	ARM_CSPMU_EVENT_ATTR(hprd_lprd_req_vld,		0x19),
+	ARM_CSPMU_EVENT_ATTR(prefetch_tgt,		0x1a),
+	ARM_CSPMU_EVENT_ATTR(wr_req_vld,		0x1b),
+	ARM_CSPMU_EVENT_ATTR(partial_wr_req_vld,	0x1c),
+	ARM_CSPMU_EVENT_ATTR(rd_retry,			0x1d),
+	ARM_CSPMU_EVENT_ATTR(wr_retry,			0x1e),
+	ARM_CSPMU_EVENT_ATTR(retry_gnt,			0x1f),
+	ARM_CSPMU_EVENT_ATTR(rank_change,		0x20),
+	ARM_CSPMU_EVENT_ATTR(dir_change,		0x21),
+	ARM_CSPMU_EVENT_ATTR(rank_dir_change,		0x22),
+	ARM_CSPMU_EVENT_ATTR(rank_active,		0x23),
+	ARM_CSPMU_EVENT_ATTR(rank_idle,			0x24),
+	ARM_CSPMU_EVENT_ATTR(rank_pd,			0x25),
+	ARM_CSPMU_EVENT_ATTR(rank_sref,			0x26),
+	ARM_CSPMU_EVENT_ATTR(queue_fill_gt_thresh,	0x27),
+	ARM_CSPMU_EVENT_ATTR(queue_rds_gt_thresh,	0x28),
+	ARM_CSPMU_EVENT_ATTR(queue_wrs_gt_thresh,	0x29),
+	ARM_CSPMU_EVENT_ATTR(phy_updt_complt,		0x2a),
+	ARM_CSPMU_EVENT_ATTR(tz_fail,			0x2b),
+	ARM_CSPMU_EVENT_ATTR(dram_errc,			0x2c),
+	ARM_CSPMU_EVENT_ATTR(dram_errd,			0x2d),
+	ARM_CSPMU_EVENT_ATTR(read_data_return,		0x32),
+	ARM_CSPMU_EVENT_ATTR(chi_wr_data_delta,		0x33),
+	ARM_CSPMU_EVENT_ATTR(zq_start,			0x34),
+	ARM_CSPMU_EVENT_ATTR(zq_latch,			0x35),
+	ARM_CSPMU_EVENT_ATTR(wr_fifo_full,		0x36),
+	ARM_CSPMU_EVENT_ATTR(info_fifo_full,		0x37),
+	ARM_CSPMU_EVENT_ATTR(cmd_fifo_full,		0x38),
+	ARM_CSPMU_EVENT_ATTR(dfi_nop,			0x39),
+	ARM_CSPMU_EVENT_ATTR(dfi_cmd,			0x3a),
+	ARM_CSPMU_EVENT_ATTR(rd_run_len,		0x3b),
+	ARM_CSPMU_EVENT_ATTR(wr_run_len,		0x3c),
+
+	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+	NULL,
+};
+
+static struct attribute *ampereone_mcu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	ARM_CSPMU_FORMAT_ATTR(threshold, "config1:0-7"),
+	ARM_CSPMU_FORMAT_ATTR(rank, "config1:8-23"),
+	ARM_CSPMU_FORMAT_ATTR(bank, "config2:0-31"),
+	NULL,
+};
+
+static struct attribute **
+ampere_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
+{
+	const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
+
+	return ctx->event_attr;
+}
+
+static struct attribute **
+ampere_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
+{
+	const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
+
+	return ctx->format_attr;
+}
+
+static const char *
+ampere_cspmu_get_name(const struct arm_cspmu *cspmu)
+{
+	const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
+
+	return ctx->name;
+}
+
+static u32 ampere_cspmu_event_filter(const struct perf_event *event)
+{
+	return 0;
+}
+
+static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+				       struct hw_perf_event *hwc,
+				       u32 filter)
+{
+	struct perf_event *event;
+	unsigned int idx;
+	u32 threshold, rank, bank;
+
+	/*
+	 * At this point, all the events have the same filter settings.
+	 * Therefore, take the first event and use its configuration.
+	 */
+	idx = find_first_bit(cspmu->hw_events.used_ctrs,
+			     cspmu->cycle_counter_logical_idx);
+
+	event = cspmu->hw_events.events[idx];
+
+	threshold	= get_threshold(event);
+	rank		= get_rank(event);
+	bank		= get_bank(event);
+
+	writel(threshold, cspmu->base0 + PMAUXR0);
+	writel(rank, cspmu->base0 + PMAUXR1);
+	writel(bank, cspmu->base0 + PMAUXR2);
+}
+
+static int ampere_cspmu_validate_configs(struct perf_event *event,
+					 struct perf_event *event2)
+{
+	if (get_threshold(event) != get_threshold(event2) ||
+	    get_rank(event) != get_rank(event2) ||
+	    get_bank(event) != get_bank(event2))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu,
+				       struct perf_event *new)
+{
+	struct perf_event *curr, *leader = new->group_leader;
+	unsigned int idx;
+	int ret;
+
+	ret = ampere_cspmu_validate_configs(new, leader);
+	if (ret)
+		return ret;
+
+	/* We compare the global filter settings to existing events */
+	idx = find_first_bit(cspmu->hw_events.used_ctrs,
+			     cspmu->cycle_counter_logical_idx);
+
+	/* This is the first event */
+	if (idx == cspmu->cycle_counter_logical_idx)
+		return 0;
+
+	curr = cspmu->hw_events.events[idx];
+
+	return ampere_cspmu_validate_configs(curr, new);
+}
+
+static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu,
+				      const char *name_pattern)
+{
+	struct device *dev = cspmu->dev;
+	static atomic_t pmu_generic_idx = {0};
+
+	return devm_kasprintf(dev, GFP_KERNEL, name_pattern,
+			      atomic_fetch_inc(&pmu_generic_idx));
+}
+
+int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
+{
+	struct device *dev = cspmu->dev;
+	struct ampere_cspmu_ctx *ctx;
+	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+
+	ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->event_attr		= ampereone_mcu_pmu_event_attrs;
+	ctx->format_attr	= ampereone_mcu_format_attrs;
+	ctx->name		= ampere_cspmu_format_name(cspmu,
+							   "ampere_mcu_pmu_%u");
+	cspmu->impl.ctx = ctx;
+
+	impl_ops->event_filter		= ampere_cspmu_event_filter;
+	impl_ops->set_ev_filter		= ampere_cspmu_set_ev_filter;
+	impl_ops->validate_event	= ampere_cspmu_validate_event;
+	impl_ops->get_name		= ampere_cspmu_get_name;
+	impl_ops->get_event_attrs	= ampere_cspmu_get_event_attrs;
+	impl_ops->get_format_attrs	= ampere_cspmu_get_format_attrs;
+
+	return 0;
+}
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.h b/drivers/perf/arm_cspmu/ampere_cspmu.h
new file mode 100644
index 000000000000..9b3e1628d1d6
--- /dev/null
+++ b/drivers/perf/arm_cspmu/ampere_cspmu.h
@@ -0,0 +1,17 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Ampere SoC PMU (Performance Monitor Unit)
+ *
+ * Copyright (c) 2023, Ampere Computing LLC
+ */
+
+#ifndef __AMPERE_CSPMU_H__
+#define __AMPERE_CSPMU_H__
+
+#include "arm_cspmu.h"
+
+/* Allocate AMPERE descriptor. */
+int ampere_cspmu_init_ops(struct arm_cspmu *cspmu);
+
+#endif /* __AMPERE_CSPMU_H__ */
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index 471d6d7ac81a..587515eea0b4 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -29,6 +29,7 @@ 
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 
+#include "ampere_cspmu.h"
 #include "arm_cspmu.h"
 #include "nvidia_cspmu.h"
 
@@ -114,6 +115,7 @@ 
 
 /* JEDEC-assigned JEP106 identification code */
 #define ARM_CSPMU_IMPL_ID_NVIDIA		0x36B
+#define ARM_CSPMU_IMPL_ID_AMPERE		0xA16
 
 static unsigned long arm_cspmu_cpuhp_state;
 
@@ -388,6 +390,11 @@  static const struct impl_match impl_match[] = {
 	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
 	  .impl_init_ops = nv_cspmu_init_ops
 	},
+	{
+	  .pmiidr = ARM_CSPMU_IMPL_ID_AMPERE,
+	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
+	  .impl_init_ops = ampere_cspmu_init_ops
+	},
 	{}
 };