[v3] arm64/arm: arm_pmuv3: perf: Don't truncate 64-bit registers
Commit Message
The driver used to truncate several 64-bit registers such as PMCEID[n]
registers used to describe whether architectural and microarchitectural
events in range 0x4000-0x401f exist. Due to discarding the bits, the
driver made the events invisible, even if they existed.
Moreover, PMCCFILTR and PMCR registers have additional bits in the upper
32 bits. This patch makes them available although they aren't currently
used. Finally, functions handling PMXEVCNTR and PMXEVTYPER registers are
removed as they are not being used at all.
Fixes: df29ddf4f04b ("arm64: perf: Abstract system register accesses away")
Reported-by: Carl Worth <carl@os.amperecomputing.com>
Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
---
v2:
* Took arm32 specific code from Marc Zyngier's review comment
* Fixed a couple of typos in the commit message
I have tested the patch on Arm64. However, when it comes to the Arm32 part, I have
only compared the code with the Arm32 specification and cross-compiled it.
* https://lore.kernel.org/all/20231027012243.111070-1-ilkka@os.amperecomputing.com/
v3:
* Removed read/write_pmxevcntr() and read/write_pmxevtyper() as suggested
by Mark Rutland
* Changed handling of PMCCFILTR and PMCR to 64-bit on Aarch64. Aarch32 doesn't
seem to use the upper 32 bits.
Tested the patch on Arm64. Arm32 version was only built and not tested on actual
hardware.
---
arch/arm/include/asm/arm_pmuv3.h | 48 ++++++++++++++----------------
arch/arm64/include/asm/arm_pmuv3.h | 25 ++++------------
drivers/perf/arm_pmuv3.c | 6 ++--
3 files changed, 31 insertions(+), 48 deletions(-)
Comments
On 11/3/23 00:00, Ilkka Koskinen wrote:
> The driver used to truncate several 64-bit registers such as PMCEID[n]
> registers used to describe whether architectural and microarchitectural
> events in range 0x4000-0x401f exist. Due to discarding the bits, the
> driver made the events invisible, even if they existed.
>
> Moreover, PMCCFILTR and PMCR registers have additional bits in the upper
> 32 bits. This patch makes them available although they aren't currently
> used. Finally, functions handling PMXEVCNTR and PMXEVTYPER registers are
> removed as they not being used at all.
>
> Fixes: df29ddf4f04b ("arm64: perf: Abstract system register accesses away")
> Reported-by: Carl Worth <carl@os.amperecomputing.com>
This needs a URL for the original bug report in the following format.
Reported-by: Carl Worth <carl@os.amperecomputing.com>
Closes: https://lore.kernel.org/..
Otherwise, the following checkpatch warning shows up.
WARNING: Reported-by: should be immediately followed by Closes: with a URL to the report
#17:
Reported-by: Carl Worth <carl@os.amperecomputing.com>
Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
> ---
>
> v2:
>
> * Took arm32 specific code from Marc Zyngier's review comment
> * Fixed a couple of typos in the commit message
>
> I have tested the patch on Arm64. However, what comes to Arm32 part, I have
> only compared the code with Arm32 specification and cross compiled it.
>
> * https://lore.kernel.org/all/20231027012243.111070-1-ilkka@os.amperecomputing.com/
>
> v3:
>
> * Removed read/write_pmxevcntr() and read/write_pmxevtyper() as suggested
> by Mark Rutland
> * Changed handling of PMCCFILTR and PMCR to 64-bit on Aarch64. Aarch32 doesn't
> seem to use the upper 32 bits.
>
>
> Tested the patch on Arm64. Arm32 version was only built and not tested on actual
> hardware.
>
> ---
>
> arch/arm/include/asm/arm_pmuv3.h | 48 ++++++++++++++----------------
> arch/arm64/include/asm/arm_pmuv3.h | 25 ++++------------
> drivers/perf/arm_pmuv3.c | 6 ++--
> 3 files changed, 31 insertions(+), 48 deletions(-)
>
> diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
> index 72529f5e2bed..a41b503b7dcd 100644
> --- a/arch/arm/include/asm/arm_pmuv3.h
> +++ b/arch/arm/include/asm/arm_pmuv3.h
> @@ -23,6 +23,8 @@
> #define PMUSERENR __ACCESS_CP15(c9, 0, c14, 0)
> #define PMINTENSET __ACCESS_CP15(c9, 0, c14, 1)
> #define PMINTENCLR __ACCESS_CP15(c9, 0, c14, 2)
> +#define PMCEID2 __ACCESS_CP15(c9, 0, c14, 4)
> +#define PMCEID3 __ACCESS_CP15(c9, 0, c14, 5)
> #define PMMIR __ACCESS_CP15(c9, 0, c14, 6)
> #define PMCCFILTR __ACCESS_CP15(c14, 0, c15, 7)
>
> @@ -150,21 +152,6 @@ static inline u64 read_pmccntr(void)
> return read_sysreg(PMCCNTR);
> }
>
> -static inline void write_pmxevcntr(u32 val)
> -{
> - write_sysreg(val, PMXEVCNTR);
> -}
> -
> -static inline u32 read_pmxevcntr(void)
> -{
> - return read_sysreg(PMXEVCNTR);
> -}
> -
> -static inline void write_pmxevtyper(u32 val)
> -{
> - write_sysreg(val, PMXEVTYPER);
> -}
> -
> static inline void write_pmcntenset(u32 val)
> {
> write_sysreg(val, PMCNTENSET);
> @@ -205,16 +192,6 @@ static inline void write_pmuserenr(u32 val)
> write_sysreg(val, PMUSERENR);
> }
>
> -static inline u32 read_pmceid0(void)
> -{
> - return read_sysreg(PMCEID0);
> -}
> -
> -static inline u32 read_pmceid1(void)
> -{
> - return read_sysreg(PMCEID1);
> -}
> -
> static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
> static inline void kvm_clr_pmu_events(u32 clr) {}
> static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
> @@ -231,6 +208,7 @@ static inline void kvm_vcpu_pmu_resync_el0(void) {}
>
> /* PMU Version in DFR Register */
> #define ARMV8_PMU_DFR_VER_NI 0
> +#define ARMV8_PMU_DFR_VER_V3P1 0x4
> #define ARMV8_PMU_DFR_VER_V3P4 0x5
> #define ARMV8_PMU_DFR_VER_V3P5 0x6
> #define ARMV8_PMU_DFR_VER_IMP_DEF 0xF
> @@ -251,4 +229,24 @@ static inline bool is_pmuv3p5(int pmuver)
> return pmuver >= ARMV8_PMU_DFR_VER_V3P5;
> }
>
> +static inline u64 read_pmceid0(void)
> +{
> + u64 val = read_sysreg(PMCEID0);
> +
> + if (read_pmuver() >= ARMV8_PMU_DFR_VER_V3P1)
> + val |= (u64)read_sysreg(PMCEID2) << 32;
> +
> + return val;
> +}
> +
> +static inline u64 read_pmceid1(void)
> +{
> + u64 val = read_sysreg(PMCEID1);
> +
> + if (read_pmuver() >= ARMV8_PMU_DFR_VER_V3P1)
> + val |= (u64)read_sysreg(PMCEID3) << 32;
> +
> + return val;
> +}
> +
> #endif
> diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
> index 18dc2fb3d7b7..c27404fa4418 100644
> --- a/arch/arm64/include/asm/arm_pmuv3.h
> +++ b/arch/arm64/include/asm/arm_pmuv3.h
> @@ -46,12 +46,12 @@ static inline u32 read_pmuver(void)
> ID_AA64DFR0_EL1_PMUVer_SHIFT);
> }
>
> -static inline void write_pmcr(u32 val)
> +static inline void write_pmcr(u64 val)
> {
> write_sysreg(val, pmcr_el0);
> }
>
> -static inline u32 read_pmcr(void)
> +static inline u64 read_pmcr(void)
> {
> return read_sysreg(pmcr_el0);
> }
> @@ -71,21 +71,6 @@ static inline u64 read_pmccntr(void)
> return read_sysreg(pmccntr_el0);
> }
>
> -static inline void write_pmxevcntr(u32 val)
> -{
> - write_sysreg(val, pmxevcntr_el0);
> -}
> -
> -static inline u32 read_pmxevcntr(void)
> -{
> - return read_sysreg(pmxevcntr_el0);
> -}
> -
> -static inline void write_pmxevtyper(u32 val)
> -{
> - write_sysreg(val, pmxevtyper_el0);
> -}
> -
> static inline void write_pmcntenset(u32 val)
> {
> write_sysreg(val, pmcntenset_el0);
> @@ -106,7 +91,7 @@ static inline void write_pmintenclr(u32 val)
> write_sysreg(val, pmintenclr_el1);
> }
>
> -static inline void write_pmccfiltr(u32 val)
> +static inline void write_pmccfiltr(u64 val)
> {
> write_sysreg(val, pmccfiltr_el0);
> }
> @@ -126,12 +111,12 @@ static inline void write_pmuserenr(u32 val)
> write_sysreg(val, pmuserenr_el0);
> }
>
> -static inline u32 read_pmceid0(void)
> +static inline u64 read_pmceid0(void)
> {
> return read_sysreg(pmceid0_el0);
> }
>
> -static inline u32 read_pmceid1(void)
> +static inline u64 read_pmceid1(void)
> {
> return read_sysreg(pmceid1_el0);
> }
> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
> index 18b91b56af1d..6ca7be05229c 100644
> --- a/drivers/perf/arm_pmuv3.c
> +++ b/drivers/perf/arm_pmuv3.c
> @@ -428,12 +428,12 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event)
> #define ARMV8_IDX_TO_COUNTER(x) \
> (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK)
>
> -static inline u32 armv8pmu_pmcr_read(void)
> +static inline u64 armv8pmu_pmcr_read(void)
> {
> return read_pmcr();
> }
>
> -static inline void armv8pmu_pmcr_write(u32 val)
> +static inline void armv8pmu_pmcr_write(u64 val)
> {
> val &= ARMV8_PMU_PMCR_MASK;
> isb();
> @@ -957,7 +957,7 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
> static void armv8pmu_reset(void *info)
> {
> struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
> - u32 pmcr;
> + u64 pmcr;
>
> /* The counter and interrupt enable registers are unknown at reset. */
> armv8pmu_disable_counter(U32_MAX);
Otherwise, this LGTM.
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
On Fri, 03 Nov 2023 06:09:36 +0000,
Anshuman Khandual <anshuman.khandual@arm.com> wrote:
>
>
>
> On 11/3/23 00:00, Ilkka Koskinen wrote:
> > The driver used to truncate several 64-bit registers such as PMCEID[n]
> > registers used to describe whether architectural and microarchitectural
> > events in range 0x4000-0x401f exist. Due to discarding the bits, the
> > driver made the events invisible, even if they existed.
> >
> > Moreover, PMCCFILTR and PMCR registers have additional bits in the upper
> > 32 bits. This patch makes them available although they aren't currently
> > used. Finally, functions handling PMXEVCNTR and PMXEVTYPER registers are
> > removed as they not being used at all.
> >
> > Fixes: df29ddf4f04b ("arm64: perf: Abstract system register accesses away")
> > Reported-by: Carl Worth <carl@os.amperecomputing.com>
>
> This needs an URL for the original bug report in the following format.
>
> Reported-by: Carl Worth <carl@os.amperecomputing.com>
> Closes: https://lore.kernel.org/..
A report is not necessarily done in a public list. And yet there is *a
lot* of value in recognising the reporter of the bug.
>
> Otherwise, the following checkpatch warning shows up.
>
> WARNING: Reported-by: should be immediately followed by Closes: with a URL to the report
> #17:
> Reported-by: Carl Worth <carl@os.amperecomputing.com>
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Checkpatch can say what it wants, but that doesn't make it true.
M.
On Fri, 3 Nov 2023, Marc Zyngier wrote:
> On Fri, 03 Nov 2023 06:09:36 +0000,
> Anshuman Khandual <anshuman.khandual@arm.com> wrote:
>>
>>
>>
>> On 11/3/23 00:00, Ilkka Koskinen wrote:
>>> The driver used to truncate several 64-bit registers such as PMCEID[n]
>>> registers used to describe whether architectural and microarchitectural
>>> events in range 0x4000-0x401f exist. Due to discarding the bits, the
>>> driver made the events invisible, even if they existed.
>>>
>>> Moreover, PMCCFILTR and PMCR registers have additional bits in the upper
>>> 32 bits. This patch makes them available although they aren't currently
>>> used. Finally, functions handling PMXEVCNTR and PMXEVTYPER registers are
>>> removed as they not being used at all.
>>>
>>> Fixes: df29ddf4f04b ("arm64: perf: Abstract system register accesses away")
>>> Reported-by: Carl Worth <carl@os.amperecomputing.com>
>>
>> This needs an URL for the original bug report in the following format.
>>
>> Reported-by: Carl Worth <carl@os.amperecomputing.com>
>> Closes: https://lore.kernel.org/..
>
> A report is not necessarily done in a public list. And yet there is *a
> lot* of value in recognising the reporter of the bug.
Thanks, Marc. That's exactly the case here. Carl reported the bug to me
internally and, therefore, I can't provide a URL to the bug report.
Cheers, Ilkka
>
>>
>> Otherwise, the following checkpatch warning shows up.
>>
>> WARNING: Reported-by: should be immediately followed by Closes: with a URL to the report
>> #17:
>> Reported-by: Carl Worth <carl@os.amperecomputing.com>
>> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
>
> Checkpatch can say what it wants, but that doesn't make it true.
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
>
On Thu, Nov 02, 2023 at 11:30:12AM -0700, Ilkka Koskinen wrote:
> The driver used to truncate several 64-bit registers such as PMCEID[n]
> registers used to describe whether architectural and microarchitectural
> events in range 0x4000-0x401f exist. Due to discarding the bits, the
> driver made the events invisible, even if they existed.
>
> Moreover, PMCCFILTR and PMCR registers have additional bits in the upper
> 32 bits. This patch makes them available although they aren't currently
> used. Finally, functions handling PMXEVCNTR and PMXEVTYPER registers are
> removed as they not being used at all.
>
> Fixes: df29ddf4f04b ("arm64: perf: Abstract system register accesses away")
> Reported-by: Carl Worth <carl@os.amperecomputing.com>
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Acked-by: Will Deacon <will@kernel.org>
Catalin -- another perf fix for you, thanks!
Will
On Thu, 02 Nov 2023 11:30:12 -0700, Ilkka Koskinen wrote:
> The driver used to truncate several 64-bit registers such as PMCEID[n]
> registers used to describe whether architectural and microarchitectural
> events in range 0x4000-0x401f exist. Due to discarding the bits, the
> driver made the events invisible, even if they existed.
>
> Moreover, PMCCFILTR and PMCR registers have additional bits in the upper
> 32 bits. This patch makes them available although they aren't currently
> used. Finally, functions handling PMXEVCNTR and PMXEVTYPER registers are
> removed as they not being used at all.
>
> [...]
Applied to arm64 (for-next/core), thanks!
[1/1] arm64/arm: arm_pmuv3: perf: Don't truncate 64-bit registers
https://git.kernel.org/arm64/c/403edfa43628
@@ -23,6 +23,8 @@
#define PMUSERENR __ACCESS_CP15(c9, 0, c14, 0)
#define PMINTENSET __ACCESS_CP15(c9, 0, c14, 1)
#define PMINTENCLR __ACCESS_CP15(c9, 0, c14, 2)
+#define PMCEID2 __ACCESS_CP15(c9, 0, c14, 4)
+#define PMCEID3 __ACCESS_CP15(c9, 0, c14, 5)
#define PMMIR __ACCESS_CP15(c9, 0, c14, 6)
#define PMCCFILTR __ACCESS_CP15(c14, 0, c15, 7)
@@ -150,21 +152,6 @@ static inline u64 read_pmccntr(void)
return read_sysreg(PMCCNTR);
}
-static inline void write_pmxevcntr(u32 val)
-{
- write_sysreg(val, PMXEVCNTR);
-}
-
-static inline u32 read_pmxevcntr(void)
-{
- return read_sysreg(PMXEVCNTR);
-}
-
-static inline void write_pmxevtyper(u32 val)
-{
- write_sysreg(val, PMXEVTYPER);
-}
-
static inline void write_pmcntenset(u32 val)
{
write_sysreg(val, PMCNTENSET);
@@ -205,16 +192,6 @@ static inline void write_pmuserenr(u32 val)
write_sysreg(val, PMUSERENR);
}
-static inline u32 read_pmceid0(void)
-{
- return read_sysreg(PMCEID0);
-}
-
-static inline u32 read_pmceid1(void)
-{
- return read_sysreg(PMCEID1);
-}
-
static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u32 clr) {}
static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
@@ -231,6 +208,7 @@ static inline void kvm_vcpu_pmu_resync_el0(void) {}
/* PMU Version in DFR Register */
#define ARMV8_PMU_DFR_VER_NI 0
+#define ARMV8_PMU_DFR_VER_V3P1 0x4
#define ARMV8_PMU_DFR_VER_V3P4 0x5
#define ARMV8_PMU_DFR_VER_V3P5 0x6
#define ARMV8_PMU_DFR_VER_IMP_DEF 0xF
@@ -251,4 +229,24 @@ static inline bool is_pmuv3p5(int pmuver)
return pmuver >= ARMV8_PMU_DFR_VER_V3P5;
}
+static inline u64 read_pmceid0(void)
+{
+ u64 val = read_sysreg(PMCEID0);
+
+ if (read_pmuver() >= ARMV8_PMU_DFR_VER_V3P1)
+ val |= (u64)read_sysreg(PMCEID2) << 32;
+
+ return val;
+}
+
+static inline u64 read_pmceid1(void)
+{
+ u64 val = read_sysreg(PMCEID1);
+
+ if (read_pmuver() >= ARMV8_PMU_DFR_VER_V3P1)
+ val |= (u64)read_sysreg(PMCEID3) << 32;
+
+ return val;
+}
+
#endif
@@ -46,12 +46,12 @@ static inline u32 read_pmuver(void)
ID_AA64DFR0_EL1_PMUVer_SHIFT);
}
-static inline void write_pmcr(u32 val)
+static inline void write_pmcr(u64 val)
{
write_sysreg(val, pmcr_el0);
}
-static inline u32 read_pmcr(void)
+static inline u64 read_pmcr(void)
{
return read_sysreg(pmcr_el0);
}
@@ -71,21 +71,6 @@ static inline u64 read_pmccntr(void)
return read_sysreg(pmccntr_el0);
}
-static inline void write_pmxevcntr(u32 val)
-{
- write_sysreg(val, pmxevcntr_el0);
-}
-
-static inline u32 read_pmxevcntr(void)
-{
- return read_sysreg(pmxevcntr_el0);
-}
-
-static inline void write_pmxevtyper(u32 val)
-{
- write_sysreg(val, pmxevtyper_el0);
-}
-
static inline void write_pmcntenset(u32 val)
{
write_sysreg(val, pmcntenset_el0);
@@ -106,7 +91,7 @@ static inline void write_pmintenclr(u32 val)
write_sysreg(val, pmintenclr_el1);
}
-static inline void write_pmccfiltr(u32 val)
+static inline void write_pmccfiltr(u64 val)
{
write_sysreg(val, pmccfiltr_el0);
}
@@ -126,12 +111,12 @@ static inline void write_pmuserenr(u32 val)
write_sysreg(val, pmuserenr_el0);
}
-static inline u32 read_pmceid0(void)
+static inline u64 read_pmceid0(void)
{
return read_sysreg(pmceid0_el0);
}
-static inline u32 read_pmceid1(void)
+static inline u64 read_pmceid1(void)
{
return read_sysreg(pmceid1_el0);
}
@@ -428,12 +428,12 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event)
#define ARMV8_IDX_TO_COUNTER(x) \
(((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK)
-static inline u32 armv8pmu_pmcr_read(void)
+static inline u64 armv8pmu_pmcr_read(void)
{
return read_pmcr();
}
-static inline void armv8pmu_pmcr_write(u32 val)
+static inline void armv8pmu_pmcr_write(u64 val)
{
val &= ARMV8_PMU_PMCR_MASK;
isb();
@@ -957,7 +957,7 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
static void armv8pmu_reset(void *info)
{
struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
- u32 pmcr;
+ u64 pmcr;
/* The counter and interrupt enable registers are unknown at reset. */
armv8pmu_disable_counter(U32_MAX);