[V8,5/6] arm64/perf: Add branch stack support in ARMV8 PMU
Commit Message
This enables support for branch stack sampling event in ARMV8 PMU, checking
has_branch_stack() on the event inside 'struct arm_pmu' callbacks. Although
these branch stack helpers armv8pmu_branch_XXXXX() are just dummy functions
for now. While here, this also defines arm_pmu's sched_task() callback with
armv8pmu_sched_task(), which resets the branch record buffer on a sched_in.
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
arch/arm64/include/asm/perf_event.h | 31 ++++++++++++
arch/arm64/kernel/perf_event.c | 78 ++++++++++++++++++++---------
2 files changed, 86 insertions(+), 23 deletions(-)
Comments
Hi Anshuman,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on arm64/for-next/core]
[also build test WARNING on tip/perf/core acme/perf/core linus/master v6.2-rc6 next-20230130]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230123-210254
base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
patch link: https://lore.kernel.org/r/20230123125956.1350336-6-anshuman.khandual%40arm.com
patch subject: [PATCH V8 5/6] arm64/perf: Add branch stack support in ARMV8 PMU
config: arm64-randconfig-r016-20230130 (https://download.01.org/0day-ci/archive/20230131/202301310047.b5iv9hM8-lkp@intel.com/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 4196ca3278f78c6e19246e54ab0ecb364e37d66a)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm64 cross compiling tool for clang build
# apt-get install binutils-aarch64-linux-gnu
# https://github.com/intel-lab-lkp/linux/commit/0deba04ac45f8632b8579cb5cbf908b9f4428402
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230123-210254
git checkout 0deba04ac45f8632b8579cb5cbf908b9f4428402
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 prepare
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
In file included from arch/arm64/kernel/asm-offsets.c:15:
In file included from include/linux/kvm_host.h:45:
In file included from arch/arm64/include/asm/kvm_host.h:36:
In file included from include/kvm/arm_pmu.h:11:
>> arch/arm64/include/asm/perf_event.h:280:20: warning: function 'has_branch_stack' has internal linkage but is not defined [-Wundefined-internal]
static inline bool has_branch_stack(struct perf_event *event);
^
arch/arm64/include/asm/perf_event.h:284:16: note: used here
WARN_ON_ONCE(!has_branch_stack(event));
^
1 warning generated.
--
In file included from arch/arm64/kernel/asm-offsets.c:15:
In file included from include/linux/kvm_host.h:45:
In file included from arch/arm64/include/asm/kvm_host.h:36:
In file included from include/kvm/arm_pmu.h:11:
>> arch/arm64/include/asm/perf_event.h:280:20: warning: function 'has_branch_stack' has internal linkage but is not defined [-Wundefined-internal]
static inline bool has_branch_stack(struct perf_event *event);
^
arch/arm64/include/asm/perf_event.h:284:16: note: used here
WARN_ON_ONCE(!has_branch_stack(event));
^
1 warning generated.
vim +/has_branch_stack +280 arch/arm64/include/asm/perf_event.h
279
> 280 static inline bool has_branch_stack(struct perf_event *event);
281
On Tue, Jan 31, 2023 at 12:28:17AM +0800, kernel test robot wrote:
> Hi Anshuman,
>
> Thank you for the patch! Perhaps something to improve:
>
> [auto build test WARNING on arm64/for-next/core]
> [also build test WARNING on tip/perf/core acme/perf/core linus/master v6.2-rc6 next-20230130]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230123-210254
> base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
> patch link: https://lore.kernel.org/r/20230123125956.1350336-6-anshuman.khandual%40arm.com
> patch subject: [PATCH V8 5/6] arm64/perf: Add branch stack support in ARMV8 PMU
> config: arm64-randconfig-r016-20230130 (https://download.01.org/0day-ci/archive/20230131/202301310047.b5iv9hM8-lkp@intel.com/config)
> compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 4196ca3278f78c6e19246e54ab0ecb364e37d66a)
> reproduce (this is a W=1 build):
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # install arm64 cross compiling tool for clang build
> # apt-get install binutils-aarch64-linux-gnu
> # https://github.com/intel-lab-lkp/linux/commit/0deba04ac45f8632b8579cb5cbf908b9f4428402
> git remote add linux-review https://github.com/intel-lab-lkp/linux
> git fetch --no-tags linux-review Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230123-210254
> git checkout 0deba04ac45f8632b8579cb5cbf908b9f4428402
> # save the config file
> mkdir build_dir && cp config build_dir/.config
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 olddefconfig
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 prepare
>
> If you fix the issue, kindly add following tag where applicable
> | Reported-by: kernel test robot <lkp@intel.com>
>
> All warnings (new ones prefixed by >>):
>
> In file included from arch/arm64/kernel/asm-offsets.c:15:
> In file included from include/linux/kvm_host.h:45:
> In file included from arch/arm64/include/asm/kvm_host.h:36:
> In file included from include/kvm/arm_pmu.h:11:
> >> arch/arm64/include/asm/perf_event.h:280:20: warning: function 'has_branch_stack' has internal linkage but is not defined [-Wundefined-internal]
> static inline bool has_branch_stack(struct perf_event *event);
> ^
> arch/arm64/include/asm/perf_event.h:284:16: note: used here
> WARN_ON_ONCE(!has_branch_stack(event));
> ^
> 1 warning generated.
> --
> In file included from arch/arm64/kernel/asm-offsets.c:15:
> In file included from include/linux/kvm_host.h:45:
> In file included from arch/arm64/include/asm/kvm_host.h:36:
> In file included from include/kvm/arm_pmu.h:11:
> >> arch/arm64/include/asm/perf_event.h:280:20: warning: function 'has_branch_stack' has internal linkage but is not defined [-Wundefined-internal]
> static inline bool has_branch_stack(struct perf_event *event);
> ^
> arch/arm64/include/asm/perf_event.h:284:16: note: used here
> WARN_ON_ONCE(!has_branch_stack(event));
> ^
> 1 warning generated.
This looks like it should be fixed. I'd also like to see Mark's ack on the
final series, since he had some detailed comments on the previous version.
Will
On 2/3/23 17:01, Will Deacon wrote:
> On Tue, Jan 31, 2023 at 12:28:17AM +0800, kernel test robot wrote:
>> Hi Anshuman,
>>
>> Thank you for the patch! Perhaps something to improve:
>>
>> [auto build test WARNING on arm64/for-next/core]
>> [also build test WARNING on tip/perf/core acme/perf/core linus/master v6.2-rc6 next-20230130]
>> [If your patch is applied to the wrong git tree, kindly drop us a note.
>> And when submitting patch, we suggest to use '--base' as documented in
>> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>>
>> url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230123-210254
>> base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
>> patch link: https://lore.kernel.org/r/20230123125956.1350336-6-anshuman.khandual%40arm.com
>> patch subject: [PATCH V8 5/6] arm64/perf: Add branch stack support in ARMV8 PMU
>> config: arm64-randconfig-r016-20230130 (https://download.01.org/0day-ci/archive/20230131/202301310047.b5iv9hM8-lkp@intel.com/config)
>> compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 4196ca3278f78c6e19246e54ab0ecb364e37d66a)
>> reproduce (this is a W=1 build):
>> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>> chmod +x ~/bin/make.cross
>> # install arm64 cross compiling tool for clang build
>> # apt-get install binutils-aarch64-linux-gnu
>> # https://github.com/intel-lab-lkp/linux/commit/0deba04ac45f8632b8579cb5cbf908b9f4428402
>> git remote add linux-review https://github.com/intel-lab-lkp/linux
>> git fetch --no-tags linux-review Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230123-210254
>> git checkout 0deba04ac45f8632b8579cb5cbf908b9f4428402
>> # save the config file
>> mkdir build_dir && cp config build_dir/.config
>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 olddefconfig
>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 prepare
>>
>> If you fix the issue, kindly add following tag where applicable
>> | Reported-by: kernel test robot <lkp@intel.com>
>>
>> All warnings (new ones prefixed by >>):
>>
>> In file included from arch/arm64/kernel/asm-offsets.c:15:
>> In file included from include/linux/kvm_host.h:45:
>> In file included from arch/arm64/include/asm/kvm_host.h:36:
>> In file included from include/kvm/arm_pmu.h:11:
>>>> arch/arm64/include/asm/perf_event.h:280:20: warning: function 'has_branch_stack' has internal linkage but is not defined [-Wundefined-internal]
>> static inline bool has_branch_stack(struct perf_event *event);
>> ^
>> arch/arm64/include/asm/perf_event.h:284:16: note: used here
>> WARN_ON_ONCE(!has_branch_stack(event));
>> ^
>> 1 warning generated.
>> --
>> In file included from arch/arm64/kernel/asm-offsets.c:15:
>> In file included from include/linux/kvm_host.h:45:
>> In file included from arch/arm64/include/asm/kvm_host.h:36:
>> In file included from include/kvm/arm_pmu.h:11:
>>>> arch/arm64/include/asm/perf_event.h:280:20: warning: function 'has_branch_stack' has internal linkage but is not defined [-Wundefined-internal]
>> static inline bool has_branch_stack(struct perf_event *event);
>> ^
>> arch/arm64/include/asm/perf_event.h:284:16: note: used here
>> WARN_ON_ONCE(!has_branch_stack(event));
>> ^
>> 1 warning generated.
>
> This looks like it should be fixed. I'd also like to see Mark's ack on the
This build warning is triggered when CONFIG_PERF_EVENTS is not enabled, when
all the fallback stubs in there try to use has_branch_stack() which does not
get defined. The following change, fixes the problem.
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 97db96326e13..1f68c125eb1a 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -277,8 +277,6 @@ struct pmu_hw_events;
struct arm_pmu;
struct perf_event;
-static inline bool has_branch_stack(struct perf_event *event);
-
#ifdef CONFIG_ARM64_BRBE
void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event);
bool armv8pmu_branch_valid(struct perf_event *event);
@@ -289,6 +287,9 @@ void armv8pmu_branch_reset(void);
int armv8pmu_private_alloc(struct arm_pmu *arm_pmu);
void armv8pmu_private_free(struct arm_pmu *arm_pmu);
#else
+#ifdef CONFIG_PERF_EVENTS
+static inline bool has_branch_stack(struct perf_event *event);
+
static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event)
{
WARN_ON_ONCE(!has_branch_stack(event));
@@ -316,3 +317,4 @@ static inline int armv8pmu_private_alloc(struct arm_pmu *arm_pmu) { return 0; }
static inline void armv8pmu_private_free(struct arm_pmu *arm_pmu) { }
#endif
#endif
+#endif
> final series, since he had some detailed comments on the previous version.
>
> Will
On Mon, Feb 06, 2023 at 09:46:25AM +0530, Anshuman Khandual wrote:
> On 2/3/23 17:01, Will Deacon wrote:
> > On Tue, Jan 31, 2023 at 12:28:17AM +0800, kernel test robot wrote:
[...]
> > This looks like it should be fixed. I'd also like to see Mark's ack on the
>
> This build warning is triggered when CONFIG_PERF_EVENTS is not enabled, when
> all the fallback stubs in there try to use has_branch_stack() which does not
> get defined. The following change, fixes the problem.
I expect that you'll post a v9 with that folded in, and I've just made some
comments on v7 which I expect will require some changes, so I'm going to wait
for v9 for further review to keep this manageable.
Thanks,
Mark.
On 2/9/23 01:36, Mark Rutland wrote:
> On Mon, Feb 06, 2023 at 09:46:25AM +0530, Anshuman Khandual wrote:
>> On 2/3/23 17:01, Will Deacon wrote:
>>> On Tue, Jan 31, 2023 at 12:28:17AM +0800, kernel test robot wrote:
>
> [...]
>
>>> This looks like it should be fixed. I'd also like to see Mark's ack on the
>>
>> This build warning is triggered when CONFIG_PERF_EVENTS is not enabled, when
>> all the fallback stubs in there try to use has_branch_stack() which does not
>> get defined. The following change, fixes the problem.
>
> I expect that you'll post a v9 with that folded in, and I've just made some
> comments on v7 which I expect will require some changes, so I'm going to wait
> for v9 for further review to keep this manageable.
Sure, will work on a V9 and post when ready.
@@ -273,4 +273,35 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
(regs)->pstate = PSR_MODE_EL1h; \
}
+struct pmu_hw_events;
+struct arm_pmu;
+struct perf_event;
+
+static inline bool has_branch_stack(struct perf_event *event);
+
+static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event)
+{
+ WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline bool armv8pmu_branch_valid(struct perf_event *event)
+{
+ WARN_ON_ONCE(!has_branch_stack(event));
+ return false;
+}
+
+static inline void armv8pmu_branch_enable(struct perf_event *event)
+{
+ WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline void armv8pmu_branch_disable(struct perf_event *event)
+{
+ WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) { }
+static inline void armv8pmu_branch_reset(void) { }
+static inline int armv8pmu_private_alloc(struct arm_pmu *arm_pmu) { return 0; }
+static inline void armv8pmu_private_free(struct arm_pmu *arm_pmu) { }
#endif
@@ -769,38 +769,21 @@ static void armv8pmu_enable_event(struct perf_event *event)
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
-
- /*
- * Disable counter
- */
armv8pmu_disable_event_counter(event);
-
- /*
- * Set event.
- */
armv8pmu_write_event_type(event);
-
- /*
- * Enable interrupt for this counter
- */
armv8pmu_enable_event_irq(event);
-
- /*
- * Enable counter
- */
armv8pmu_enable_event_counter(event);
+
+ if (has_branch_stack(event))
+ armv8pmu_branch_enable(event);
}
static void armv8pmu_disable_event(struct perf_event *event)
{
- /*
- * Disable counter
- */
- armv8pmu_disable_event_counter(event);
+ if (has_branch_stack(event))
+ armv8pmu_branch_disable(event);
- /*
- * Disable interrupt for this counter
- */
+ armv8pmu_disable_event_counter(event);
armv8pmu_disable_event_irq(event);
}
@@ -878,6 +861,13 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
if (!armpmu_event_set_period(event))
continue;
+ if (has_branch_stack(event)) {
+ WARN_ON(!cpuc->branches);
+ armv8pmu_branch_read(cpuc, event);
+ data.br_stack = &cpuc->branches->branch_stack;
+ data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+ }
+
/*
* Perf event overflow will queue the processing of the event as
* an irq_work which will be taken care of in the handling of
@@ -976,6 +966,14 @@ static int armv8pmu_user_event_idx(struct perf_event *event)
return event->hw.idx;
}
+static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+{
+ struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu);
+
+ if (sched_in && arm_pmu_branch_stack_supported(armpmu))
+ armv8pmu_branch_reset();
+}
+
/*
* Add an event filter to a given event.
*/
@@ -1052,6 +1050,9 @@ static void armv8pmu_reset(void *info)
pmcr |= ARMV8_PMU_PMCR_LP;
armv8pmu_pmcr_write(pmcr);
+
+ if (arm_pmu_branch_stack_supported(cpu_pmu))
+ armv8pmu_branch_reset();
}
static int __armv8_pmuv3_map_event(struct perf_event *event,
@@ -1069,6 +1070,9 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,
&armv8_pmuv3_perf_cache_map,
ARMV8_PMU_EVTYPE_EVENT);
+ if (has_branch_stack(event) && !armv8pmu_branch_valid(event))
+ return -EOPNOTSUPP;
+
if (armv8pmu_event_is_64bit(event))
event->hw.flags |= ARMPMU_EVT_64BIT;
@@ -1181,6 +1185,21 @@ static void __armv8pmu_probe_pmu(void *info)
cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1);
else
cpu_pmu->reg_pmmir = 0;
+ armv8pmu_branch_probe(cpu_pmu);
+}
+
+static int branch_records_alloc(struct arm_pmu *armpmu)
+{
+ struct pmu_hw_events *events;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ events = per_cpu_ptr(armpmu->hw_events, cpu);
+ events->branches = kzalloc(sizeof(struct branch_records), GFP_KERNEL);
+ if (!events->branches)
+ return -ENOMEM;
+ }
+ return 0;
}
static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
@@ -1191,12 +1210,24 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
};
int ret;
+ ret = armv8pmu_private_alloc(cpu_pmu);
+ if (ret)
+ return ret;
+
ret = smp_call_function_any(&cpu_pmu->supported_cpus,
__armv8pmu_probe_pmu,
&probe, 1);
if (ret)
return ret;
+ if (arm_pmu_branch_stack_supported(cpu_pmu)) {
+ ret = branch_records_alloc(cpu_pmu);
+ if (ret)
+ return ret;
+ } else {
+ armv8pmu_private_free(cpu_pmu);
+ }
+
return probe.present ? 0 : -ENODEV;
}
@@ -1261,6 +1292,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
cpu_pmu->filter = armv8pmu_filter;
cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx;
+ cpu_pmu->sched_task = armv8pmu_sched_task;
cpu_pmu->name = name;
cpu_pmu->map_event = map_event;