[V8,5/6] arm64/perf: Add branch stack support in ARMV8 PMU

Message ID 20230123125956.1350336-6-anshuman.khandual@arm.com
State New
Headers
Series arm64/perf: Enable branch stack sampling |

Commit Message

Anshuman Khandual Jan. 23, 2023, 12:59 p.m. UTC
  This enables support for branch stack sampling event in ARMV8 PMU, checking
has_branch_stack() on the event inside 'struct arm_pmu' callbacks. Although
these branch stack helpers armv8pmu_branch_XXXXX() are just dummy functions
for now. While here, this also defines arm_pmu's sched_task() callback with
armv8pmu_sched_task(), which resets the branch record buffer on a sched_in.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
 arch/arm64/include/asm/perf_event.h | 31 ++++++++++++
 arch/arm64/kernel/perf_event.c      | 78 ++++++++++++++++++++---------
 2 files changed, 86 insertions(+), 23 deletions(-)
  

Comments

kernel test robot Jan. 30, 2023, 4:28 p.m. UTC | #1
Hi Anshuman,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on arm64/for-next/core]
[also build test WARNING on tip/perf/core acme/perf/core linus/master v6.2-rc6 next-20230130]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230123-210254
base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
patch link:    https://lore.kernel.org/r/20230123125956.1350336-6-anshuman.khandual%40arm.com
patch subject: [PATCH V8 5/6] arm64/perf: Add branch stack support in ARMV8 PMU
config: arm64-randconfig-r016-20230130 (https://download.01.org/0day-ci/archive/20230131/202301310047.b5iv9hM8-lkp@intel.com/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 4196ca3278f78c6e19246e54ab0ecb364e37d66a)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm64 cross compiling tool for clang build
        # apt-get install binutils-aarch64-linux-gnu
        # https://github.com/intel-lab-lkp/linux/commit/0deba04ac45f8632b8579cb5cbf908b9f4428402
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230123-210254
        git checkout 0deba04ac45f8632b8579cb5cbf908b9f4428402
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 prepare

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from arch/arm64/kernel/asm-offsets.c:15:
   In file included from include/linux/kvm_host.h:45:
   In file included from arch/arm64/include/asm/kvm_host.h:36:
   In file included from include/kvm/arm_pmu.h:11:
>> arch/arm64/include/asm/perf_event.h:280:20: warning: function 'has_branch_stack' has internal linkage but is not defined [-Wundefined-internal]
   static inline bool has_branch_stack(struct perf_event *event);
                      ^
   arch/arm64/include/asm/perf_event.h:284:16: note: used here
           WARN_ON_ONCE(!has_branch_stack(event));
                         ^
   1 warning generated.
--
   In file included from arch/arm64/kernel/asm-offsets.c:15:
   In file included from include/linux/kvm_host.h:45:
   In file included from arch/arm64/include/asm/kvm_host.h:36:
   In file included from include/kvm/arm_pmu.h:11:
>> arch/arm64/include/asm/perf_event.h:280:20: warning: function 'has_branch_stack' has internal linkage but is not defined [-Wundefined-internal]
   static inline bool has_branch_stack(struct perf_event *event);
                      ^
   arch/arm64/include/asm/perf_event.h:284:16: note: used here
           WARN_ON_ONCE(!has_branch_stack(event));
                         ^
   1 warning generated.


vim +/has_branch_stack +280 arch/arm64/include/asm/perf_event.h

   279	
 > 280	static inline bool has_branch_stack(struct perf_event *event);
   281
  
Will Deacon Feb. 3, 2023, 11:31 a.m. UTC | #2
On Tue, Jan 31, 2023 at 12:28:17AM +0800, kernel test robot wrote:
> Hi Anshuman,
> 
> Thank you for the patch! Perhaps something to improve:
> 
> [auto build test WARNING on arm64/for-next/core]
> [also build test WARNING on tip/perf/core acme/perf/core linus/master v6.2-rc6 next-20230130]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
> 
> url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230123-210254
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
> patch link:    https://lore.kernel.org/r/20230123125956.1350336-6-anshuman.khandual%40arm.com
> patch subject: [PATCH V8 5/6] arm64/perf: Add branch stack support in ARMV8 PMU
> config: arm64-randconfig-r016-20230130 (https://download.01.org/0day-ci/archive/20230131/202301310047.b5iv9hM8-lkp@intel.com/config)
> compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 4196ca3278f78c6e19246e54ab0ecb364e37d66a)
> reproduce (this is a W=1 build):
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # install arm64 cross compiling tool for clang build
>         # apt-get install binutils-aarch64-linux-gnu
>         # https://github.com/intel-lab-lkp/linux/commit/0deba04ac45f8632b8579cb5cbf908b9f4428402
>         git remote add linux-review https://github.com/intel-lab-lkp/linux
>         git fetch --no-tags linux-review Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230123-210254
>         git checkout 0deba04ac45f8632b8579cb5cbf908b9f4428402
>         # save the config file
>         mkdir build_dir && cp config build_dir/.config
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 olddefconfig
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 prepare
> 
> If you fix the issue, kindly add following tag where applicable
> | Reported-by: kernel test robot <lkp@intel.com>
> 
> All warnings (new ones prefixed by >>):
> 
>    In file included from arch/arm64/kernel/asm-offsets.c:15:
>    In file included from include/linux/kvm_host.h:45:
>    In file included from arch/arm64/include/asm/kvm_host.h:36:
>    In file included from include/kvm/arm_pmu.h:11:
> >> arch/arm64/include/asm/perf_event.h:280:20: warning: function 'has_branch_stack' has internal linkage but is not defined [-Wundefined-internal]
>    static inline bool has_branch_stack(struct perf_event *event);
>                       ^
>    arch/arm64/include/asm/perf_event.h:284:16: note: used here
>            WARN_ON_ONCE(!has_branch_stack(event));
>                          ^
>    1 warning generated.
> --
>    In file included from arch/arm64/kernel/asm-offsets.c:15:
>    In file included from include/linux/kvm_host.h:45:
>    In file included from arch/arm64/include/asm/kvm_host.h:36:
>    In file included from include/kvm/arm_pmu.h:11:
> >> arch/arm64/include/asm/perf_event.h:280:20: warning: function 'has_branch_stack' has internal linkage but is not defined [-Wundefined-internal]
>    static inline bool has_branch_stack(struct perf_event *event);
>                       ^
>    arch/arm64/include/asm/perf_event.h:284:16: note: used here
>            WARN_ON_ONCE(!has_branch_stack(event));
>                          ^
>    1 warning generated.

This looks like it should be fixed. I'd also like to see Mark's ack on the
final series, since he had some detailed comments on the previous version.

Will
  
Anshuman Khandual Feb. 6, 2023, 4:16 a.m. UTC | #3
On 2/3/23 17:01, Will Deacon wrote:
> On Tue, Jan 31, 2023 at 12:28:17AM +0800, kernel test robot wrote:
>> Hi Anshuman,
>>
>> Thank you for the patch! Perhaps something to improve:
>>
>> [auto build test WARNING on arm64/for-next/core]
>> [also build test WARNING on tip/perf/core acme/perf/core linus/master v6.2-rc6 next-20230130]
>> [If your patch is applied to the wrong git tree, kindly drop us a note.
>> And when submitting patch, we suggest to use '--base' as documented in
>> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>>
>> url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230123-210254
>> base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
>> patch link:    https://lore.kernel.org/r/20230123125956.1350336-6-anshuman.khandual%40arm.com
>> patch subject: [PATCH V8 5/6] arm64/perf: Add branch stack support in ARMV8 PMU
>> config: arm64-randconfig-r016-20230130 (https://download.01.org/0day-ci/archive/20230131/202301310047.b5iv9hM8-lkp@intel.com/config)
>> compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 4196ca3278f78c6e19246e54ab0ecb364e37d66a)
>> reproduce (this is a W=1 build):
>>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>>         chmod +x ~/bin/make.cross
>>         # install arm64 cross compiling tool for clang build
>>         # apt-get install binutils-aarch64-linux-gnu
>>         # https://github.com/intel-lab-lkp/linux/commit/0deba04ac45f8632b8579cb5cbf908b9f4428402
>>         git remote add linux-review https://github.com/intel-lab-lkp/linux
>>         git fetch --no-tags linux-review Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230123-210254
>>         git checkout 0deba04ac45f8632b8579cb5cbf908b9f4428402
>>         # save the config file
>>         mkdir build_dir && cp config build_dir/.config
>>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 olddefconfig
>>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 prepare
>>
>> If you fix the issue, kindly add following tag where applicable
>> | Reported-by: kernel test robot <lkp@intel.com>
>>
>> All warnings (new ones prefixed by >>):
>>
>>    In file included from arch/arm64/kernel/asm-offsets.c:15:
>>    In file included from include/linux/kvm_host.h:45:
>>    In file included from arch/arm64/include/asm/kvm_host.h:36:
>>    In file included from include/kvm/arm_pmu.h:11:
>>>> arch/arm64/include/asm/perf_event.h:280:20: warning: function 'has_branch_stack' has internal linkage but is not defined [-Wundefined-internal]
>>    static inline bool has_branch_stack(struct perf_event *event);
>>                       ^
>>    arch/arm64/include/asm/perf_event.h:284:16: note: used here
>>            WARN_ON_ONCE(!has_branch_stack(event));
>>                          ^
>>    1 warning generated.
>> --
>>    In file included from arch/arm64/kernel/asm-offsets.c:15:
>>    In file included from include/linux/kvm_host.h:45:
>>    In file included from arch/arm64/include/asm/kvm_host.h:36:
>>    In file included from include/kvm/arm_pmu.h:11:
>>>> arch/arm64/include/asm/perf_event.h:280:20: warning: function 'has_branch_stack' has internal linkage but is not defined [-Wundefined-internal]
>>    static inline bool has_branch_stack(struct perf_event *event);
>>                       ^
>>    arch/arm64/include/asm/perf_event.h:284:16: note: used here
>>            WARN_ON_ONCE(!has_branch_stack(event));
>>                          ^
>>    1 warning generated.
> 
> This looks like it should be fixed. I'd also like to see Mark's ack on the

This build warning is triggered when CONFIG_PERF_EVENTS is not enabled, when
all the fallback stubs in there try to use has_branch_stack() which does not
get defined. The following change, fixes the problem.

diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 97db96326e13..1f68c125eb1a 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -277,8 +277,6 @@ struct pmu_hw_events;
 struct arm_pmu;
 struct perf_event;
 
-static inline bool has_branch_stack(struct perf_event *event);
-
 #ifdef CONFIG_ARM64_BRBE
 void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event);
 bool armv8pmu_branch_valid(struct perf_event *event);
@@ -289,6 +287,9 @@ void armv8pmu_branch_reset(void);
 int armv8pmu_private_alloc(struct arm_pmu *arm_pmu);
 void armv8pmu_private_free(struct arm_pmu *arm_pmu);
 #else
+#ifdef CONFIG_PERF_EVENTS
+static inline bool has_branch_stack(struct perf_event *event);
+
 static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event)
 {
        WARN_ON_ONCE(!has_branch_stack(event));
@@ -316,3 +317,4 @@ static inline int armv8pmu_private_alloc(struct arm_pmu *arm_pmu) { return 0; }
 static inline void armv8pmu_private_free(struct arm_pmu *arm_pmu) { }
 #endif
 #endif
+#endif

> final series, since he had some detailed comments on the previous version.
> 
> Will
  
Mark Rutland Feb. 8, 2023, 8:06 p.m. UTC | #4
On Mon, Feb 06, 2023 at 09:46:25AM +0530, Anshuman Khandual wrote:
> On 2/3/23 17:01, Will Deacon wrote:
> > On Tue, Jan 31, 2023 at 12:28:17AM +0800, kernel test robot wrote:

[...]

> > This looks like it should be fixed. I'd also like to see Mark's ack on the
> 
> This build warning is triggered when CONFIG_PERF_EVENTS is not enabled, when
> all the fallback stubs in there try to use has_branch_stack() which does not
> get defined. The following change, fixes the problem.

I expect that you'll post a v9 with that folded in, and I've just made some
comments on v7 which I expect will require some changes, so I'm going to wait
for v9 for further review to keep this manageable.

Thanks,
Mark.
  
Anshuman Khandual Feb. 9, 2023, 2:48 a.m. UTC | #5
On 2/9/23 01:36, Mark Rutland wrote:
> On Mon, Feb 06, 2023 at 09:46:25AM +0530, Anshuman Khandual wrote:
>> On 2/3/23 17:01, Will Deacon wrote:
>>> On Tue, Jan 31, 2023 at 12:28:17AM +0800, kernel test robot wrote:
> 
> [...]
> 
>>> This looks like it should be fixed. I'd also like to see Mark's ack on the
>>
>> This build warning is triggered when CONFIG_PERF_EVENTS is not enabled, when
>> all the fallback stubs in there try to use has_branch_stack() which does not
>> get defined. The following change, fixes the problem.
> 
> I expect that you'll post a v9 with that folded in, and I've just made some
> comments on v7 which I expect will require some changes, so I'm going to wait
> for v9 for further review to keep this manageable.

Sure, will work on a V9 and post when ready.
  

Patch

diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 3eaf462f5752..83951fdeccf3 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -273,4 +273,35 @@  extern unsigned long perf_misc_flags(struct pt_regs *regs);
 	(regs)->pstate = PSR_MODE_EL1h;	\
 }
 
+struct pmu_hw_events;
+struct arm_pmu;
+struct perf_event;
+
+static inline bool has_branch_stack(struct perf_event *event);
+
+static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline bool armv8pmu_branch_valid(struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+	return false;
+}
+
+static inline void armv8pmu_branch_enable(struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline void armv8pmu_branch_disable(struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) { }
+static inline void armv8pmu_branch_reset(void) { }
+static inline int armv8pmu_private_alloc(struct arm_pmu *arm_pmu) { return 0; }
+static inline void armv8pmu_private_free(struct arm_pmu *arm_pmu) { }
 #endif
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index a5193f2146a6..f0689c84530b 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -769,38 +769,21 @@  static void armv8pmu_enable_event(struct perf_event *event)
 	 * Enable counter and interrupt, and set the counter to count
 	 * the event that we're interested in.
 	 */
-
-	/*
-	 * Disable counter
-	 */
 	armv8pmu_disable_event_counter(event);
-
-	/*
-	 * Set event.
-	 */
 	armv8pmu_write_event_type(event);
-
-	/*
-	 * Enable interrupt for this counter
-	 */
 	armv8pmu_enable_event_irq(event);
-
-	/*
-	 * Enable counter
-	 */
 	armv8pmu_enable_event_counter(event);
+
+	if (has_branch_stack(event))
+		armv8pmu_branch_enable(event);
 }
 
 static void armv8pmu_disable_event(struct perf_event *event)
 {
-	/*
-	 * Disable counter
-	 */
-	armv8pmu_disable_event_counter(event);
+	if (has_branch_stack(event))
+		armv8pmu_branch_disable(event);
 
-	/*
-	 * Disable interrupt for this counter
-	 */
+	armv8pmu_disable_event_counter(event);
 	armv8pmu_disable_event_irq(event);
 }
 
@@ -878,6 +861,13 @@  static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
 		if (!armpmu_event_set_period(event))
 			continue;
 
+		if (has_branch_stack(event)) {
+			WARN_ON(!cpuc->branches);
+			armv8pmu_branch_read(cpuc, event);
+			data.br_stack = &cpuc->branches->branch_stack;
+			data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+		}
+
 		/*
 		 * Perf event overflow will queue the processing of the event as
 		 * an irq_work which will be taken care of in the handling of
@@ -976,6 +966,14 @@  static int armv8pmu_user_event_idx(struct perf_event *event)
 	return event->hw.idx;
 }
 
+static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu);
+
+	if (sched_in && arm_pmu_branch_stack_supported(armpmu))
+		armv8pmu_branch_reset();
+}
+
 /*
  * Add an event filter to a given event.
  */
@@ -1052,6 +1050,9 @@  static void armv8pmu_reset(void *info)
 		pmcr |= ARMV8_PMU_PMCR_LP;
 
 	armv8pmu_pmcr_write(pmcr);
+
+	if (arm_pmu_branch_stack_supported(cpu_pmu))
+		armv8pmu_branch_reset();
 }
 
 static int __armv8_pmuv3_map_event(struct perf_event *event,
@@ -1069,6 +1070,9 @@  static int __armv8_pmuv3_map_event(struct perf_event *event,
 				       &armv8_pmuv3_perf_cache_map,
 				       ARMV8_PMU_EVTYPE_EVENT);
 
+	if (has_branch_stack(event) && !armv8pmu_branch_valid(event))
+		return -EOPNOTSUPP;
+
 	if (armv8pmu_event_is_64bit(event))
 		event->hw.flags |= ARMPMU_EVT_64BIT;
 
@@ -1181,6 +1185,21 @@  static void __armv8pmu_probe_pmu(void *info)
 		cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1);
 	else
 		cpu_pmu->reg_pmmir = 0;
+	armv8pmu_branch_probe(cpu_pmu);
+}
+
+static int branch_records_alloc(struct arm_pmu *armpmu)
+{
+	struct pmu_hw_events *events;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		events = per_cpu_ptr(armpmu->hw_events, cpu);
+		events->branches = kzalloc(sizeof(struct branch_records), GFP_KERNEL);
+		if (!events->branches)
+			return -ENOMEM;
+	}
+	return 0;
 }
 
 static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
@@ -1191,12 +1210,24 @@  static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
 	};
 	int ret;
 
+	ret = armv8pmu_private_alloc(cpu_pmu);
+	if (ret)
+		return ret;
+
 	ret = smp_call_function_any(&cpu_pmu->supported_cpus,
 				    __armv8pmu_probe_pmu,
 				    &probe, 1);
 	if (ret)
 		return ret;
 
+	if (arm_pmu_branch_stack_supported(cpu_pmu)) {
+		ret = branch_records_alloc(cpu_pmu);
+		if (ret)
+			return ret;
+	} else {
+		armv8pmu_private_free(cpu_pmu);
+	}
+
 	return probe.present ? 0 : -ENODEV;
 }
 
@@ -1261,6 +1292,7 @@  static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
 	cpu_pmu->filter			= armv8pmu_filter;
 
 	cpu_pmu->pmu.event_idx		= armv8pmu_user_event_idx;
+	cpu_pmu->sched_task		= armv8pmu_sched_task;
 
 	cpu_pmu->name			= name;
 	cpu_pmu->map_event		= map_event;