[V5,6/7] arm64/perf: Add BRBE driver

Message ID 20221107062514.2851047-7-anshuman.khandual@arm.com
State New
Headers
Series arm64/perf: Enable branch stack sampling |

Commit Message

Anshuman Khandual Nov. 7, 2022, 6:25 a.m. UTC
  This adds a BRBE driver which implements all the required helper functions
for struct arm_pmu. Following functions are defined by this driver which
will configure, enable, capture, reset and disable BRBE buffer HW as and
when requested via perf branch stack sampling framework.

- arm64_pmu_brbe_filter()
- arm64_pmu_brbe_enable()
- arm64_pmu_brbe_disable()
- arm64_pmu_brbe_read()
- arm64_pmu_brbe_probe()
- arm64_pmu_brbe_reset()
- arm64_pmu_brbe_supported()

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-perf-users@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
 arch/arm64/kernel/perf_event.c |   8 +-
 drivers/perf/Kconfig           |  11 +
 drivers/perf/Makefile          |   1 +
 drivers/perf/arm_pmu_brbe.c    | 441 +++++++++++++++++++++++++++++++++
 drivers/perf/arm_pmu_brbe.h    | 259 +++++++++++++++++++
 include/linux/perf/arm_pmu.h   |  20 ++
 6 files changed, 739 insertions(+), 1 deletion(-)
 create mode 100644 drivers/perf/arm_pmu_brbe.c
 create mode 100644 drivers/perf/arm_pmu_brbe.h
  

Comments

Anshuman Khandual Nov. 9, 2022, 3:08 a.m. UTC | #1
On 11/7/22 11:55, Anshuman Khandual wrote:
> +void arm64_pmu_brbe_read(struct pmu_hw_events *cpuc, struct perf_event *event)
> +{
> +	u64 brbinf;
> +	int idx;
> +
> +	if (brbe_disabled(cpuc))
> +		return;
> +
> +	set_brbe_paused();
> +	for (idx = 0; idx < cpuc->brbe_nr; idx++) {
> +		select_brbe_bank_index(idx);
> +		brbinf = get_brbinf_reg(idx);
> +		/*
> +		 * There are no valid entries anymore on the buffer.
> +		 * Abort the branch record processing to save some
> +		 * cycles and also reduce the capture/process load
> +		 * for the user space as well.
> +		 */
> +		if (brbe_invalid(brbinf))
> +			break;
> +
> +		if (brbe_valid(brbinf)) {
> +			cpuc->branches->brbe_entries[idx].from =  get_brbsrc_reg(idx);
> +			cpuc->branches->brbe_entries[idx].to =  get_brbtgt_reg(idx);
> +		} else if (brbe_source(brbinf)) {
> +			cpuc->branches->brbe_entries[idx].from =  get_brbsrc_reg(idx);
> +			cpuc->branches->brbe_entries[idx].to = 0;
> +		} else if (brbe_target(brbinf)) {
> +			cpuc->branches->brbe_entries[idx].from = 0;
> +			cpuc->branches->brbe_entries[idx].to =  get_brbtgt_reg(idx);
> +		}
> +		capture_brbe_flags(cpuc, event, brbinf, idx);
> +	}
> +	cpuc->branches->brbe_stack.nr = idx;
> +	cpuc->branches->brbe_stack.hw_idx = -1ULL;
> +	process_branch_aborts(cpuc);
> +}

The following additional changes are required to ensure that BRBE remains "un-paused" after
processing the branch records inside PMU interrupt handler. Without this change, there will
PMU interrupts without valid branch records, reducing branch stack sample collection during
given workload execution. I will fold this into the BRBE driver.

diff --git a/drivers/perf/arm_pmu_brbe.c b/drivers/perf/arm_pmu_brbe.c
index ce1aa4171481..c8154ddd341d 100644
--- a/drivers/perf/arm_pmu_brbe.c
+++ b/drivers/perf/arm_pmu_brbe.c
@@ -429,6 +429,7 @@ void arm64_pmu_brbe_read(struct pmu_hw_events *cpuc, struct perf_event *event)
        cpuc->branches->brbe_stack.nr = idx;
        cpuc->branches->brbe_stack.hw_idx = -1ULL;
        process_branch_aborts(cpuc);
+       clr_brbe_paused();
 }
 
 void arm64_pmu_brbe_reset(struct pmu_hw_events *cpuc)
diff --git a/drivers/perf/arm_pmu_brbe.h b/drivers/perf/arm_pmu_brbe.h
index 22c4b25b1777..33da6fc9aefa 100644
--- a/drivers/perf/arm_pmu_brbe.h
+++ b/drivers/perf/arm_pmu_brbe.h
@@ -257,3 +257,11 @@ static inline void set_brbe_paused(void)
        write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
        isb();
 }
+
+static inline void clr_brbe_paused(void)
+{
+       u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+
+       write_sysreg_s(brbfcr & ~BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
+       isb();
+}

For example:

./perf record -j any,u,k,save_type ls
./perf report -D | grep branch

Before this change -

# cat out | grep "branch stack: nr:64" | wc -l
4
# cat out | grep "branch stack: nr:0" | wc -l
57
# perf report -D | grep branch

... branch stack: nr:64
... branch stack: nr:0
... branch stack: nr:64
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:64
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:64
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0
... branch stack: nr:0

But after this change -

$ cat out | grep "branch stack: nr:64" | wc -l
107
# cat out | grep "branch stack: nr:0" | wc -l
0
$ perf report -D | grep branch

.......................

... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64
... branch stack: nr:64

.......................

- Anshuman
  
James Clark Nov. 16, 2022, 4:42 p.m. UTC | #2
On 07/11/2022 06:25, Anshuman Khandual wrote:
[...]

> +static void perf_branch_to_brbcr(struct pmu_hw_events *cpuc, int branch_type)
> +{
> +	cpuc->brbcr = (BRBCR_EL1_CC | BRBCR_EL1_MPRED);
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_USER)
> +		cpuc->brbcr |= BRBCR_EL1_E0BRE;
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_NO_CYCLES)
> +		cpuc->brbcr &= ~BRBCR_EL1_CC;
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_NO_FLAGS)
> +		cpuc->brbcr &= ~BRBCR_EL1_MPRED;
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_KERNEL)
> +		cpuc->brbcr |= BRBCR_EL1_E1BRE;
> +	else
> +		return;
> +
> +	/*
> +	 * The exception and exception return branches could be
> +	 * captured only when the event has necessary privilege
> +	 * indicated via branch type PERF_SAMPLE_BRANCH_KERNEL,
> +	 * which has been ascertained in generic perf. Please
> +	 * refer perf_copy_attr() for more details.
> +	 */
> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
> +		cpuc->brbcr |= BRBCR_EL1_EXCEPTION;
> +		cpuc->brbcr |= BRBCR_EL1_ERTN;

Because this comes after the PERF_SAMPLE_BRANCH_KERNEL check, it's
impossible to get syscall records from userspace. When you enable kernel
branch records, the buffer always fills up before it gets to userspace.

Can you move this to the top so that it can be set if either
PERF_SAMPLE_BRANCH_USER or PERF_SAMPLE_BRANCH_KERNEL is set. The
hardware already handles the security by giving partial records with the
kernel part zeroed out so I don't think the driver needs to add any
additional rules other than setting BRBCR_EL1_E1BRE or BRBCR_EL1_E0BRE.

For example I moved it to the top, removed the return below and then I
get syscall partial records:

....  5: 0000000000745d0c -> 0000000000000000 0 cycles  P   9fbfbfbf SYSCALL

I also get ERETS but with only the userspace part set:

.....  4: 0000000000000000 -> 0000000000745d10 0 cycles  P   9fbfbfbf ERET

> +		return;
> +	}
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL)
> +		cpuc->brbcr |= BRBCR_EL1_EXCEPTION;
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
> +		cpuc->brbcr |= BRBCR_EL1_ERTN;
> +}
> +
> +
> +void arm64_pmu_brbe_filter(struct pmu_hw_events *cpuc, struct perf_event *event)
> +{
> +	u64 branch_type = event->attr.branch_sample_type;
> +
> +	if (brbe_disabled(cpuc))
> +		return;
> +
> +	perf_branch_to_brbfcr(cpuc, branch_type);
> +	perf_branch_to_brbcr(cpuc, branch_type);
> +}
> +
> +static int brbe_fetch_perf_type(u64 brbinf, bool *new_branch_type)
> +{
> +	int brbe_type = brbe_fetch_type(brbinf);
> +	*new_branch_type = false;
> +
> +	switch (brbe_type) {
> +	case BRBINF_EL1_TYPE_UNCOND_DIR:
> +		return PERF_BR_UNCOND;
> +	case BRBINF_EL1_TYPE_INDIR:
> +		return PERF_BR_IND;
> +	case BRBINF_EL1_TYPE_DIR_LINK:
> +		return PERF_BR_CALL;
> +	case BRBINF_EL1_TYPE_INDIR_LINK:
> +		return PERF_BR_IND_CALL;
> +	case BRBINF_EL1_TYPE_RET_SUB:
> +		return PERF_BR_RET;
> +	case BRBINF_EL1_TYPE_COND_DIR:
> +		return PERF_BR_COND;
> +	case BRBINF_EL1_TYPE_CALL:
> +		return PERF_BR_CALL;
> +	case BRBINF_EL1_TYPE_TRAP:
> +		return PERF_BR_SYSCALL;
> +	case BRBINF_EL1_TYPE_RET_EXCPT:
> +		return PERF_BR_ERET;
> +	case BRBINF_EL1_TYPE_IRQ:
> +		return PERF_BR_IRQ;
> +	case BRBINF_EL1_TYPE_DEBUG_HALT:
> +		*new_branch_type = true;
> +		return PERF_BR_ARM64_DEBUG_HALT;
> +	case BRBINF_EL1_TYPE_SERROR:
> +		return PERF_BR_SERROR;
> +	case BRBINF_EL1_TYPE_INST_DEBUG:
> +		*new_branch_type = true;
> +		return PERF_BR_ARM64_DEBUG_INST;
> +	case BRBINF_EL1_TYPE_DATA_DEBUG:
> +		*new_branch_type = true;
> +		return PERF_BR_ARM64_DEBUG_DATA;
> +	case BRBINF_EL1_TYPE_ALGN_FAULT:
> +		*new_branch_type = true;
> +		return PERF_BR_NEW_FAULT_ALGN;
> +	case BRBINF_EL1_TYPE_INST_FAULT:
> +		*new_branch_type = true;
> +		return PERF_BR_NEW_FAULT_INST;
> +	case BRBINF_EL1_TYPE_DATA_FAULT:
> +		*new_branch_type = true;
> +		return PERF_BR_NEW_FAULT_DATA;
> +	case BRBINF_EL1_TYPE_FIQ:
> +		*new_branch_type = true;
> +		return PERF_BR_ARM64_FIQ;
> +	case BRBINF_EL1_TYPE_DEBUG_EXIT:
> +		*new_branch_type = true;
> +		return PERF_BR_ARM64_DEBUG_EXIT;
> +	default:
> +		pr_warn("unknown branch type captured\n");
> +		return PERF_BR_UNKNOWN;
> +	}
> +}
> +
> +static int brbe_fetch_perf_priv(u64 brbinf)
> +{
> +	int brbe_el = brbe_fetch_el(brbinf);
> +
> +	switch (brbe_el) {
> +	case BRBINF_EL1_EL_EL0:
> +		return PERF_BR_PRIV_USER;
> +	case BRBINF_EL1_EL_EL1:
> +		return PERF_BR_PRIV_KERNEL;
> +	case BRBINF_EL1_EL_EL2:
> +		if (is_kernel_in_hyp_mode())
> +			return PERF_BR_PRIV_KERNEL;
> +		return PERF_BR_PRIV_HV;
> +	default:
> +		pr_warn("unknown branch privilege captured\n");
> +		return PERF_BR_PRIV_UNKNOWN;
> +       }
> +}
> +
> +static void capture_brbe_flags(struct pmu_hw_events *cpuc, struct perf_event *event,
> +			       u64 brbinf, int idx)
> +{
> +	int branch_type, type = brbe_record_valid(brbinf);
> +	bool new_branch_type;
> +
> +	if (!branch_sample_no_cycles(event))
> +		cpuc->branches->brbe_entries[idx].cycles = brbe_fetch_cycles(brbinf);
> +
> +	if (branch_sample_type(event)) {
> +		branch_type = brbe_fetch_perf_type(brbinf, &new_branch_type);
> +		if (new_branch_type) {
> +			cpuc->branches->brbe_entries[idx].type = PERF_BR_EXTEND_ABI;
> +			cpuc->branches->brbe_entries[idx].new_type = branch_type;
> +		} else {
> +			cpuc->branches->brbe_entries[idx].type = branch_type;
> +		}
> +	}
> +
> +	if (!branch_sample_no_flags(event)) {
> +		/*
> +		 * BRBINF_LASTFAILED does not indicate that the last transaction
> +		 * got failed or aborted during the current branch record itself.
> +		 * Rather, this indicates that all the branch records which were
> +		 * in transaction until the curret branch record have failed. So
> +		 * the entire BRBE buffer needs to be processed later on to find
> +		 * all branch records which might have failed.
> +		 */
> +		cpuc->branches->brbe_entries[idx].abort = brbinf & BRBINF_EL1_LASTFAILED;
> +
> +		/*
> +		 * All these information (i.e transaction state and mispredicts)
> +		 * are not available for target only branch records.
> +		 */
> +		if (type != BRBINF_EL1_VALID_TARGET) {
> +			cpuc->branches->brbe_entries[idx].mispred = brbinf & BRBINF_EL1_MPRED;
> +			cpuc->branches->brbe_entries[idx].predicted = !(brbinf & BRBINF_EL1_MPRED);
> +			cpuc->branches->brbe_entries[idx].in_tx = brbinf & BRBINF_EL1_T;
> +		}
> +	}
> +
> +	if (branch_sample_priv(event)) {
> +		/*
> +		 * All these information (i.e branch privilege level) are not
> +		 * available for source only branch records.
> +		 */
> +		if (type != BRBINF_EL1_VALID_SOURCE)
> +			cpuc->branches->brbe_entries[idx].priv = brbe_fetch_perf_priv(brbinf);
> +	}
> +}
> +
> +/*
> + * A branch record with BRBINF_EL1.LASTFAILED set, implies that all
> + * preceding consecutive branch records, that were in a transaction
> + * (i.e their BRBINF_EL1.TX set) have been aborted.
> + *
> + * Similarly BRBFCR_EL1.LASTFAILED set, indicate that all preceding
> + * consecutive branch records upto the last record, which were in a
> + * transaction (i.e their BRBINF_EL1.TX set) have been aborted.
> + *
> + * --------------------------------- -------------------
> + * | 00 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX success]
> + * --------------------------------- -------------------
> + * | 01 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX success]
> + * --------------------------------- -------------------
> + * | 02 | BRBSRC | BRBTGT | BRBINF | | TX = 0 | LF = 0 |
> + * --------------------------------- -------------------
> + * | 03 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed]
> + * --------------------------------- -------------------
> + * | 04 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed]
> + * --------------------------------- -------------------
> + * | 05 | BRBSRC | BRBTGT | BRBINF | | TX = 0 | LF = 1 |
> + * --------------------------------- -------------------
> + * | .. | BRBSRC | BRBTGT | BRBINF | | TX = 0 | LF = 0 |
> + * --------------------------------- -------------------
> + * | 61 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed]
> + * --------------------------------- -------------------
> + * | 62 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed]
> + * --------------------------------- -------------------
> + * | 63 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed]
> + * --------------------------------- -------------------
> + *
> + * BRBFCR_EL1.LASTFAILED == 1
> + *
> + * Here BRBFCR_EL1.LASTFAILED failes all those consecutive and also
> + * in transaction branches near the end of the BRBE buffer.
> + */
> +static void process_branch_aborts(struct pmu_hw_events *cpuc)
> +{
> +	u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
> +	bool lastfailed = !!(brbfcr & BRBFCR_EL1_LASTFAILED);
> +	int idx = cpuc->brbe_nr - 1;
> +
> +	do {
> +		if (cpuc->branches->brbe_entries[idx].in_tx) {
> +			cpuc->branches->brbe_entries[idx].abort = lastfailed;
> +		} else {
> +			lastfailed = cpuc->branches->brbe_entries[idx].abort;
> +			cpuc->branches->brbe_entries[idx].abort = false;
> +		}
> +	} while (idx--, idx >= 0);
> +}
> +
> +void arm64_pmu_brbe_read(struct pmu_hw_events *cpuc, struct perf_event *event)
> +{
> +	u64 brbinf;
> +	int idx;
> +
> +	if (brbe_disabled(cpuc))
> +		return;
> +
> +	set_brbe_paused();
> +	for (idx = 0; idx < cpuc->brbe_nr; idx++) {
> +		select_brbe_bank_index(idx);
> +		brbinf = get_brbinf_reg(idx);
> +		/*
> +		 * There are no valid entries anymore on the buffer.
> +		 * Abort the branch record processing to save some
> +		 * cycles and also reduce the capture/process load
> +		 * for the user space as well.
> +		 */
> +		if (brbe_invalid(brbinf))
> +			break;
> +
> +		if (brbe_valid(brbinf)) {
> +			cpuc->branches->brbe_entries[idx].from =  get_brbsrc_reg(idx);
> +			cpuc->branches->brbe_entries[idx].to =  get_brbtgt_reg(idx);
> +		} else if (brbe_source(brbinf)) {
> +			cpuc->branches->brbe_entries[idx].from =  get_brbsrc_reg(idx);
> +			cpuc->branches->brbe_entries[idx].to = 0;
> +		} else if (brbe_target(brbinf)) {
> +			cpuc->branches->brbe_entries[idx].from = 0;
> +			cpuc->branches->brbe_entries[idx].to =  get_brbtgt_reg(idx);
> +		}
> +		capture_brbe_flags(cpuc, event, brbinf, idx);
> +	}
> +	cpuc->branches->brbe_stack.nr = idx;
> +	cpuc->branches->brbe_stack.hw_idx = -1ULL;
> +	process_branch_aborts(cpuc);
> +}
> +
> +void arm64_pmu_brbe_reset(struct pmu_hw_events *cpuc)
> +{
> +	if (brbe_disabled(cpuc))
> +		return;
> +
> +	asm volatile(BRB_IALL);
> +	isb();
> +}
> diff --git a/drivers/perf/arm_pmu_brbe.h b/drivers/perf/arm_pmu_brbe.h
> new file mode 100644
> index 000000000000..22c4b25b1777
> --- /dev/null
> +++ b/drivers/perf/arm_pmu_brbe.h
> @@ -0,0 +1,259 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Branch Record Buffer Extension Helpers.
> + *
> + * Copyright (C) 2021 ARM Limited
> + *
> + * Author: Anshuman Khandual <anshuman.khandual@arm.com>
> + */
> +#define pr_fmt(fmt) "brbe: " fmt
> +
> +#include <linux/perf/arm_pmu.h>
> +
> +/*
> + * BRBE Instructions
> + *
> + * BRB_IALL : Invalidate the entire buffer
> + * BRB_INJ  : Inject latest branch record derived from [BRBSRCINJ, BRBTGTINJ, BRBINFINJ]
> + */
> +#define BRB_IALL __emit_inst(0xD5000000 | sys_insn(1, 1, 7, 2, 4) | (0x1f))
> +#define BRB_INJ  __emit_inst(0xD5000000 | sys_insn(1, 1, 7, 2, 5) | (0x1f))
> +
> +/*
> + * BRBE Buffer Organization
> + *
> + * BRBE buffer is arranged as multiple banks of 32 branch record
> + * entries each. An indivdial branch record in a given bank could
> + * be accessedi, after selecting the bank in BRBFCR_EL1.BANK and
> + * accessing the registers i.e [BRBSRC, BRBTGT, BRBINF] set with
> + * indices [0..31].
> + *
> + * Bank 0
> + *
> + *	---------------------------------	------
> + *	| 00 | BRBSRC | BRBTGT | BRBINF |	| 00 |
> + *	---------------------------------	------
> + *	| 01 | BRBSRC | BRBTGT | BRBINF |	| 01 |
> + *	---------------------------------	------
> + *	| .. | BRBSRC | BRBTGT | BRBINF |	| .. |
> + *	---------------------------------	------
> + *	| 31 | BRBSRC | BRBTGT | BRBINF |	| 31 |
> + *	---------------------------------	------
> + *
> + * Bank 1
> + *
> + *	---------------------------------	------
> + *	| 32 | BRBSRC | BRBTGT | BRBINF |	| 00 |
> + *	---------------------------------	------
> + *	| 33 | BRBSRC | BRBTGT | BRBINF |	| 01 |
> + *	---------------------------------	------
> + *	| .. | BRBSRC | BRBTGT | BRBINF |	| .. |
> + *	---------------------------------	------
> + *	| 63 | BRBSRC | BRBTGT | BRBINF |	| 31 |
> + *	---------------------------------	------
> + */
> +#define BRBE_BANK0_IDX_MIN 0
> +#define BRBE_BANK0_IDX_MAX 31
> +#define BRBE_BANK1_IDX_MIN 32
> +#define BRBE_BANK1_IDX_MAX 63
> +
> +#define RETURN_READ_BRBSRCN(n) \
> +	read_sysreg_s(SYS_BRBSRC##n##_EL1)
> +
> +#define RETURN_READ_BRBTGTN(n) \
> +	read_sysreg_s(SYS_BRBTGT##n##_EL1)
> +
> +#define RETURN_READ_BRBINFN(n) \
> +	read_sysreg_s(SYS_BRBINF##n##_EL1)
> +
> +#define BRBE_REGN_CASE(n, case_macro) \
> +	case n: return case_macro(n); break
> +
> +#define BRBE_REGN_SWITCH(x, case_macro)				\
> +	do {							\
> +		switch (x) {					\
> +		BRBE_REGN_CASE(0, case_macro);			\
> +		BRBE_REGN_CASE(1, case_macro);			\
> +		BRBE_REGN_CASE(2, case_macro);			\
> +		BRBE_REGN_CASE(3, case_macro);			\
> +		BRBE_REGN_CASE(4, case_macro);			\
> +		BRBE_REGN_CASE(5, case_macro);			\
> +		BRBE_REGN_CASE(6, case_macro);			\
> +		BRBE_REGN_CASE(7, case_macro);			\
> +		BRBE_REGN_CASE(8, case_macro);			\
> +		BRBE_REGN_CASE(9, case_macro);			\
> +		BRBE_REGN_CASE(10, case_macro);			\
> +		BRBE_REGN_CASE(11, case_macro);			\
> +		BRBE_REGN_CASE(12, case_macro);			\
> +		BRBE_REGN_CASE(13, case_macro);			\
> +		BRBE_REGN_CASE(14, case_macro);			\
> +		BRBE_REGN_CASE(15, case_macro);			\
> +		BRBE_REGN_CASE(16, case_macro);			\
> +		BRBE_REGN_CASE(17, case_macro);			\
> +		BRBE_REGN_CASE(18, case_macro);			\
> +		BRBE_REGN_CASE(19, case_macro);			\
> +		BRBE_REGN_CASE(20, case_macro);			\
> +		BRBE_REGN_CASE(21, case_macro);			\
> +		BRBE_REGN_CASE(22, case_macro);			\
> +		BRBE_REGN_CASE(23, case_macro);			\
> +		BRBE_REGN_CASE(24, case_macro);			\
> +		BRBE_REGN_CASE(25, case_macro);			\
> +		BRBE_REGN_CASE(26, case_macro);			\
> +		BRBE_REGN_CASE(27, case_macro);			\
> +		BRBE_REGN_CASE(28, case_macro);			\
> +		BRBE_REGN_CASE(29, case_macro);			\
> +		BRBE_REGN_CASE(30, case_macro);			\
> +		BRBE_REGN_CASE(31, case_macro);			\
> +		default:					\
> +			pr_warn("unknown register index\n");	\
> +			return -1;				\
> +		}						\
> +	} while (0)
> +
> +static inline int buffer_to_brbe_idx(int buffer_idx)
> +{
> +	return buffer_idx % 32;
> +}
> +
> +static inline u64 get_brbsrc_reg(int buffer_idx)
> +{
> +	int brbe_idx = buffer_to_brbe_idx(buffer_idx);
> +
> +	BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBSRCN);
> +}
> +
> +static inline u64 get_brbtgt_reg(int buffer_idx)
> +{
> +	int brbe_idx = buffer_to_brbe_idx(buffer_idx);
> +
> +	BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBTGTN);
> +}
> +
> +static inline u64 get_brbinf_reg(int buffer_idx)
> +{
> +	int brbe_idx = buffer_to_brbe_idx(buffer_idx);
> +
> +	BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBINFN);
> +}
> +
> +static inline u64 brbe_record_valid(u64 brbinf)
> +{
> +	return (brbinf & BRBINF_EL1_VALID_MASK) >> BRBINF_EL1_VALID_SHIFT;
> +}
> +
> +static inline bool brbe_invalid(u64 brbinf)
> +{
> +	return brbe_record_valid(brbinf) == BRBINF_EL1_VALID_NONE;
> +}
> +
> +static inline bool brbe_valid(u64 brbinf)
> +{
> +	return brbe_record_valid(brbinf) == BRBINF_EL1_VALID_FULL;
> +}
> +
> +static inline bool brbe_source(u64 brbinf)
> +{
> +	return brbe_record_valid(brbinf) == BRBINF_EL1_VALID_SOURCE;
> +}
> +
> +static inline bool brbe_target(u64 brbinf)
> +{
> +	return brbe_record_valid(brbinf) == BRBINF_EL1_VALID_TARGET;
> +}
> +
> +static inline int brbe_fetch_cycles(u64 brbinf)
> +{
> +	/*
> +	 * Captured cycle count is unknown and hence
> +	 * should not be passed on the user space.
> +	 */
> +	if (brbinf & BRBINF_EL1_CCU)
> +		return 0;
> +
> +	return (brbinf & BRBINF_EL1_CC_MASK) >> BRBINF_EL1_CC_SHIFT;
> +}
> +
> +static inline int brbe_fetch_type(u64 brbinf)
> +{
> +	return (brbinf & BRBINF_EL1_TYPE_MASK) >> BRBINF_EL1_TYPE_SHIFT;
> +}
> +
> +static inline int brbe_fetch_el(u64 brbinf)
> +{
> +	return (brbinf & BRBINF_EL1_EL_MASK) >> BRBINF_EL1_EL_SHIFT;
> +}
> +
> +static inline int brbe_fetch_numrec(u64 brbidr)
> +{
> +	return (brbidr & BRBIDR0_EL1_NUMREC_MASK) >> BRBIDR0_EL1_NUMREC_SHIFT;
> +}
> +
> +static inline int brbe_fetch_format(u64 brbidr)
> +{
> +	return (brbidr & BRBIDR0_EL1_FORMAT_MASK) >> BRBIDR0_EL1_FORMAT_SHIFT;
> +}
> +
> +static inline int brbe_fetch_cc_bits(u64 brbidr)
> +{
> +	return (brbidr & BRBIDR0_EL1_CC_MASK) >> BRBIDR0_EL1_CC_SHIFT;
> +}
> +
> +static inline void select_brbe_bank(int bank)
> +{
> +	static int brbe_current_bank = -1;
> +	u64 brbfcr;
> +
> +	if (brbe_current_bank == bank)
> +		return;
> +
> +	WARN_ON(bank > 1);
> +	brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
> +	brbfcr &= ~BRBFCR_EL1_BANK_MASK;
> +	brbfcr |= ((bank << BRBFCR_EL1_BANK_SHIFT) & BRBFCR_EL1_BANK_MASK);
> +	write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
> +	isb();
> +	brbe_current_bank = bank;
> +}
> +
> +static inline void select_brbe_bank_index(int buffer_idx)
> +{
> +	switch (buffer_idx) {
> +	case BRBE_BANK0_IDX_MIN ... BRBE_BANK0_IDX_MAX:
> +		select_brbe_bank(0);
> +		break;
> +	case BRBE_BANK1_IDX_MIN ... BRBE_BANK1_IDX_MAX:
> +		select_brbe_bank(1);
> +		break;
> +	default:
> +		pr_warn("unsupported BRBE index\n");
> +	}
> +}
> +
> +static inline bool valid_brbe_nr(int brbe_nr)
> +{
> +	switch (brbe_nr) {
> +	case BRBIDR0_EL1_NUMREC_8:
> +	case BRBIDR0_EL1_NUMREC_16:
> +	case BRBIDR0_EL1_NUMREC_32:
> +	case BRBIDR0_EL1_NUMREC_64:
> +		return true;
> +	default:
> +		pr_warn("unsupported BRBE entries\n");
> +		return false;
> +	}
> +}
> +
> +static inline bool brbe_paused(void)
> +{
> +	u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
> +
> +	return brbfcr & BRBFCR_EL1_PAUSED;
> +}
> +
> +static inline void set_brbe_paused(void)
> +{
> +	u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
> +
> +	write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
> +	isb();
> +}
> diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
> index bda0d9984a98..9c23b2b58b3d 100644
> --- a/include/linux/perf/arm_pmu.h
> +++ b/include/linux/perf/arm_pmu.h
> @@ -168,6 +168,26 @@ struct arm_pmu {
>  	unsigned long acpi_cpuid;
>  };
>  
> +#ifdef CONFIG_ARM_BRBE_PMU
> +void arm64_pmu_brbe_filter(struct pmu_hw_events *hw_events, struct perf_event *event);
> +void arm64_pmu_brbe_read(struct pmu_hw_events *cpuc, struct perf_event *event);
> +void arm64_pmu_brbe_disable(struct pmu_hw_events *cpuc);
> +void arm64_pmu_brbe_enable(struct pmu_hw_events *cpuc);
> +void arm64_pmu_brbe_probe(struct pmu_hw_events *cpuc);
> +void arm64_pmu_brbe_reset(struct pmu_hw_events *cpuc);
> +bool arm64_pmu_brbe_supported(struct perf_event *event);
> +#else
> +static inline void arm64_pmu_brbe_filter(struct pmu_hw_events *hw_events, struct perf_event *event)
> +{
> +}
> +static inline void arm64_pmu_brbe_read(struct pmu_hw_events *cpuc, struct perf_event *event) { }
> +static inline void arm64_pmu_brbe_disable(struct pmu_hw_events *cpuc) { }
> +static inline void arm64_pmu_brbe_enable(struct pmu_hw_events *cpuc) { }
> +static inline void arm64_pmu_brbe_probe(struct pmu_hw_events *cpuc) { }
> +static inline void arm64_pmu_brbe_reset(struct pmu_hw_events *cpuc) { }
> +static inline bool arm64_pmu_brbe_supported(struct perf_event *event) {return false; }
> +#endif
> +
>  #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
>  
>  u64 armpmu_event_update(struct perf_event *event);
  
Anshuman Khandual Nov. 17, 2022, 5:45 a.m. UTC | #3
On 11/16/22 22:12, James Clark wrote:
> 
> 
> On 07/11/2022 06:25, Anshuman Khandual wrote:
> [...]
> 
>> +static void perf_branch_to_brbcr(struct pmu_hw_events *cpuc, int branch_type)
>> +{
>> +	cpuc->brbcr = (BRBCR_EL1_CC | BRBCR_EL1_MPRED);
>> +
>> +	if (branch_type & PERF_SAMPLE_BRANCH_USER)
>> +		cpuc->brbcr |= BRBCR_EL1_E0BRE;
>> +
>> +	if (branch_type & PERF_SAMPLE_BRANCH_NO_CYCLES)
>> +		cpuc->brbcr &= ~BRBCR_EL1_CC;
>> +
>> +	if (branch_type & PERF_SAMPLE_BRANCH_NO_FLAGS)
>> +		cpuc->brbcr &= ~BRBCR_EL1_MPRED;
>> +
>> +	if (branch_type & PERF_SAMPLE_BRANCH_KERNEL)
>> +		cpuc->brbcr |= BRBCR_EL1_E1BRE;
>> +	else
>> +		return;
>> +
>> +	/*
>> +	 * The exception and exception return branches could be
>> +	 * captured only when the event has necessary privilege
>> +	 * indicated via branch type PERF_SAMPLE_BRANCH_KERNEL,
>> +	 * which has been ascertained in generic perf. Please
>> +	 * refer perf_copy_attr() for more details.
>> +	 */
>> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
>> +		cpuc->brbcr |= BRBCR_EL1_EXCEPTION;
>> +		cpuc->brbcr |= BRBCR_EL1_ERTN;
> 
> Because this comes after the PERF_SAMPLE_BRANCH_KERNEL check, it's
> impossible to get syscall records from userspace. When you enable kernel
> branch records, the buffer always fills up before it gets to userspace.

Just to summerize.

System call [user_addr -> kernel_addr] and return [kernel_addr -> user_addr]
records are impossible to be captured, because

- Without PERF_SAMPLE_BRANCH_KERNEL, BRBCR_EL1_EXCEPTION/ERTN are not set
- With PERF_SAMPLE_BRANCH_KERNEL, buffer fills up with in kernel branches

Did you try with latest fix, that clears the paused BRBE after reading branch
records during PMU interrupt ? That fix creates much more samples than before.

> 
> Can you move this to the top so that it can be set if either
> PERF_SAMPLE_BRANCH_USER or PERF_SAMPLE_BRANCH_KERNEL is set. The

Why should they depend on privilege filters i.e PERF_SAMPLE_BRANCH_USER/KERNEL
rather than just branch filters PERF_SAMPLE_BRANCH_ANY/ANY_CALL/ANY_RETURN ?

> hardware already handles the security by giving partial records with the
> kernel part zeroed out so I don't think the driver needs to add any
> additional rules other than setting BRBCR_EL1_E1BRE or BRBCR_EL1_E0BRE.

Basically BRBCR_EL1_EXCEPTION/BRBCR_EL1_ERTN should be treated like any other
branch filter rather than privilege filters as is the case now ?

> 
> For example I moved it to the top, removed the return below and then I
> get syscall partial records:
> 
> ....  5: 0000000000745d0c -> 0000000000000000 0 cycles  P   9fbfbfbf SYSCALL
> 
> I also get ERETS but with only the userspace part set:
> 
> .....  4: 0000000000000000 -> 0000000000745d10 0 cycles  P   9fbfbfbf ERET
But with both user and kernel privilege filters being set, these should have
been complete branch records containing both user and kernel addresses ?

> 
>> +		return;
>> +	}
>> +
>> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL)
>> +		cpuc->brbcr |= BRBCR_EL1_EXCEPTION;
>> +
>> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
>> +		cpuc->brbcr |= BRBCR_EL1_ERTN;
>> +}
>> +

[....]
  
James Clark Nov. 17, 2022, 10:09 a.m. UTC | #4
On 17/11/2022 05:45, Anshuman Khandual wrote:
> 
> 
> On 11/16/22 22:12, James Clark wrote:
>>
>>
>> On 07/11/2022 06:25, Anshuman Khandual wrote:
>> [...]
>>
>>> +static void perf_branch_to_brbcr(struct pmu_hw_events *cpuc, int branch_type)
>>> +{
>>> +	cpuc->brbcr = (BRBCR_EL1_CC | BRBCR_EL1_MPRED);
>>> +
>>> +	if (branch_type & PERF_SAMPLE_BRANCH_USER)
>>> +		cpuc->brbcr |= BRBCR_EL1_E0BRE;
>>> +
>>> +	if (branch_type & PERF_SAMPLE_BRANCH_NO_CYCLES)
>>> +		cpuc->brbcr &= ~BRBCR_EL1_CC;
>>> +
>>> +	if (branch_type & PERF_SAMPLE_BRANCH_NO_FLAGS)
>>> +		cpuc->brbcr &= ~BRBCR_EL1_MPRED;
>>> +
>>> +	if (branch_type & PERF_SAMPLE_BRANCH_KERNEL)
>>> +		cpuc->brbcr |= BRBCR_EL1_E1BRE;
>>> +	else
>>> +		return;
>>> +
>>> +	/*
>>> +	 * The exception and exception return branches could be
>>> +	 * captured only when the event has necessary privilege
>>> +	 * indicated via branch type PERF_SAMPLE_BRANCH_KERNEL,
>>> +	 * which has been ascertained in generic perf. Please
>>> +	 * refer perf_copy_attr() for more details.
>>> +	 */
>>> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
>>> +		cpuc->brbcr |= BRBCR_EL1_EXCEPTION;
>>> +		cpuc->brbcr |= BRBCR_EL1_ERTN;
>>
>> Because this comes after the PERF_SAMPLE_BRANCH_KERNEL check, it's
>> impossible to get syscall records from userspace. When you enable kernel
>> branch records, the buffer always fills up before it gets to userspace.
> 
> Just to summerize.
> 
> System call [user_addr -> kernel_addr] and return [kernel_addr -> user_addr]
> records are impossible to be captured, because
> 
> - Without PERF_SAMPLE_BRANCH_KERNEL, BRBCR_EL1_EXCEPTION/ERTN are not set
> - With PERF_SAMPLE_BRANCH_KERNEL, buffer fills up with in kernel branches
> 

Yep that's it

> Did you try with latest fix, that clears the paused BRBE after reading branch
> records during PMU interrupt ? That fix creates much more samples than before.
> 

Yes that's with the latest fix. It may even make the problem more
obvious with the fix rather than without.

>>
>> Can you move this to the top so that it can be set if either
>> PERF_SAMPLE_BRANCH_USER or PERF_SAMPLE_BRANCH_KERNEL is set. The
> 
> Why should they depend on privilege filters i.e PERF_SAMPLE_BRANCH_USER/KERNEL
> rather than just branch filters PERF_SAMPLE_BRANCH_ANY/ANY_CALL/ANY_RETURN ?
> 

Exactly, I don't think they should depend on the privilege level. But at
the moment we return before setting them unless
PERF_SAMPLE_BRANCH_KERNEL is set.

>> hardware already handles the security by giving partial records with the
>> kernel part zeroed out so I don't think the driver needs to add any
>> additional rules other than setting BRBCR_EL1_E1BRE or BRBCR_EL1_E0BRE.
> 
> Basically BRBCR_EL1_EXCEPTION/BRBCR_EL1_ERTN should be treated like any other
> branch filter rather than privilege filters as is the case now ?

I think so yes

> 
>>
>> For example I moved it to the top, removed the return below and then I
>> get syscall partial records:
>>
>> ....  5: 0000000000745d0c -> 0000000000000000 0 cycles  P   9fbfbfbf SYSCALL
>>
>> I also get ERETS but with only the userspace part set:
>>
>> .....  4: 0000000000000000 -> 0000000000745d10 0 cycles  P   9fbfbfbf ERET
> But with both user and kernel privilege filters being set, these should have
> been complete branch records containing both user and kernel addresses ?

Yes, but I only set PERF_SAMPLE_BRANCH_USER, I should have given the
perf command as well:

  perf record -j any,save_type,u -- syscall_loop

Where syscall_loop obviously generates lots of SYSCALLS and ERETS. But
with both user and kernel you just don't get to that point before the
buffer fills up. At least in per process mode, maybe with -a the timings
are different.

> 
>>
>>> +		return;
>>> +	}
>>> +
>>> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL)
>>> +		cpuc->brbcr |= BRBCR_EL1_EXCEPTION;
>>> +
>>> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
>>> +		cpuc->brbcr |= BRBCR_EL1_ERTN;
>>> +}
>>> +
> 
> [....]
  
Anshuman Khandual Nov. 18, 2022, 6:14 a.m. UTC | #5
On 11/17/22 15:39, James Clark wrote:
> 
> 
> On 17/11/2022 05:45, Anshuman Khandual wrote:
>>
>>
>> On 11/16/22 22:12, James Clark wrote:
>>>
>>>
>>> On 07/11/2022 06:25, Anshuman Khandual wrote:
>>> [...]
>>>
>>>> +static void perf_branch_to_brbcr(struct pmu_hw_events *cpuc, int branch_type)
>>>> +{
>>>> +	cpuc->brbcr = (BRBCR_EL1_CC | BRBCR_EL1_MPRED);
>>>> +
>>>> +	if (branch_type & PERF_SAMPLE_BRANCH_USER)
>>>> +		cpuc->brbcr |= BRBCR_EL1_E0BRE;
>>>> +
>>>> +	if (branch_type & PERF_SAMPLE_BRANCH_NO_CYCLES)
>>>> +		cpuc->brbcr &= ~BRBCR_EL1_CC;
>>>> +
>>>> +	if (branch_type & PERF_SAMPLE_BRANCH_NO_FLAGS)
>>>> +		cpuc->brbcr &= ~BRBCR_EL1_MPRED;
>>>> +
>>>> +	if (branch_type & PERF_SAMPLE_BRANCH_KERNEL)
>>>> +		cpuc->brbcr |= BRBCR_EL1_E1BRE;
>>>> +	else
>>>> +		return;
>>>> +
>>>> +	/*
>>>> +	 * The exception and exception return branches could be
>>>> +	 * captured only when the event has necessary privilege
>>>> +	 * indicated via branch type PERF_SAMPLE_BRANCH_KERNEL,
>>>> +	 * which has been ascertained in generic perf. Please
>>>> +	 * refer perf_copy_attr() for more details.
>>>> +	 */
>>>> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
>>>> +		cpuc->brbcr |= BRBCR_EL1_EXCEPTION;
>>>> +		cpuc->brbcr |= BRBCR_EL1_ERTN;
>>>
>>> Because this comes after the PERF_SAMPLE_BRANCH_KERNEL check, it's
>>> impossible to get syscall records from userspace. When you enable kernel
>>> branch records, the buffer always fills up before it gets to userspace.
>>
>> Just to summerize.
>>
>> System call [user_addr -> kernel_addr] and return [kernel_addr -> user_addr]
>> records are impossible to be captured, because
>>
>> - Without PERF_SAMPLE_BRANCH_KERNEL, BRBCR_EL1_EXCEPTION/ERTN are not set
>> - With PERF_SAMPLE_BRANCH_KERNEL, buffer fills up with in kernel branches
>>
> 
> Yep that's it
> 
>> Did you try with latest fix, that clears the paused BRBE after reading branch
>> records during PMU interrupt ? That fix creates much more samples than before.
>>
> 
> Yes that's with the latest fix. It may even make the problem more
> obvious with the fix rather than without.

Okay.

> 
>>>
>>> Can you move this to the top so that it can be set if either
>>> PERF_SAMPLE_BRANCH_USER or PERF_SAMPLE_BRANCH_KERNEL is set. The
>>
>> Why should they depend on privilege filters i.e PERF_SAMPLE_BRANCH_USER/KERNEL
>> rather than just branch filters PERF_SAMPLE_BRANCH_ANY/ANY_CALL/ANY_RETURN ?
>>
> 
> Exactly, I don't think they should depend on the privilege level. But at
> the moment we return before setting them unless
> PERF_SAMPLE_BRANCH_KERNEL is set.

Okay.

> 
>>> hardware already handles the security by giving partial records with the
>>> kernel part zeroed out so I don't think the driver needs to add any
>>> additional rules other than setting BRBCR_EL1_E1BRE or BRBCR_EL1_E0BRE.
>>
>> Basically BRBCR_EL1_EXCEPTION/BRBCR_EL1_ERTN should be treated like any other
>> branch filter rather than privilege filters as is the case now ?
> 
> I think so yes
> 
>>
>>>
>>> For example I moved it to the top, removed the return below and then I
>>> get syscall partial records:
>>>
>>> ....  5: 0000000000745d0c -> 0000000000000000 0 cycles  P   9fbfbfbf SYSCALL
>>>
>>> I also get ERETS but with only the userspace part set:
>>>
>>> .....  4: 0000000000000000 -> 0000000000745d10 0 cycles  P   9fbfbfbf ERET
>> But with both user and kernel privilege filters being set, these should have
>> been complete branch records containing both user and kernel addresses ?
> 
> Yes, but I only set PERF_SAMPLE_BRANCH_USER, I should have given the
> perf command as well:
> 
>   perf record -j any,save_type,u -- syscall_loop
> 
> Where syscall_loop obviously generates lots of SYSCALLS and ERETS. But
> with both user and kernel you just don't get to that point before the
> buffer fills up. At least in per process mode, maybe with -a the timings
> are different.

Fair enough, will change BRBCR_EL1_EXCEPTION/BRBCR_EL1_ERTN as discussed.
  
James Clark Nov. 29, 2022, 3:53 p.m. UTC | #6
On 07/11/2022 06:25, Anshuman Khandual wrote:
> This adds a BRBE driver which implements all the required helper functions
> for struct arm_pmu. Following functions are defined by this driver which
> will configure, enable, capture, reset and disable BRBE buffer HW as and
> when requested via perf branch stack sampling framework.

Hi Anshuman,

I've got a rough version of an updated test for branch stacks here [1].
A couple of interesting things that I've noticed running it:

First one is that sometimes I get (null) for the branch type. Debugging
in GDB shows me that the type is actually type == PERF_BR_EXTEND_ABI &&
new_type == 11. I can't see how this is possible looking at the driver
code. I think I saw this on a previous version of the patchset too but
didn't mention it because I thought it wasn't significant, but now I see
that something strange is going on. An interesting pattern is that they
are always after ERET samples and go from userspace to kernel:

41992866945460 0x6e8 [0x360]: PERF_RECORD_SAMPLE(IP, 0x1): 501/501:
0xffff800008010118 period: 1229 addr: 0
... branch stack: nr:34
.. 007a9988 -> 00000000 0 cycles  P   9fbfbfbf IRQ
.. 00000000 -> 007a9988 0 cycles  P   9fbfbfbf ERET
.. 007a9988 -> 00000000 0 cycles  P   9fbfbfbf (null)
.. 00747668 -> 007a9988 0 cycles  P   9fbfbfbf CALL
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
.. 00000000 -> 00747658 0 cycles  P   9fbfbfbf ERET
.. 00747658 -> 00000000 0 cycles  P   9fbfbfbf ARM64_DEBUG_DATA
.. 00000000 -> 00747650 0 cycles  P   9fbfbfbf ERET
.. 00747650 -> 00000000 0 cycles  P   9fbfbfbf ARM64_DEBUG_DATA
.. 00747624 -> 00747634 0 cycles  P   9fbfbfbf COND
.. 00000000 -> 007475f4 0 cycles  P   9fbfbfbf ERET
.. 007475f4 -> 00000000 0 cycles  P   9fbfbfbf ARM64_DEBUG_DATA
.. 00000000 -> 007475e8 0 cycles  P   9fbfbfbf ERET
.. 007475e8 -> 00000000 0 cycles  P   9fbfbfbf (null)
.. 004005ac -> 007475e8 0 cycles  P   9fbfbfbf CALL
.. 00000000 -> 00400564 0 cycles  P   9fbfbfbf ERET
.. 00400564 -> 00000000 0 cycles  P   9fbfbfbf (null)
.. 00000000 -> 00400564 0 cycles  P   9fbfbfbf ERET
 .. thread: perf:501
 ...... dso: [kernel.kallsyms]

The second one is that sometimes I get kernel addresses and RET branches
even if the option is any_call,u. The pattern here is that it's the last
non empty branch stack of a run, so maybe there is some disable path
where the filters aren't configured properly:

armv8pmu_brbe_enable+0xc/arm64_pmu_brbe_enable+0x0/P/-/-/0/CALL
armpmu_start+0xe0/armv8pmu_brbe_enable+0x0/P/-/-/0/IND_CALL
armv8pmu_brbe_reset+0x18/armpmu_start+0xd0/P/-/-/0/RET
arm64_pmu_brbe_reset+0x18/armv8pmu_brbe_reset+0x10/P/-/-/0/RET


[1]:
https://gitlab.arm.com/linux-arm/linux-jc/-/commit/7260b7bef06ac161eac88d05266e8c5c303d9881
  
Anshuman Khandual Nov. 30, 2022, 4:49 a.m. UTC | #7
On 11/29/22 21:23, James Clark wrote:
> 
> 
> On 07/11/2022 06:25, Anshuman Khandual wrote:
>> This adds a BRBE driver which implements all the required helper functions
>> for struct arm_pmu. Following functions are defined by this driver which
>> will configure, enable, capture, reset and disable BRBE buffer HW as and
>> when requested via perf branch stack sampling framework.
> 
> Hi Anshuman,
> 
> I've got a rough version of an updated test for branch stacks here [1].
> A couple of interesting things that I've noticed running it:
> 
> First one is that sometimes I get (null) for the branch type. Debugging
> in GDB shows me that the type is actually type == PERF_BR_EXTEND_ABI &&
> new_type == 11. I can't see how this is possible looking at the driver

Hmm, that is strange.

brbe_fetch_perf_type() evaluates captured brbinf and extracts BRBE branch
type and later maps into perf branch types. All new perf branch types are
contained inside [PERF_BR_NEW_FAULT_ALGN = 0 .. PERF_BR_NEW_ARCH_5 = 7].
Hence wondering how '11' can be a new_type value after PERF_BR_EXTEND_ABI
switch.

> code. I think I saw this on a previous version of the patchset too but
> didn't mention it because I thought it wasn't significant, but now I see
> that something strange is going on. An interesting pattern is that they
> are always after ERET samples and go from userspace to kernel:

Unless it can be ascertained that wrong values are getting passed into the
perf ring buffer via cpuc->branches->brbe_entries[idx].[type | new_type],
the problem might be with perf report parsing the branch records ?

There are valid new branch types such as ARM64_DEBUG_DATA reported after
ERET records as well. I guess the only way to figure out the problem here
is to track the errant branch record from cpuc->branches->brbe_entries to
all the way upto perf report processing.

> 
> 41992866945460 0x6e8 [0x360]: PERF_RECORD_SAMPLE(IP, 0x1): 501/501:
> 0xffff800008010118 period: 1229 addr: 0
> ... branch stack: nr:34
> .. 007a9988 -> 00000000 0 cycles  P   9fbfbfbf IRQ
> .. 00000000 -> 007a9988 0 cycles  P   9fbfbfbf ERET
> .. 007a9988 -> 00000000 0 cycles  P   9fbfbfbf (null)
> .. 00747668 -> 007a9988 0 cycles  P   9fbfbfbf CALL
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
> .. 00000000 -> 00747658 0 cycles  P   9fbfbfbf ERET
> .. 00747658 -> 00000000 0 cycles  P   9fbfbfbf ARM64_DEBUG_DATA
> .. 00000000 -> 00747650 0 cycles  P   9fbfbfbf ERET
> .. 00747650 -> 00000000 0 cycles  P   9fbfbfbf ARM64_DEBUG_DATA
> .. 00747624 -> 00747634 0 cycles  P   9fbfbfbf COND
> .. 00000000 -> 007475f4 0 cycles  P   9fbfbfbf ERET
> .. 007475f4 -> 00000000 0 cycles  P   9fbfbfbf ARM64_DEBUG_DATA
> .. 00000000 -> 007475e8 0 cycles  P   9fbfbfbf ERET
> .. 007475e8 -> 00000000 0 cycles  P   9fbfbfbf (null)
> .. 004005ac -> 007475e8 0 cycles  P   9fbfbfbf CALL
> .. 00000000 -> 00400564 0 cycles  P   9fbfbfbf ERET
> .. 00400564 -> 00000000 0 cycles  P   9fbfbfbf (null)
> .. 00000000 -> 00400564 0 cycles  P   9fbfbfbf ERET
>  .. thread: perf:501
>  ...... dso: [kernel.kallsyms]
> 
> The second one is that sometimes I get kernel addresses and RET branches
> even if the option is any_call,u. The pattern here is that it's the last
> non empty branch stack of a run, so maybe there is some disable path
> where the filters aren't configured properly:

The latest code (not posted), disables TRBE completely while reading the
branch records during PMU interrupt. Could you please apply those changes
as well, or rather just use the branch instead.

https://gitlab.arm.com/linux-arm/linux-anshuman/-/commit/ab17879711f0e61c280ed52400ccde172b67e04a

> 
> armv8pmu_brbe_enable+0xc/arm64_pmu_brbe_enable+0x0/P/-/-/0/CALL
> armpmu_start+0xe0/armv8pmu_brbe_enable+0x0/P/-/-/0/IND_CALL
> armv8pmu_brbe_reset+0x18/armpmu_start+0xd0/P/-/-/0/RET
> arm64_pmu_brbe_reset+0x18/armv8pmu_brbe_reset+0x10/P/-/-/0/RET

I am wondering how the different privilege branch record leak is possible.
Because arm64_pmu_brbe_enable() should start BRBE from a clean state with
respect to privilege level filters.

void arm64_pmu_brbe_enable(struct pmu_hw_events *cpuc)
{
        u64 brbfcr, brbcr;

        if (brbe_disabled(cpuc))
                return;

        brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
        brbfcr &= ~BRBFCR_EL1_BANK_MASK;
        brbfcr &= ~(BRBFCR_EL1_EnI | BRBFCR_EL1_PAUSED | BRBE_FCR_MASK);
        brbfcr |= (cpuc->brbfcr & BRBE_FCR_MASK);
        write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
        isb();

        brbcr = read_sysreg_s(SYS_BRBCR_EL1);
        brbcr &= ~BRBE_CR_MASK;			--> Contains BRBCR_EL1_E1BRE and BRBCR_EL1_E0BRE
        brbcr |= BRBCR_EL1_FZP;
        brbcr |= (BRBCR_EL1_TS_PHYSICAL << BRBCR_EL1_TS_SHIFT);
        brbcr |= (cpuc->brbcr & BRBE_CR_MASK);
        write_sysreg_s(brbcr, SYS_BRBCR_EL1);
        isb();
}

Could these samples are from a previous session ? But they should have been
flushed in armpmu_start().

static void armpmu_start(struct perf_event *event, int flags)
{
	.........
        if (has_branch_stack(event)) {
                if (event->ctx->task && hw_events->brbe_context != event->ctx) {
                        armpmu->brbe_reset(hw_events);
                        hw_events->brbe_context = event->ctx;
                }
                armpmu->brbe_enable(hw_events);
                hw_events->brbe_users++;
        }
	.........
}

> 
> 
> [1]:
> https://gitlab.arm.com/linux-arm/linux-jc/-/commit/7260b7bef06ac161eac88d05266e8c5c303d9881
  
James Clark Nov. 30, 2022, 4:56 p.m. UTC | #8
On 30/11/2022 04:49, Anshuman Khandual wrote:
> 
> 
> On 11/29/22 21:23, James Clark wrote:
>>
>>
>> On 07/11/2022 06:25, Anshuman Khandual wrote:
>>> This adds a BRBE driver which implements all the required helper functions
>>> for struct arm_pmu. Following functions are defined by this driver which
>>> will configure, enable, capture, reset and disable BRBE buffer HW as and
>>> when requested via perf branch stack sampling framework.
>>
>> Hi Anshuman,
>>
>> I've got a rough version of an updated test for branch stacks here [1].
>> A couple of interesting things that I've noticed running it:
>>
>> First one is that sometimes I get (null) for the branch type. Debugging
>> in GDB shows me that the type is actually type == PERF_BR_EXTEND_ABI &&
>> new_type == 11. I can't see how this is possible looking at the driver
> 
> Hmm, that is strange.
> 
> brbe_fetch_perf_type() evaluates captured brbinf and extracts BRBE branch
> type and later maps into perf branch types. All new perf branch types are
> contained inside [PERF_BR_NEW_FAULT_ALGN = 0 .. PERF_BR_NEW_ARCH_5 = 7].
> Hence wondering how '11' can be a new_type value after PERF_BR_EXTEND_ABI
> switch.

I got to the bottom of the issue and posted the fix here [2]. A new
entry was added to the branch records somewhere around the time new_type
was added and it wasn't added to Perf so the records weren't being
interpreted properly.

> 
>> code. I think I saw this on a previous version of the patchset too but
>> didn't mention it because I thought it wasn't significant, but now I see
>> that something strange is going on. An interesting pattern is that they
>> are always after ERET samples and go from userspace to kernel:
> 
> Unless it can be ascertained that wrong values are getting passed into the
> perf ring buffer via cpuc->branches->brbe_entries[idx].[type | new_type],
> the problem might be with perf report parsing the branch records ?
> 
> There are valid new branch types such as ARM64_DEBUG_DATA reported after
> ERET records as well. I guess the only way to figure out the problem here
> is to track the errant branch record from cpuc->branches->brbe_entries to
> all the way upto perf report processing.
> 
>>
>> 41992866945460 0x6e8 [0x360]: PERF_RECORD_SAMPLE(IP, 0x1): 501/501:
>> 0xffff800008010118 period: 1229 addr: 0
>> ... branch stack: nr:34
>> .. 007a9988 -> 00000000 0 cycles  P   9fbfbfbf IRQ
>> .. 00000000 -> 007a9988 0 cycles  P   9fbfbfbf ERET
>> .. 007a9988 -> 00000000 0 cycles  P   9fbfbfbf (null)
>> .. 00747668 -> 007a9988 0 cycles  P   9fbfbfbf CALL
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00747664 -> 00747660 0 cycles  P   9fbfbfbf COND
>> .. 00000000 -> 00747658 0 cycles  P   9fbfbfbf ERET
>> .. 00747658 -> 00000000 0 cycles  P   9fbfbfbf ARM64_DEBUG_DATA
>> .. 00000000 -> 00747650 0 cycles  P   9fbfbfbf ERET
>> .. 00747650 -> 00000000 0 cycles  P   9fbfbfbf ARM64_DEBUG_DATA
>> .. 00747624 -> 00747634 0 cycles  P   9fbfbfbf COND
>> .. 00000000 -> 007475f4 0 cycles  P   9fbfbfbf ERET
>> .. 007475f4 -> 00000000 0 cycles  P   9fbfbfbf ARM64_DEBUG_DATA
>> .. 00000000 -> 007475e8 0 cycles  P   9fbfbfbf ERET
>> .. 007475e8 -> 00000000 0 cycles  P   9fbfbfbf (null)
>> .. 004005ac -> 007475e8 0 cycles  P   9fbfbfbf CALL
>> .. 00000000 -> 00400564 0 cycles  P   9fbfbfbf ERET
>> .. 00400564 -> 00000000 0 cycles  P   9fbfbfbf (null)
>> .. 00000000 -> 00400564 0 cycles  P   9fbfbfbf ERET
>>  .. thread: perf:501
>>  ...... dso: [kernel.kallsyms]
>>
>> The second one is that sometimes I get kernel addresses and RET branches
>> even if the option is any_call,u. The pattern here is that it's the last
>> non empty branch stack of a run, so maybe there is some disable path
>> where the filters aren't configured properly:
> 
> The latest code (not posted), disables TRBE completely while reading the
> branch records during PMU interrupt. Could you please apply those changes
> as well, or rather just use the branch instead.
> 
> https://gitlab.arm.com/linux-arm/linux-anshuman/-/commit/ab17879711f0e61c280ed52400ccde172b67e04a
> 

I don't think I've seen it on that version, but I need to run it a bit
more to be sure.

> 
>>
>>
>> [1]:
>> https://gitlab.arm.com/linux-arm/linux-jc/-/commit/7260b7bef06ac161eac88d05266e8c5c303d9881

[2]:
https://lore.kernel.org/linux-perf-users/20221130165158.517385-1-james.clark@arm.com/T/#u
  
James Clark Dec. 6, 2022, 5:05 p.m. UTC | #9
On 30/11/2022 04:49, Anshuman Khandual wrote:
> 
> 
> On 11/29/22 21:23, James Clark wrote:
[...]
> 
> The latest code (not posted), disables TRBE completely while reading the
> branch records during PMU interrupt. Could you please apply those changes
> as well, or rather just use the branch instead.
> 
> https://gitlab.arm.com/linux-arm/linux-anshuman/-/commit/ab17879711f0e61c280ed52400ccde172b67e04a
> 
>>
>> armv8pmu_brbe_enable+0xc/arm64_pmu_brbe_enable+0x0/P/-/-/0/CALL
>> armpmu_start+0xe0/armv8pmu_brbe_enable+0x0/P/-/-/0/IND_CALL
>> armv8pmu_brbe_reset+0x18/armpmu_start+0xd0/P/-/-/0/RET
>> arm64_pmu_brbe_reset+0x18/armv8pmu_brbe_reset+0x10/P/-/-/0/RET
> 
> I am wondering how the different privilege branch record leak is possible.
> Because arm64_pmu_brbe_enable() should start BRBE from a clean state with
> respect to privilege level filters.
> 
> void arm64_pmu_brbe_enable(struct pmu_hw_events *cpuc)
> {
>         u64 brbfcr, brbcr;
> 
>         if (brbe_disabled(cpuc))
>                 return;
> 
>         brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
>         brbfcr &= ~BRBFCR_EL1_BANK_MASK;
>         brbfcr &= ~(BRBFCR_EL1_EnI | BRBFCR_EL1_PAUSED | BRBE_FCR_MASK);
>         brbfcr |= (cpuc->brbfcr & BRBE_FCR_MASK);
>         write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
>         isb();
> 
>         brbcr = read_sysreg_s(SYS_BRBCR_EL1);
>         brbcr &= ~BRBE_CR_MASK;			--> Contains BRBCR_EL1_E1BRE and BRBCR_EL1_E0BRE
>         brbcr |= BRBCR_EL1_FZP;
>         brbcr |= (BRBCR_EL1_TS_PHYSICAL << BRBCR_EL1_TS_SHIFT);
>         brbcr |= (cpuc->brbcr & BRBE_CR_MASK);
>         write_sysreg_s(brbcr, SYS_BRBCR_EL1);
>         isb();

Yes I tracked down the problem to here as well. I added a
arm64_pmu_brbe_reset(cpuc) to the end of arm64_pmu_brbe_enable() and it
fixes the issue.

The problem is, without ensuring that BRBFCR_EL1_PAUSED is set, there is
no way to write to both BRBFCR and BRBCR in either order without new
records being produced based on a partial configuration.

BRBFCR_EL1_PAUSED isn't set from the previous session, and there is no
obvious place to add a paused at the end of the session with the current
callbacks. So the easiest fix is to flush the old kernel samples after
configuring both registers.

I'm not sure what the BRBFCR_EL1_PAUSED value is at power on either, so
the issue might also be present with the very first brbe session, but
less obvious than a userspace one following a kernel one. But the flush
solves that problem too.

> }
> 
> Could these samples are from a previous session ? But they should have been
> flushed in armpmu_start().
> 
> static void armpmu_start(struct perf_event *event, int flags)
> {
> 	.........
>         if (has_branch_stack(event)) {
>                 if (event->ctx->task && hw_events->brbe_context != event->ctx) {
>                         armpmu->brbe_reset(hw_events);
>                         hw_events->brbe_context = event->ctx;
>                 }
>                 armpmu->brbe_enable(hw_events);
>                 hw_events->brbe_users++;
>         }
> 	.........
> }
> 
>>
>>
>> [1]:
>> https://gitlab.arm.com/linux-arm/linux-jc/-/commit/7260b7bef06ac161eac88d05266e8c5c303d9881
  

Patch

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 97db333d1208..85a3aaefc0fb 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1034,31 +1034,37 @@  static int armv8pmu_filter_match(struct perf_event *event)
 
 static void armv8pmu_brbe_filter(struct pmu_hw_events *hw_event, struct perf_event *event)
 {
+	arm64_pmu_brbe_filter(hw_event, event);
 }
 
 static void armv8pmu_brbe_enable(struct pmu_hw_events *hw_event)
 {
+	arm64_pmu_brbe_enable(hw_event);
 }
 
 static void armv8pmu_brbe_disable(struct pmu_hw_events *hw_event)
 {
+	arm64_pmu_brbe_disable(hw_event);
 }
 
 static void armv8pmu_brbe_read(struct pmu_hw_events *hw_event, struct perf_event *event)
 {
+	arm64_pmu_brbe_read(hw_event, event);
 }
 
 static void armv8pmu_brbe_probe(struct pmu_hw_events *hw_event)
 {
+	arm64_pmu_brbe_probe(hw_event);
 }
 
 static void armv8pmu_brbe_reset(struct pmu_hw_events *hw_event)
 {
+	arm64_pmu_brbe_reset(hw_event);
 }
 
 static bool armv8pmu_brbe_supported(struct perf_event *event)
 {
-	return false;
+	return arm64_pmu_brbe_supported(event);
 }
 
 static void armv8pmu_reset(void *info)
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 341010f20b77..cfb79eddeb02 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -190,6 +190,17 @@  config ALIBABA_UNCORE_DRW_PMU
 	  Support for Driveway PMU events monitoring on Yitian 710 DDR
 	  Sub-system.
 
+config ARM_BRBE_PMU
+	bool "Enable support for Branch Record Buffer Extension (BRBE)"
+	depends on ARM64 && ARM_PMU
+	default y
+	help
+	  Enable perf support for Branch Record Buffer Extension (BRBE) which
+	  records all branches taken in an execution path. This supports some
+	  branch types and privilege based filtering. It captured additional
+	  relevant information such as cycle count, misprediction and branch
+	  type, branch privilege level etc.
+
 source "drivers/perf/hisilicon/Kconfig"
 
 config MARVELL_CN10K_DDR_PMU
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 050d04ee19dd..00428793e66c 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -16,6 +16,7 @@  obj-$(CONFIG_RISCV_PMU_SBI) += riscv_pmu_sbi.o
 obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
+obj-$(CONFIG_ARM_BRBE_PMU) += arm_pmu_brbe.o
 obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
 obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
 obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
diff --git a/drivers/perf/arm_pmu_brbe.c b/drivers/perf/arm_pmu_brbe.c
new file mode 100644
index 000000000000..ce1aa4171481
--- /dev/null
+++ b/drivers/perf/arm_pmu_brbe.c
@@ -0,0 +1,441 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Branch Record Buffer Extension Driver.
+ *
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Author: Anshuman Khandual <anshuman.khandual@arm.com>
+ */
+#include "arm_pmu_brbe.h"
+
+#define BRBFCR_BRANCH_ALL	(BRBFCR_EL1_DIRECT | BRBFCR_EL1_INDIRECT | \
+				 BRBFCR_EL1_RTN | BRBFCR_EL1_INDCALL | \
+				 BRBFCR_EL1_DIRCALL | BRBFCR_EL1_CONDDIR)
+
+#define BRBE_FCR_MASK (BRBFCR_BRANCH_ALL)
+#define BRBE_CR_MASK  (BRBCR_EL1_EXCEPTION | BRBCR_EL1_ERTN | BRBCR_EL1_CC | \
+		       BRBCR_EL1_MPRED | BRBCR_EL1_E1BRE | BRBCR_EL1_E0BRE)
+
+static void set_brbe_disabled(struct pmu_hw_events *cpuc)
+{
+	cpuc->brbe_nr = 0;
+}
+
+static bool brbe_disabled(struct pmu_hw_events *cpuc)
+{
+	return !cpuc->brbe_nr;
+}
+
+bool arm64_pmu_brbe_supported(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = per_cpu_ptr(armpmu->hw_events, event->cpu);
+
+	/*
+	 * If the event does not have at least one of the privilege
+	 * branch filters as in PERF_SAMPLE_BRANCH_PLM_ALL, the core
+	 * perf will adjust its value based on perf event's existing
+	 * privilege level via attr.exclude_[user|kernel|hv].
+	 *
+	 * As event->attr.branch_sample_type might have been changed
+	 * when the event reaches here, it is not possible to figure
+	 * out whether the event originally had HV privilege request
+	 * or got added via the core perf. Just report this situation
+	 * once and continue ignoring if there are other instances.
+	 */
+	if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HV)
+		pr_warn_once("does not support hypervisor privilege branch filter\n");
+
+	if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_ABORT_TX) {
+		pr_warn_once("does not support aborted transaction branch filter\n");
+		return false;
+	}
+
+	if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_TX) {
+		pr_warn_once("does not support non transaction branch filter\n");
+		return false;
+	}
+
+	if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_IN_TX) {
+		pr_warn_once("does not support in transaction branch filter\n");
+		return false;
+	}
+	return !brbe_disabled(hw_events);
+}
+
+void arm64_pmu_brbe_probe(struct pmu_hw_events *cpuc)
+{
+	u64 aa64dfr0, brbidr;
+	unsigned int brbe;
+
+	aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
+	brbe = cpuid_feature_extract_unsigned_field(aa64dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT);
+	if (!brbe) {
+		set_brbe_disabled(cpuc);
+		return;
+	} else if (brbe == ID_AA64DFR0_EL1_BRBE_IMP) {
+		cpuc->brbe_v1p1 = false;
+	} else if (brbe == ID_AA64DFR0_EL1_BRBE_BRBE_V1P1) {
+		cpuc->brbe_v1p1 = true;
+	}
+
+	brbidr = read_sysreg_s(SYS_BRBIDR0_EL1);
+	cpuc->brbe_format = brbe_fetch_format(brbidr);
+	if (cpuc->brbe_format != BRBIDR0_EL1_FORMAT_0) {
+		set_brbe_disabled(cpuc);
+		return;
+	}
+
+	cpuc->brbe_cc = brbe_fetch_cc_bits(brbidr);
+	if (cpuc->brbe_cc != BRBIDR0_EL1_CC_20_BIT) {
+		set_brbe_disabled(cpuc);
+		return;
+	}
+
+	cpuc->brbe_nr = brbe_fetch_numrec(brbidr);
+	if (!valid_brbe_nr(cpuc->brbe_nr)) {
+		set_brbe_disabled(cpuc);
+		return;
+	}
+}
+
+void arm64_pmu_brbe_enable(struct pmu_hw_events *cpuc)
+{
+	u64 brbfcr, brbcr;
+
+	if (brbe_disabled(cpuc))
+		return;
+
+	brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+	brbfcr &= ~BRBFCR_EL1_BANK_MASK;
+	brbfcr &= ~(BRBFCR_EL1_EnI | BRBFCR_EL1_PAUSED | BRBE_FCR_MASK);
+	brbfcr |= (cpuc->brbfcr & BRBE_FCR_MASK);
+	write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
+	isb();
+
+	brbcr = read_sysreg_s(SYS_BRBCR_EL1);
+	brbcr &= ~BRBE_CR_MASK;
+	brbcr |= BRBCR_EL1_FZP;
+	brbcr |= (BRBCR_EL1_TS_PHYSICAL << BRBCR_EL1_TS_SHIFT);
+	brbcr |= (cpuc->brbcr & BRBE_CR_MASK);
+	write_sysreg_s(brbcr, SYS_BRBCR_EL1);
+	isb();
+}
+
+void arm64_pmu_brbe_disable(struct pmu_hw_events *cpuc)
+{
+	u64 brbcr;
+
+	if (brbe_disabled(cpuc))
+		return;
+
+	brbcr = read_sysreg_s(SYS_BRBCR_EL1);
+	brbcr &= ~(BRBCR_EL1_E0BRE | BRBCR_EL1_E1BRE);
+	write_sysreg_s(brbcr, SYS_BRBCR_EL1);
+	isb();
+}
+
+static void perf_branch_to_brbfcr(struct pmu_hw_events *cpuc, int branch_type)
+{
+	cpuc->brbfcr = 0;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
+		cpuc->brbfcr |= BRBFCR_BRANCH_ALL;
+		return;
+	}
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+		cpuc->brbfcr |= (BRBFCR_EL1_INDCALL | BRBFCR_EL1_DIRCALL);
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+		cpuc->brbfcr |= BRBFCR_EL1_RTN;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL)
+		cpuc->brbfcr |= BRBFCR_EL1_INDCALL;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_COND)
+		cpuc->brbfcr |= BRBFCR_EL1_CONDDIR;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+		cpuc->brbfcr |= BRBFCR_EL1_INDIRECT;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_CALL)
+		cpuc->brbfcr |= BRBFCR_EL1_DIRCALL;
+}
+
+static void perf_branch_to_brbcr(struct pmu_hw_events *cpuc, int branch_type)
+{
+	cpuc->brbcr = (BRBCR_EL1_CC | BRBCR_EL1_MPRED);
+
+	if (branch_type & PERF_SAMPLE_BRANCH_USER)
+		cpuc->brbcr |= BRBCR_EL1_E0BRE;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_NO_CYCLES)
+		cpuc->brbcr &= ~BRBCR_EL1_CC;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_NO_FLAGS)
+		cpuc->brbcr &= ~BRBCR_EL1_MPRED;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_KERNEL)
+		cpuc->brbcr |= BRBCR_EL1_E1BRE;
+	else
+		return;
+
+	/*
+	 * The exception and exception return branches could be
+	 * captured only when the event has necessary privilege
+	 * indicated via branch type PERF_SAMPLE_BRANCH_KERNEL,
+	 * which has been ascertained in generic perf. Please
+	 * refer perf_copy_attr() for more details.
+	 */
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
+		cpuc->brbcr |= BRBCR_EL1_EXCEPTION;
+		cpuc->brbcr |= BRBCR_EL1_ERTN;
+		return;
+	}
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+		cpuc->brbcr |= BRBCR_EL1_EXCEPTION;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+		cpuc->brbcr |= BRBCR_EL1_ERTN;
+}
+
+
+void arm64_pmu_brbe_filter(struct pmu_hw_events *cpuc, struct perf_event *event)
+{
+	u64 branch_type = event->attr.branch_sample_type;
+
+	if (brbe_disabled(cpuc))
+		return;
+
+	perf_branch_to_brbfcr(cpuc, branch_type);
+	perf_branch_to_brbcr(cpuc, branch_type);
+}
+
+static int brbe_fetch_perf_type(u64 brbinf, bool *new_branch_type)
+{
+	int brbe_type = brbe_fetch_type(brbinf);
+	*new_branch_type = false;
+
+	switch (brbe_type) {
+	case BRBINF_EL1_TYPE_UNCOND_DIR:
+		return PERF_BR_UNCOND;
+	case BRBINF_EL1_TYPE_INDIR:
+		return PERF_BR_IND;
+	case BRBINF_EL1_TYPE_DIR_LINK:
+		return PERF_BR_CALL;
+	case BRBINF_EL1_TYPE_INDIR_LINK:
+		return PERF_BR_IND_CALL;
+	case BRBINF_EL1_TYPE_RET_SUB:
+		return PERF_BR_RET;
+	case BRBINF_EL1_TYPE_COND_DIR:
+		return PERF_BR_COND;
+	case BRBINF_EL1_TYPE_CALL:
+		return PERF_BR_CALL;
+	case BRBINF_EL1_TYPE_TRAP:
+		return PERF_BR_SYSCALL;
+	case BRBINF_EL1_TYPE_RET_EXCPT:
+		return PERF_BR_ERET;
+	case BRBINF_EL1_TYPE_IRQ:
+		return PERF_BR_IRQ;
+	case BRBINF_EL1_TYPE_DEBUG_HALT:
+		*new_branch_type = true;
+		return PERF_BR_ARM64_DEBUG_HALT;
+	case BRBINF_EL1_TYPE_SERROR:
+		return PERF_BR_SERROR;
+	case BRBINF_EL1_TYPE_INST_DEBUG:
+		*new_branch_type = true;
+		return PERF_BR_ARM64_DEBUG_INST;
+	case BRBINF_EL1_TYPE_DATA_DEBUG:
+		*new_branch_type = true;
+		return PERF_BR_ARM64_DEBUG_DATA;
+	case BRBINF_EL1_TYPE_ALGN_FAULT:
+		*new_branch_type = true;
+		return PERF_BR_NEW_FAULT_ALGN;
+	case BRBINF_EL1_TYPE_INST_FAULT:
+		*new_branch_type = true;
+		return PERF_BR_NEW_FAULT_INST;
+	case BRBINF_EL1_TYPE_DATA_FAULT:
+		*new_branch_type = true;
+		return PERF_BR_NEW_FAULT_DATA;
+	case BRBINF_EL1_TYPE_FIQ:
+		*new_branch_type = true;
+		return PERF_BR_ARM64_FIQ;
+	case BRBINF_EL1_TYPE_DEBUG_EXIT:
+		*new_branch_type = true;
+		return PERF_BR_ARM64_DEBUG_EXIT;
+	default:
+		pr_warn("unknown branch type captured\n");
+		return PERF_BR_UNKNOWN;
+	}
+}
+
+static int brbe_fetch_perf_priv(u64 brbinf)
+{
+	int brbe_el = brbe_fetch_el(brbinf);
+
+	switch (brbe_el) {
+	case BRBINF_EL1_EL_EL0:
+		return PERF_BR_PRIV_USER;
+	case BRBINF_EL1_EL_EL1:
+		return PERF_BR_PRIV_KERNEL;
+	case BRBINF_EL1_EL_EL2:
+		if (is_kernel_in_hyp_mode())
+			return PERF_BR_PRIV_KERNEL;
+		return PERF_BR_PRIV_HV;
+	default:
+		pr_warn("unknown branch privilege captured\n");
+		return PERF_BR_PRIV_UNKNOWN;
+       }
+}
+
+static void capture_brbe_flags(struct pmu_hw_events *cpuc, struct perf_event *event,
+			       u64 brbinf, int idx)
+{
+	int branch_type, type = brbe_record_valid(brbinf);
+	bool new_branch_type;
+
+	if (!branch_sample_no_cycles(event))
+		cpuc->branches->brbe_entries[idx].cycles = brbe_fetch_cycles(brbinf);
+
+	if (branch_sample_type(event)) {
+		branch_type = brbe_fetch_perf_type(brbinf, &new_branch_type);
+		if (new_branch_type) {
+			cpuc->branches->brbe_entries[idx].type = PERF_BR_EXTEND_ABI;
+			cpuc->branches->brbe_entries[idx].new_type = branch_type;
+		} else {
+			cpuc->branches->brbe_entries[idx].type = branch_type;
+		}
+	}
+
+	if (!branch_sample_no_flags(event)) {
+		/*
+		 * BRBINF_LASTFAILED does not indicate that the last transaction
+		 * got failed or aborted during the current branch record itself.
+		 * Rather, this indicates that all the branch records which were
+		 * in transaction until the curret branch record have failed. So
+		 * the entire BRBE buffer needs to be processed later on to find
+		 * all branch records which might have failed.
+		 */
+		cpuc->branches->brbe_entries[idx].abort = brbinf & BRBINF_EL1_LASTFAILED;
+
+		/*
+		 * All these information (i.e transaction state and mispredicts)
+		 * are not available for target only branch records.
+		 */
+		if (type != BRBINF_EL1_VALID_TARGET) {
+			cpuc->branches->brbe_entries[idx].mispred = brbinf & BRBINF_EL1_MPRED;
+			cpuc->branches->brbe_entries[idx].predicted = !(brbinf & BRBINF_EL1_MPRED);
+			cpuc->branches->brbe_entries[idx].in_tx = brbinf & BRBINF_EL1_T;
+		}
+	}
+
+	if (branch_sample_priv(event)) {
+		/*
+		 * All these information (i.e branch privilege level) are not
+		 * available for source only branch records.
+		 */
+		if (type != BRBINF_EL1_VALID_SOURCE)
+			cpuc->branches->brbe_entries[idx].priv = brbe_fetch_perf_priv(brbinf);
+	}
+}
+
+/*
+ * A branch record with BRBINF_EL1.LASTFAILED set, implies that all
+ * preceding consecutive branch records, that were in a transaction
+ * (i.e their BRBINF_EL1.TX set) have been aborted.
+ *
+ * Similarly BRBFCR_EL1.LASTFAILED set, indicate that all preceding
+ * consecutive branch records upto the last record, which were in a
+ * transaction (i.e their BRBINF_EL1.TX set) have been aborted.
+ *
+ * --------------------------------- -------------------
+ * | 00 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX success]
+ * --------------------------------- -------------------
+ * | 01 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX success]
+ * --------------------------------- -------------------
+ * | 02 | BRBSRC | BRBTGT | BRBINF | | TX = 0 | LF = 0 |
+ * --------------------------------- -------------------
+ * | 03 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed]
+ * --------------------------------- -------------------
+ * | 04 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed]
+ * --------------------------------- -------------------
+ * | 05 | BRBSRC | BRBTGT | BRBINF | | TX = 0 | LF = 1 |
+ * --------------------------------- -------------------
+ * | .. | BRBSRC | BRBTGT | BRBINF | | TX = 0 | LF = 0 |
+ * --------------------------------- -------------------
+ * | 61 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed]
+ * --------------------------------- -------------------
+ * | 62 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed]
+ * --------------------------------- -------------------
+ * | 63 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed]
+ * --------------------------------- -------------------
+ *
+ * BRBFCR_EL1.LASTFAILED == 1
+ *
+ * Here BRBFCR_EL1.LASTFAILED failes all those consecutive and also
+ * in transaction branches near the end of the BRBE buffer.
+ */
+static void process_branch_aborts(struct pmu_hw_events *cpuc)
+{
+	u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+	bool lastfailed = !!(brbfcr & BRBFCR_EL1_LASTFAILED);
+	int idx = cpuc->brbe_nr - 1;
+
+	do {
+		if (cpuc->branches->brbe_entries[idx].in_tx) {
+			cpuc->branches->brbe_entries[idx].abort = lastfailed;
+		} else {
+			lastfailed = cpuc->branches->brbe_entries[idx].abort;
+			cpuc->branches->brbe_entries[idx].abort = false;
+		}
+	} while (idx--, idx >= 0);
+}
+
+void arm64_pmu_brbe_read(struct pmu_hw_events *cpuc, struct perf_event *event)
+{
+	u64 brbinf;
+	int idx;
+
+	if (brbe_disabled(cpuc))
+		return;
+
+	set_brbe_paused();
+	for (idx = 0; idx < cpuc->brbe_nr; idx++) {
+		select_brbe_bank_index(idx);
+		brbinf = get_brbinf_reg(idx);
+		/*
+		 * There are no valid entries anymore on the buffer.
+		 * Abort the branch record processing to save some
+		 * cycles and also reduce the capture/process load
+		 * for the user space as well.
+		 */
+		if (brbe_invalid(brbinf))
+			break;
+
+		if (brbe_valid(brbinf)) {
+			cpuc->branches->brbe_entries[idx].from =  get_brbsrc_reg(idx);
+			cpuc->branches->brbe_entries[idx].to =  get_brbtgt_reg(idx);
+		} else if (brbe_source(brbinf)) {
+			cpuc->branches->brbe_entries[idx].from =  get_brbsrc_reg(idx);
+			cpuc->branches->brbe_entries[idx].to = 0;
+		} else if (brbe_target(brbinf)) {
+			cpuc->branches->brbe_entries[idx].from = 0;
+			cpuc->branches->brbe_entries[idx].to =  get_brbtgt_reg(idx);
+		}
+		capture_brbe_flags(cpuc, event, brbinf, idx);
+	}
+	cpuc->branches->brbe_stack.nr = idx;
+	cpuc->branches->brbe_stack.hw_idx = -1ULL;
+	process_branch_aborts(cpuc);
+}
+
+void arm64_pmu_brbe_reset(struct pmu_hw_events *cpuc)
+{
+	if (brbe_disabled(cpuc))
+		return;
+
+	asm volatile(BRB_IALL);
+	isb();
+}
diff --git a/drivers/perf/arm_pmu_brbe.h b/drivers/perf/arm_pmu_brbe.h
new file mode 100644
index 000000000000..22c4b25b1777
--- /dev/null
+++ b/drivers/perf/arm_pmu_brbe.h
@@ -0,0 +1,259 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Branch Record Buffer Extension Helpers.
+ *
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Author: Anshuman Khandual <anshuman.khandual@arm.com>
+ */
+#define pr_fmt(fmt) "brbe: " fmt
+
+#include <linux/perf/arm_pmu.h>
+
+/*
+ * BRBE Instructions
+ *
+ * BRB_IALL : Invalidate the entire buffer
+ * BRB_INJ  : Inject latest branch record derived from [BRBSRCINJ, BRBTGTINJ, BRBINFINJ]
+ */
+#define BRB_IALL __emit_inst(0xD5000000 | sys_insn(1, 1, 7, 2, 4) | (0x1f))
+#define BRB_INJ  __emit_inst(0xD5000000 | sys_insn(1, 1, 7, 2, 5) | (0x1f))
+
+/*
+ * BRBE Buffer Organization
+ *
+ * BRBE buffer is arranged as multiple banks of 32 branch record
+ * entries each. An indivdial branch record in a given bank could
+ * be accessedi, after selecting the bank in BRBFCR_EL1.BANK and
+ * accessing the registers i.e [BRBSRC, BRBTGT, BRBINF] set with
+ * indices [0..31].
+ *
+ * Bank 0
+ *
+ *	---------------------------------	------
+ *	| 00 | BRBSRC | BRBTGT | BRBINF |	| 00 |
+ *	---------------------------------	------
+ *	| 01 | BRBSRC | BRBTGT | BRBINF |	| 01 |
+ *	---------------------------------	------
+ *	| .. | BRBSRC | BRBTGT | BRBINF |	| .. |
+ *	---------------------------------	------
+ *	| 31 | BRBSRC | BRBTGT | BRBINF |	| 31 |
+ *	---------------------------------	------
+ *
+ * Bank 1
+ *
+ *	---------------------------------	------
+ *	| 32 | BRBSRC | BRBTGT | BRBINF |	| 00 |
+ *	---------------------------------	------
+ *	| 33 | BRBSRC | BRBTGT | BRBINF |	| 01 |
+ *	---------------------------------	------
+ *	| .. | BRBSRC | BRBTGT | BRBINF |	| .. |
+ *	---------------------------------	------
+ *	| 63 | BRBSRC | BRBTGT | BRBINF |	| 31 |
+ *	---------------------------------	------
+ */
+#define BRBE_BANK0_IDX_MIN 0
+#define BRBE_BANK0_IDX_MAX 31
+#define BRBE_BANK1_IDX_MIN 32
+#define BRBE_BANK1_IDX_MAX 63
+
+#define RETURN_READ_BRBSRCN(n) \
+	read_sysreg_s(SYS_BRBSRC##n##_EL1)
+
+#define RETURN_READ_BRBTGTN(n) \
+	read_sysreg_s(SYS_BRBTGT##n##_EL1)
+
+#define RETURN_READ_BRBINFN(n) \
+	read_sysreg_s(SYS_BRBINF##n##_EL1)
+
+#define BRBE_REGN_CASE(n, case_macro) \
+	case n: return case_macro(n); break
+
+#define BRBE_REGN_SWITCH(x, case_macro)				\
+	do {							\
+		switch (x) {					\
+		BRBE_REGN_CASE(0, case_macro);			\
+		BRBE_REGN_CASE(1, case_macro);			\
+		BRBE_REGN_CASE(2, case_macro);			\
+		BRBE_REGN_CASE(3, case_macro);			\
+		BRBE_REGN_CASE(4, case_macro);			\
+		BRBE_REGN_CASE(5, case_macro);			\
+		BRBE_REGN_CASE(6, case_macro);			\
+		BRBE_REGN_CASE(7, case_macro);			\
+		BRBE_REGN_CASE(8, case_macro);			\
+		BRBE_REGN_CASE(9, case_macro);			\
+		BRBE_REGN_CASE(10, case_macro);			\
+		BRBE_REGN_CASE(11, case_macro);			\
+		BRBE_REGN_CASE(12, case_macro);			\
+		BRBE_REGN_CASE(13, case_macro);			\
+		BRBE_REGN_CASE(14, case_macro);			\
+		BRBE_REGN_CASE(15, case_macro);			\
+		BRBE_REGN_CASE(16, case_macro);			\
+		BRBE_REGN_CASE(17, case_macro);			\
+		BRBE_REGN_CASE(18, case_macro);			\
+		BRBE_REGN_CASE(19, case_macro);			\
+		BRBE_REGN_CASE(20, case_macro);			\
+		BRBE_REGN_CASE(21, case_macro);			\
+		BRBE_REGN_CASE(22, case_macro);			\
+		BRBE_REGN_CASE(23, case_macro);			\
+		BRBE_REGN_CASE(24, case_macro);			\
+		BRBE_REGN_CASE(25, case_macro);			\
+		BRBE_REGN_CASE(26, case_macro);			\
+		BRBE_REGN_CASE(27, case_macro);			\
+		BRBE_REGN_CASE(28, case_macro);			\
+		BRBE_REGN_CASE(29, case_macro);			\
+		BRBE_REGN_CASE(30, case_macro);			\
+		BRBE_REGN_CASE(31, case_macro);			\
+		default:					\
+			pr_warn("unknown register index\n");	\
+			return -1;				\
+		}						\
+	} while (0)
+
+static inline int buffer_to_brbe_idx(int buffer_idx)
+{
+	return buffer_idx % 32;
+}
+
+static inline u64 get_brbsrc_reg(int buffer_idx)
+{
+	int brbe_idx = buffer_to_brbe_idx(buffer_idx);
+
+	BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBSRCN);
+}
+
+static inline u64 get_brbtgt_reg(int buffer_idx)
+{
+	int brbe_idx = buffer_to_brbe_idx(buffer_idx);
+
+	BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBTGTN);
+}
+
+static inline u64 get_brbinf_reg(int buffer_idx)
+{
+	int brbe_idx = buffer_to_brbe_idx(buffer_idx);
+
+	BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBINFN);
+}
+
+static inline u64 brbe_record_valid(u64 brbinf)
+{
+	return (brbinf & BRBINF_EL1_VALID_MASK) >> BRBINF_EL1_VALID_SHIFT;
+}
+
+static inline bool brbe_invalid(u64 brbinf)
+{
+	return brbe_record_valid(brbinf) == BRBINF_EL1_VALID_NONE;
+}
+
+static inline bool brbe_valid(u64 brbinf)
+{
+	return brbe_record_valid(brbinf) == BRBINF_EL1_VALID_FULL;
+}
+
+static inline bool brbe_source(u64 brbinf)
+{
+	return brbe_record_valid(brbinf) == BRBINF_EL1_VALID_SOURCE;
+}
+
+static inline bool brbe_target(u64 brbinf)
+{
+	return brbe_record_valid(brbinf) == BRBINF_EL1_VALID_TARGET;
+}
+
+static inline int brbe_fetch_cycles(u64 brbinf)
+{
+	/*
+	 * Captured cycle count is unknown and hence
+	 * should not be passed on the user space.
+	 */
+	if (brbinf & BRBINF_EL1_CCU)
+		return 0;
+
+	return (brbinf & BRBINF_EL1_CC_MASK) >> BRBINF_EL1_CC_SHIFT;
+}
+
+static inline int brbe_fetch_type(u64 brbinf)
+{
+	return (brbinf & BRBINF_EL1_TYPE_MASK) >> BRBINF_EL1_TYPE_SHIFT;
+}
+
+static inline int brbe_fetch_el(u64 brbinf)
+{
+	return (brbinf & BRBINF_EL1_EL_MASK) >> BRBINF_EL1_EL_SHIFT;
+}
+
+static inline int brbe_fetch_numrec(u64 brbidr)
+{
+	return (brbidr & BRBIDR0_EL1_NUMREC_MASK) >> BRBIDR0_EL1_NUMREC_SHIFT;
+}
+
+static inline int brbe_fetch_format(u64 brbidr)
+{
+	return (brbidr & BRBIDR0_EL1_FORMAT_MASK) >> BRBIDR0_EL1_FORMAT_SHIFT;
+}
+
+static inline int brbe_fetch_cc_bits(u64 brbidr)
+{
+	return (brbidr & BRBIDR0_EL1_CC_MASK) >> BRBIDR0_EL1_CC_SHIFT;
+}
+
+static inline void select_brbe_bank(int bank)
+{
+	static int brbe_current_bank = -1;
+	u64 brbfcr;
+
+	if (brbe_current_bank == bank)
+		return;
+
+	WARN_ON(bank > 1);
+	brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+	brbfcr &= ~BRBFCR_EL1_BANK_MASK;
+	brbfcr |= ((bank << BRBFCR_EL1_BANK_SHIFT) & BRBFCR_EL1_BANK_MASK);
+	write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
+	isb();
+	brbe_current_bank = bank;
+}
+
+static inline void select_brbe_bank_index(int buffer_idx)
+{
+	switch (buffer_idx) {
+	case BRBE_BANK0_IDX_MIN ... BRBE_BANK0_IDX_MAX:
+		select_brbe_bank(0);
+		break;
+	case BRBE_BANK1_IDX_MIN ... BRBE_BANK1_IDX_MAX:
+		select_brbe_bank(1);
+		break;
+	default:
+		pr_warn("unsupported BRBE index\n");
+	}
+}
+
+static inline bool valid_brbe_nr(int brbe_nr)
+{
+	switch (brbe_nr) {
+	case BRBIDR0_EL1_NUMREC_8:
+	case BRBIDR0_EL1_NUMREC_16:
+	case BRBIDR0_EL1_NUMREC_32:
+	case BRBIDR0_EL1_NUMREC_64:
+		return true;
+	default:
+		pr_warn("unsupported BRBE entries\n");
+		return false;
+	}
+}
+
+static inline bool brbe_paused(void)
+{
+	u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+
+	return brbfcr & BRBFCR_EL1_PAUSED;
+}
+
+static inline void set_brbe_paused(void)
+{
+	u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1);
+
+	write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1);
+	isb();
+}
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index bda0d9984a98..9c23b2b58b3d 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -168,6 +168,26 @@  struct arm_pmu {
 	unsigned long acpi_cpuid;
 };
 
+#ifdef CONFIG_ARM_BRBE_PMU
+void arm64_pmu_brbe_filter(struct pmu_hw_events *hw_events, struct perf_event *event);
+void arm64_pmu_brbe_read(struct pmu_hw_events *cpuc, struct perf_event *event);
+void arm64_pmu_brbe_disable(struct pmu_hw_events *cpuc);
+void arm64_pmu_brbe_enable(struct pmu_hw_events *cpuc);
+void arm64_pmu_brbe_probe(struct pmu_hw_events *cpuc);
+void arm64_pmu_brbe_reset(struct pmu_hw_events *cpuc);
+bool arm64_pmu_brbe_supported(struct perf_event *event);
+#else
+static inline void arm64_pmu_brbe_filter(struct pmu_hw_events *hw_events, struct perf_event *event)
+{
+}
+static inline void arm64_pmu_brbe_read(struct pmu_hw_events *cpuc, struct perf_event *event) { }
+static inline void arm64_pmu_brbe_disable(struct pmu_hw_events *cpuc) { }
+static inline void arm64_pmu_brbe_enable(struct pmu_hw_events *cpuc) { }
+static inline void arm64_pmu_brbe_probe(struct pmu_hw_events *cpuc) { }
+static inline void arm64_pmu_brbe_reset(struct pmu_hw_events *cpuc) { }
+static inline bool arm64_pmu_brbe_supported(struct perf_event *event) {return false; }
+#endif
+
 #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
 
 u64 armpmu_event_update(struct perf_event *event);