[4/8] perf/core: Add perf_sample_save_brstack() helper
Commit Message
When it saves the branch stack to the perf sample data, it needs to
update the sample flags and the dynamic size. To ensure this,
add the perf_sample_save_brstack() helper and convert all call sites.
Cc: linuxppc-dev@lists.ozlabs.org
Cc: x86@kernel.org
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
arch/powerpc/perf/core-book3s.c | 3 +-
arch/x86/events/amd/core.c | 6 +--
arch/x86/events/intel/core.c | 6 +--
arch/x86/events/intel/ds.c | 9 ++---
include/linux/perf_event.h | 66 ++++++++++++++++++++-------------
kernel/events/core.c | 16 +++-----
6 files changed, 53 insertions(+), 53 deletions(-)
Comments
> On 18-Jan-2023, at 11:35 AM, Namhyung Kim <namhyung@kernel.org> wrote:
>
> When it saves the branch stack to the perf sample data, it needs to
> update the sample flags and the dynamic size. To ensure this,
> add the perf_sample_save_brstack() helper and convert all call sites.
>
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: x86@kernel.org
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Acked-by: Jiri Olsa <jolsa@kernel.org>
> Tested-by: Jiri Olsa <jolsa@kernel.org>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Hi Namhyung,
The changes look good to me.
Acked-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Thanks
Athira
> ---
> arch/powerpc/perf/core-book3s.c | 3 +-
> arch/x86/events/amd/core.c | 6 +--
> arch/x86/events/intel/core.c | 6 +--
> arch/x86/events/intel/ds.c | 9 ++---
> include/linux/perf_event.h | 66 ++++++++++++++++++++-------------
> kernel/events/core.c | 16 +++-----
> 6 files changed, 53 insertions(+), 53 deletions(-)
>
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index bf318dd9b709..8c1f7def596e 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -2313,8 +2313,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
> struct cpu_hw_events *cpuhw;
> cpuhw = this_cpu_ptr(&cpu_hw_events);
> power_pmu_bhrb_read(event, cpuhw);
> - data.br_stack = &cpuhw->bhrb_stack;
> - data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
> + perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
> }
>
> if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
> diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
> index d6f3703e4119..463f3eb8bbd7 100644
> --- a/arch/x86/events/amd/core.c
> +++ b/arch/x86/events/amd/core.c
> @@ -928,10 +928,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
> if (!x86_perf_event_set_period(event))
> continue;
>
> - if (has_branch_stack(event)) {
> - data.br_stack = &cpuc->lbr_stack;
> - data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
> - }
> + if (has_branch_stack(event))
> + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
>
> if (perf_event_overflow(event, &data, regs))
> x86_pmu_stop(event, 0);
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 29d2d0411caf..14f0a746257d 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3036,10 +3036,8 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
>
> perf_sample_data_init(&data, 0, event->hw.last_period);
>
> - if (has_branch_stack(event)) {
> - data.br_stack = &cpuc->lbr_stack;
> - data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
> - }
> + if (has_branch_stack(event))
> + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
>
> if (perf_event_overflow(event, &data, regs))
> x86_pmu_stop(event, 0);
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index 158cf845fc80..07c8a2cdc3ee 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -1720,10 +1720,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
> data->sample_flags |= PERF_SAMPLE_TIME;
> }
>
> - if (has_branch_stack(event)) {
> - data->br_stack = &cpuc->lbr_stack;
> - data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
> - }
> + if (has_branch_stack(event))
> + perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
> }
>
> static void adaptive_pebs_save_regs(struct pt_regs *regs,
> @@ -1883,8 +1881,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
>
> if (has_branch_stack(event)) {
> intel_pmu_store_pebs_lbrs(lbr);
> - data->br_stack = &cpuc->lbr_stack;
> - data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
> + perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
> }
> }
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 569dfac5887f..7db0e9cc2682 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1102,6 +1102,31 @@ extern u64 perf_event_read_value(struct perf_event *event,
>
> extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
>
> +static inline bool branch_sample_no_flags(const struct perf_event *event)
> +{
> + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
> +}
> +
> +static inline bool branch_sample_no_cycles(const struct perf_event *event)
> +{
> + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
> +}
> +
> +static inline bool branch_sample_type(const struct perf_event *event)
> +{
> + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
> +}
> +
> +static inline bool branch_sample_hw_index(const struct perf_event *event)
> +{
> + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
> +}
> +
> +static inline bool branch_sample_priv(const struct perf_event *event)
> +{
> + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
> +}
> +
>
> struct perf_sample_data {
> /*
> @@ -1210,6 +1235,21 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
> data->sample_flags |= PERF_SAMPLE_RAW;
> }
>
> +static inline void perf_sample_save_brstack(struct perf_sample_data *data,
> + struct perf_event *event,
> + struct perf_branch_stack *brs)
> +{
> + int size = sizeof(u64); /* nr */
> +
> + if (branch_sample_hw_index(event))
> + size += sizeof(u64);
> + size += brs->nr * sizeof(struct perf_branch_entry);
> +
> + data->br_stack = brs;
> + data->dyn_size += size;
> + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
> +}
> +
> /*
> * Clear all bitfields in the perf_branch_entry.
> * The to and from fields are not cleared because they are
> @@ -1827,30 +1867,4 @@ static inline void perf_lopwr_cb(bool mode)
> }
> #endif
>
> -#ifdef CONFIG_PERF_EVENTS
> -static inline bool branch_sample_no_flags(const struct perf_event *event)
> -{
> - return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
> -}
> -
> -static inline bool branch_sample_no_cycles(const struct perf_event *event)
> -{
> - return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
> -}
> -
> -static inline bool branch_sample_type(const struct perf_event *event)
> -{
> - return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
> -}
> -
> -static inline bool branch_sample_hw_index(const struct perf_event *event)
> -{
> - return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
> -}
> -
> -static inline bool branch_sample_priv(const struct perf_event *event)
> -{
> - return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
> -}
> -#endif /* CONFIG_PERF_EVENTS */
> #endif /* _LINUX_PERF_EVENT_H */
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 133894ae5e30..0218b6ffaf36 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7317,7 +7317,7 @@ void perf_output_sample(struct perf_output_handle *handle,
> }
>
> if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
> - if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
> + if (data->br_stack) {
> size_t size;
>
> size = data->br_stack->nr
> @@ -7594,16 +7594,10 @@ void perf_prepare_sample(struct perf_event_header *header,
> data->sample_flags |= PERF_SAMPLE_RAW;
> }
>
> - if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
> - int size = sizeof(u64); /* nr */
> - if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
> - if (branch_sample_hw_index(event))
> - size += sizeof(u64);
> -
> - size += data->br_stack->nr
> - * sizeof(struct perf_branch_entry);
> - }
> - data->dyn_size += size;
> + if (filtered_sample_type & PERF_SAMPLE_BRANCH_STACK) {
> + data->br_stack = NULL;
> + data->dyn_size += sizeof(u64);
> + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
> }
>
> if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
> --
> 2.39.0.314.g84b9a713c41-goog
>
@@ -2313,8 +2313,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
power_pmu_bhrb_read(event, cpuhw);
- data.br_stack = &cpuhw->bhrb_stack;
- data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+ perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
}
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
@@ -928,10 +928,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!x86_perf_event_set_period(event))
continue;
- if (has_branch_stack(event)) {
- data.br_stack = &cpuc->lbr_stack;
- data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
- }
+ if (has_branch_stack(event))
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@@ -3036,10 +3036,8 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
perf_sample_data_init(&data, 0, event->hw.last_period);
- if (has_branch_stack(event)) {
- data.br_stack = &cpuc->lbr_stack;
- data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
- }
+ if (has_branch_stack(event))
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@@ -1720,10 +1720,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
data->sample_flags |= PERF_SAMPLE_TIME;
}
- if (has_branch_stack(event)) {
- data->br_stack = &cpuc->lbr_stack;
- data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
- }
+ if (has_branch_stack(event))
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
}
static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1883,8 +1881,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr);
- data->br_stack = &cpuc->lbr_stack;
- data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
}
}
@@ -1102,6 +1102,31 @@ extern u64 perf_event_read_value(struct perf_event *event,
extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
+static inline bool branch_sample_no_flags(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
+}
+
+static inline bool branch_sample_no_cycles(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
+}
+
+static inline bool branch_sample_type(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
+}
+
+static inline bool branch_sample_hw_index(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
+static inline bool branch_sample_priv(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
+}
+
struct perf_sample_data {
/*
@@ -1210,6 +1235,21 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
data->sample_flags |= PERF_SAMPLE_RAW;
}
+static inline void perf_sample_save_brstack(struct perf_sample_data *data,
+ struct perf_event *event,
+ struct perf_branch_stack *brs)
+{
+ int size = sizeof(u64); /* nr */
+
+ if (branch_sample_hw_index(event))
+ size += sizeof(u64);
+ size += brs->nr * sizeof(struct perf_branch_entry);
+
+ data->br_stack = brs;
+ data->dyn_size += size;
+ data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+}
+
/*
* Clear all bitfields in the perf_branch_entry.
* The to and from fields are not cleared because they are
@@ -1827,30 +1867,4 @@ static inline void perf_lopwr_cb(bool mode)
}
#endif
-#ifdef CONFIG_PERF_EVENTS
-static inline bool branch_sample_no_flags(const struct perf_event *event)
-{
- return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
-}
-
-static inline bool branch_sample_no_cycles(const struct perf_event *event)
-{
- return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
-}
-
-static inline bool branch_sample_type(const struct perf_event *event)
-{
- return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
-}
-
-static inline bool branch_sample_hw_index(const struct perf_event *event)
-{
- return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
-}
-
-static inline bool branch_sample_priv(const struct perf_event *event)
-{
- return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
-}
-#endif /* CONFIG_PERF_EVENTS */
#endif /* _LINUX_PERF_EVENT_H */
@@ -7317,7 +7317,7 @@ void perf_output_sample(struct perf_output_handle *handle,
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
- if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
+ if (data->br_stack) {
size_t size;
size = data->br_stack->nr
@@ -7594,16 +7594,10 @@ void perf_prepare_sample(struct perf_event_header *header,
data->sample_flags |= PERF_SAMPLE_RAW;
}
- if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
- int size = sizeof(u64); /* nr */
- if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
- if (branch_sample_hw_index(event))
- size += sizeof(u64);
-
- size += data->br_stack->nr
- * sizeof(struct perf_branch_entry);
- }
- data->dyn_size += size;
+ if (filtered_sample_type & PERF_SAMPLE_BRANCH_STACK) {
+ data->br_stack = NULL;
+ data->dyn_size += sizeof(u64);
+ data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))