[V5,2/8] perf/x86: Add PERF_X86_EVENT_NEEDS_BRANCH_STACK flag

Message ID 20231025201626.3000228-2-kan.liang@linux.intel.com
State New
Headers
Series [V5,1/8] perf: Add branch stack counters |

Commit Message

Liang, Kan Oct. 25, 2023, 8:16 p.m. UTC
  From: Kan Liang <kan.liang@linux.intel.com>

Currently, branch_sample_type !=0 is used to check whether a branch
stack setup is required. But it doesn't check the sample type,
unnecessary branch stack setup may be done for a counting event. E.g.,
perf record -e "{branch-instructions,branch-misses}:S" -j any
Also, the event only with the new PERF_SAMPLE_BRANCH_COUNTERS branch
sample type may not require a branch stack setup either.

Add a new flag NEEDS_BRANCH_STACK to indicate whether the event requires
a branch stack setup. Replace the needs_branch_stack() by checking the
new flag.

The counting event check is implemented here. The later patch will take
the new PERF_SAMPLE_BRANCH_COUNTERS into account.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---

No changes since V4

 arch/x86/events/intel/core.c       | 14 +++++++++++---
 arch/x86/events/perf_event_flags.h |  1 +
 2 files changed, 12 insertions(+), 3 deletions(-)
  

Comments

Arnaldo Carvalho de Melo Nov. 6, 2023, 9:12 p.m. UTC | #1
Em Wed, Oct 25, 2023 at 01:16:20PM -0700, kan.liang@linux.intel.com escreveu:
> From: Kan Liang <kan.liang@linux.intel.com>
> 
> Currently, branch_sample_type !=0 is used to check whether a branch
> stack setup is required. But it doesn't check the sample type,
> unnecessary branch stack setup may be done for a counting event. E.g.,
> perf record -e "{branch-instructions,branch-misses}:S" -j any
> Also, the event only with the new PERF_SAMPLE_BRANCH_COUNTERS branch
> sample type may not require a branch stack setup either.
> 
> Add a new flag NEEDS_BRANCH_STACK to indicate whether the event requires
> a branch stack setup. Replace the needs_branch_stack() by checking the
> new flag.
> 
> The counting event check is implemented here. The later patch will take
> the new PERF_SAMPLE_BRANCH_COUNTERS into account.
> 
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> ---
> 
> No changes since V4

So I saw this on tip/perf/urgent, I'm picking the tools bits then.

- Arnaldo
 
>  arch/x86/events/intel/core.c       | 14 +++++++++++---
>  arch/x86/events/perf_event_flags.h |  1 +
>  2 files changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index 41a164764a84..a99449c0d77c 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -2527,9 +2527,14 @@ static void intel_pmu_assign_event(struct perf_event *event, int idx)
>  		perf_report_aux_output_id(event, idx);
>  }
>  
> +static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
> +{
> +	return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
> +}
> +
>  static void intel_pmu_del_event(struct perf_event *event)
>  {
> -	if (needs_branch_stack(event))
> +	if (intel_pmu_needs_branch_stack(event))
>  		intel_pmu_lbr_del(event);
>  	if (event->attr.precise_ip)
>  		intel_pmu_pebs_del(event);
> @@ -2820,7 +2825,7 @@ static void intel_pmu_add_event(struct perf_event *event)
>  {
>  	if (event->attr.precise_ip)
>  		intel_pmu_pebs_add(event);
> -	if (needs_branch_stack(event))
> +	if (intel_pmu_needs_branch_stack(event))
>  		intel_pmu_lbr_add(event);
>  }
>  
> @@ -3897,7 +3902,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
>  			x86_pmu.pebs_aliases(event);
>  	}
>  
> -	if (needs_branch_stack(event)) {
> +	if (needs_branch_stack(event) && is_sampling_event(event))
> +		event->hw.flags  |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
> +
> +	if (intel_pmu_needs_branch_stack(event)) {
>  		ret = intel_pmu_setup_lbr_filter(event);
>  		if (ret)
>  			return ret;
> diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
> index 1dc19b9b4426..a1685981c520 100644
> --- a/arch/x86/events/perf_event_flags.h
> +++ b/arch/x86/events/perf_event_flags.h
> @@ -20,3 +20,4 @@ PERF_ARCH(TOPDOWN,		0x04000) /* Count Topdown slots/metrics events */
>  PERF_ARCH(PEBS_STLAT,		0x08000) /* st+stlat data address sampling */
>  PERF_ARCH(AMD_BRS,		0x10000) /* AMD Branch Sampling */
>  PERF_ARCH(PEBS_LAT_HYBRID,	0x20000) /* ld and st lat for hybrid */
> +PERF_ARCH(NEEDS_BRANCH_STACK,	0x40000) /* require branch stack setup */
> -- 
> 2.35.1
>
  
Liang, Kan Nov. 6, 2023, 9:19 p.m. UTC | #2
On 2023-11-06 4:12 p.m., Arnaldo Carvalho de Melo wrote:
> Em Wed, Oct 25, 2023 at 01:16:20PM -0700, kan.liang@linux.intel.com escreveu:
>> From: Kan Liang <kan.liang@linux.intel.com>
>>
>> Currently, branch_sample_type !=0 is used to check whether a branch
>> stack setup is required. But it doesn't check the sample type,
>> unnecessary branch stack setup may be done for a counting event. E.g.,
>> perf record -e "{branch-instructions,branch-misses}:S" -j any
>> Also, the event only with the new PERF_SAMPLE_BRANCH_COUNTERS branch
>> sample type may not require a branch stack setup either.
>>
>> Add a new flag NEEDS_BRANCH_STACK to indicate whether the event requires
>> a branch stack setup. Replace the needs_branch_stack() by checking the
>> new flag.
>>
>> The counting event check is implemented here. The later patch will take
>> the new PERF_SAMPLE_BRANCH_COUNTERS into account.
>>
>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>> ---
>>
>> No changes since V4
> 
> So I saw this on tip/perf/urgent, I'm picking the tools bits then.

Thanks Arnaldo.

Ian has already reviewed the tool parts.

But I still owe a test case for the feature. I will post a patch later.
https://lore.kernel.org/lkml/acbb895a-475e-4679-98fc-6b90c05a00af@linux.intel.com/

Thanks,
Kan

> 
> - Arnaldo
>  
>>  arch/x86/events/intel/core.c       | 14 +++++++++++---
>>  arch/x86/events/perf_event_flags.h |  1 +
>>  2 files changed, 12 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index 41a164764a84..a99449c0d77c 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -2527,9 +2527,14 @@ static void intel_pmu_assign_event(struct perf_event *event, int idx)
>>  		perf_report_aux_output_id(event, idx);
>>  }
>>  
>> +static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
>> +{
>> +	return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
>> +}
>> +
>>  static void intel_pmu_del_event(struct perf_event *event)
>>  {
>> -	if (needs_branch_stack(event))
>> +	if (intel_pmu_needs_branch_stack(event))
>>  		intel_pmu_lbr_del(event);
>>  	if (event->attr.precise_ip)
>>  		intel_pmu_pebs_del(event);
>> @@ -2820,7 +2825,7 @@ static void intel_pmu_add_event(struct perf_event *event)
>>  {
>>  	if (event->attr.precise_ip)
>>  		intel_pmu_pebs_add(event);
>> -	if (needs_branch_stack(event))
>> +	if (intel_pmu_needs_branch_stack(event))
>>  		intel_pmu_lbr_add(event);
>>  }
>>  
>> @@ -3897,7 +3902,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
>>  			x86_pmu.pebs_aliases(event);
>>  	}
>>  
>> -	if (needs_branch_stack(event)) {
>> +	if (needs_branch_stack(event) && is_sampling_event(event))
>> +		event->hw.flags  |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
>> +
>> +	if (intel_pmu_needs_branch_stack(event)) {
>>  		ret = intel_pmu_setup_lbr_filter(event);
>>  		if (ret)
>>  			return ret;
>> diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
>> index 1dc19b9b4426..a1685981c520 100644
>> --- a/arch/x86/events/perf_event_flags.h
>> +++ b/arch/x86/events/perf_event_flags.h
>> @@ -20,3 +20,4 @@ PERF_ARCH(TOPDOWN,		0x04000) /* Count Topdown slots/metrics events */
>>  PERF_ARCH(PEBS_STLAT,		0x08000) /* st+stlat data address sampling */
>>  PERF_ARCH(AMD_BRS,		0x10000) /* AMD Branch Sampling */
>>  PERF_ARCH(PEBS_LAT_HYBRID,	0x20000) /* ld and st lat for hybrid */
>> +PERF_ARCH(NEEDS_BRANCH_STACK,	0x40000) /* require branch stack setup */
>> -- 
>> 2.35.1
>>
>
  
Arnaldo Carvalho de Melo Nov. 7, 2023, 3:11 p.m. UTC | #3
Em Mon, Nov 06, 2023 at 04:19:13PM -0500, Liang, Kan escreveu:
> 
> 
> On 2023-11-06 4:12 p.m., Arnaldo Carvalho de Melo wrote:
> > Em Wed, Oct 25, 2023 at 01:16:20PM -0700, kan.liang@linux.intel.com escreveu:
> >> From: Kan Liang <kan.liang@linux.intel.com>
> >>
> >> Currently, branch_sample_type !=0 is used to check whether a branch
> >> stack setup is required. But it doesn't check the sample type,
> >> unnecessary branch stack setup may be done for a counting event. E.g.,
> >> perf record -e "{branch-instructions,branch-misses}:S" -j any
> >> Also, the event only with the new PERF_SAMPLE_BRANCH_COUNTERS branch
> >> sample type may not require a branch stack setup either.
> >>
> >> Add a new flag NEEDS_BRANCH_STACK to indicate whether the event requires
> >> a branch stack setup. Replace the needs_branch_stack() by checking the
> >> new flag.
> >>
> >> The counting event check is implemented here. The later patch will take
> >> the new PERF_SAMPLE_BRANCH_COUNTERS into account.
> >>
> >> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> >> ---
> >>
> >> No changes since V4
> > 
> > So I saw this on tip/perf/urgent, I'm picking the tools bits then.
> 
> Thanks Arnaldo.
> 
> Ian has already reviewed the tool parts.
> 
> But I still owe a test case for the feature. I will post a patch later.
> https://lore.kernel.org/lkml/acbb895a-475e-4679-98fc-6b90c05a00af@linux.intel.com/

I saw Ian's suggestion, and agree with it, we need to pair new features
with regression tests in 'perf test', thanks for working on it!

- Arnaldo
  
Arnaldo Carvalho de Melo Nov. 8, 2023, 9:31 p.m. UTC | #4
Em Tue, Nov 07, 2023 at 12:11:50PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Mon, Nov 06, 2023 at 04:19:13PM -0500, Liang, Kan escreveu:
> > On 2023-11-06 4:12 p.m., Arnaldo Carvalho de Melo wrote:
> > > Em Wed, Oct 25, 2023 at 01:16:20PM -0700, kan.liang@linux.intel.com escreveu:
> > Ian has already reviewed the tool parts.

> > But I still owe a test case for the feature. I will post a patch later.
> > https://lore.kernel.org/lkml/acbb895a-475e-4679-98fc-6b90c05a00af@linux.intel.com/

> I saw Ian's suggestion, and agree with it, we need to pair new features
> with regression tests in 'perf test', thanks for working on it!

Kan,

	I still have to bisect, but can you check if this works for you?


(gdb) run test -F -v 27
Starting program: /root/bin/perf test -F -v 27

 27: Sample parsing                                                  :
--- start ---

Program received signal SIGSEGV, Segmentation fault.
0x00000000004e4aa6 in evsel.parse_sample ()
Missing separate debuginfos, use: dnf debuginfo-install bzip2-libs-1.0.8-13.fc38.x86_64 cyrus-sasl-lib-2.1.28-9.fc38.x86_64 elfutils-debuginfod-client-0.189-3.fc38.x86_64 elfutils-libelf-0.189-3.fc38.x86_64 elfutils-libs-0.189-3.fc38.x86_64 glib2-2.76.5-2.fc38.x86_64 glibc-2.37-13.fc38.x86_64 keyutils-libs-1.6.1-6.fc38.x86_64 krb5-libs-1.21-3.fc38.x86_64 libbabeltrace-1.5.11-2.fc38.x86_64 libbrotli-1.0.9-11.fc38.x86_64 libcap-2.48-6.fc38.x86_64 libcom_err-1.46.5-4.fc38.x86_64 libcurl-8.0.1-5.fc38.x86_64 libevent-2.1.12-8.fc38.x86_64 libgcc-13.2.1-4.fc38.x86_64 libidn2-2.3.4-2.fc38.x86_64 libnghttp2-1.52.0-2.fc38.x86_64 libpfm-4.11.0-11.fc38.x86_64 libpsl-0.21.2-2.fc38.x86_64 libselinux-3.5-1.fc38.x86_64 libssh-0.10.5-1.fc38.x86_64 libstdc++-13.2.1-4.fc38.x86_64 libtraceevent-1.7.2-1.fc38.x86_64 libunistring1.0-1.0-1.fc38.x86_64 libunwind-1.6.2-7.fc38.x86_64 libuuid-2.38.1-4.fc38.x86_64 libxcrypt-4.4.36-1.fc38.x86_64 libzstd-1.5.5-1.fc38.x86_64 opencsd-1.3.3-1.fc38.x86_64 openldap-2.6.6-1.fc38.x86_64 openssl-libs-3.0.9-2.fc38.x86_64 perl-libs-5.36.1-497.fc38.x86_64 popt-1.19-2.fc38.x86_64 python3-libs-3.11.6-1.fc38.x86_64 slang-2.3.3-3.fc38.x86_64 zlib-1.2.13-3.fc38.x86_64
(gdb) bt
#0  0x00000000004e4aa6 in evsel.parse_sample ()
#1  0x00000000004b28dc in do_test ()
#2  0x00000000004b2acd in test.sample_parsing ()
#3  0x0000000000495348 in test_and_print.isra ()
#4  0x0000000000495f5d in cmd_test ()
#5  0x00000000004c2a29 in run_builtin ()
#6  0x000000000041053f in main ()
(gdb)
  
Liang, Kan Nov. 9, 2023, 4:14 p.m. UTC | #5
On 2023-11-08 4:31 p.m., Arnaldo Carvalho de Melo wrote:
> Em Tue, Nov 07, 2023 at 12:11:50PM -0300, Arnaldo Carvalho de Melo escreveu:
>> Em Mon, Nov 06, 2023 at 04:19:13PM -0500, Liang, Kan escreveu:
>>> On 2023-11-06 4:12 p.m., Arnaldo Carvalho de Melo wrote:
>>>> Em Wed, Oct 25, 2023 at 01:16:20PM -0700, kan.liang@linux.intel.com escreveu:
>>> Ian has already reviewed the tool parts.
> 
>>> But I still owe a test case for the feature. I will post a patch later.
>>> https://lore.kernel.org/lkml/acbb895a-475e-4679-98fc-6b90c05a00af@linux.intel.com/
> 
>> I saw Ian's suggestion, and agree with it, we need to pair new features
>> with regression tests in 'perf test', thanks for working on it!
> 
> Kan,
> 
> 	I still have to bisect, but can you check if this works for you?

The branch counters feature requires all the events to belong to a
group. There is no problem for the normal perf usage which usually
initializes an evlist even for a single evsel.
But perf test is special, which may not initialize an evlist. The Sample
parsing test case is one of the examples. It crashes with the
!evsel->evlist.

The below change should fix it. I will post a complete patch shortly.

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 58a9b8c82790..7a6a2d1f96db 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2355,6 +2355,10 @@ static inline bool
evsel__has_branch_counters(const struct evsel *evsel)
 {
        struct evsel *cur, *leader = evsel__leader(evsel);

+       /* The branch counters feature only supports group */
+       if (!leader || !evsel->evlist)
+               return false;
+
        evlist__for_each_entry(evsel->evlist, cur) {
                if ((leader == evsel__leader(cur)) &&
                    (cur->core.attr.branch_sample_type &
PERF_SAMPLE_BRANCH_COUNTERS))

Thanks,
Kan

> 
> 
> (gdb) run test -F -v 27
> Starting program: /root/bin/perf test -F -v 27
> 
>  27: Sample parsing                                                  :
> --- start ---
> 
> Program received signal SIGSEGV, Segmentation fault.
> 0x00000000004e4aa6 in evsel.parse_sample ()
> Missing separate debuginfos, use: dnf debuginfo-install bzip2-libs-1.0.8-13.fc38.x86_64 cyrus-sasl-lib-2.1.28-9.fc38.x86_64 elfutils-debuginfod-client-0.189-3.fc38.x86_64 elfutils-libelf-0.189-3.fc38.x86_64 elfutils-libs-0.189-3.fc38.x86_64 glib2-2.76.5-2.fc38.x86_64 glibc-2.37-13.fc38.x86_64 keyutils-libs-1.6.1-6.fc38.x86_64 krb5-libs-1.21-3.fc38.x86_64 libbabeltrace-1.5.11-2.fc38.x86_64 libbrotli-1.0.9-11.fc38.x86_64 libcap-2.48-6.fc38.x86_64 libcom_err-1.46.5-4.fc38.x86_64 libcurl-8.0.1-5.fc38.x86_64 libevent-2.1.12-8.fc38.x86_64 libgcc-13.2.1-4.fc38.x86_64 libidn2-2.3.4-2.fc38.x86_64 libnghttp2-1.52.0-2.fc38.x86_64 libpfm-4.11.0-11.fc38.x86_64 libpsl-0.21.2-2.fc38.x86_64 libselinux-3.5-1.fc38.x86_64 libssh-0.10.5-1.fc38.x86_64 libstdc++-13.2.1-4.fc38.x86_64 libtraceevent-1.7.2-1.fc38.x86_64 libunistring1.0-1.0-1.fc38.x86_64 libunwind-1.6.2-7.fc38.x86_64 libuuid-2.38.1-4.fc38.x86_64 libxcrypt-4.4.36-1.fc38.x86_64 libzstd-1.5.5-1.fc38.x86_64 opencsd-1.3.3-1.fc38.x86_64 openldap-2.6.6-1.fc38.x86_64 openssl-libs-3.0.9-2.fc38.x86_64 perl-libs-5.36.1-497.fc38.x86_64 popt-1.19-2.fc38.x86_64 python3-libs-3.11.6-1.fc38.x86_64 slang-2.3.3-3.fc38.x86_64 zlib-1.2.13-3.fc38.x86_64
> (gdb) bt
> #0  0x00000000004e4aa6 in evsel.parse_sample ()
> #1  0x00000000004b28dc in do_test ()
> #2  0x00000000004b2acd in test.sample_parsing ()
> #3  0x0000000000495348 in test_and_print.isra ()
> #4  0x0000000000495f5d in cmd_test ()
> #5  0x00000000004c2a29 in run_builtin ()
> #6  0x000000000041053f in main ()
> (gdb)
>
  
Arnaldo Carvalho de Melo Nov. 9, 2023, 4:45 p.m. UTC | #6
Em Thu, Nov 09, 2023 at 11:14:31AM -0500, Liang, Kan escreveu:
> 
> 
> On 2023-11-08 4:31 p.m., Arnaldo Carvalho de Melo wrote:
> > Em Tue, Nov 07, 2023 at 12:11:50PM -0300, Arnaldo Carvalho de Melo escreveu:
> >> Em Mon, Nov 06, 2023 at 04:19:13PM -0500, Liang, Kan escreveu:
> >>> On 2023-11-06 4:12 p.m., Arnaldo Carvalho de Melo wrote:
> >>>> Em Wed, Oct 25, 2023 at 01:16:20PM -0700, kan.liang@linux.intel.com escreveu:
> >>> Ian has already reviewed the tool parts.
> > 
> >>> But I still owe a test case for the feature. I will post a patch later.
> >>> https://lore.kernel.org/lkml/acbb895a-475e-4679-98fc-6b90c05a00af@linux.intel.com/
> > 
> >> I saw Ian's suggestion, and agree with it, we need to pair new features
> >> with regression tests in 'perf test', thanks for working on it!
> > 
> > Kan,
> > 
> > 	I still have to bisect, but can you check if this works for you?
> 
> The branch counters feature requires all the events to belong to a
> group. There is no problem for the normal perf usage which usually
> initializes an evlist even for a single evsel.
> But perf test is special, which may not initialize an evlist. The Sample
> parsing test case is one of the examples. It crashes with the
> !evsel->evlist.
> 
> The below change should fix it. I will post a complete patch shortly.

Thanks for the quick response, if all that is needed are the checks
below, I'll fold it into your original patch:

2ae01908298426e4 perf tools: Add branch counter knob

So that we don't regress, ok?

I'll add a note and the Link tag points to this discussion in case
people want to do historical digs in the future :-)

- Arnaldo
 
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 58a9b8c82790..7a6a2d1f96db 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -2355,6 +2355,10 @@ static inline bool
> evsel__has_branch_counters(const struct evsel *evsel)
>  {
>         struct evsel *cur, *leader = evsel__leader(evsel);
> 
> +       /* The branch counters feature only supports group */
> +       if (!leader || !evsel->evlist)
> +               return false;
> +
>         evlist__for_each_entry(evsel->evlist, cur) {
>                 if ((leader == evsel__leader(cur)) &&
>                     (cur->core.attr.branch_sample_type &
> PERF_SAMPLE_BRANCH_COUNTERS))
> 
> Thanks,
> Kan
> 
> > 
> > 
> > (gdb) run test -F -v 27
> > Starting program: /root/bin/perf test -F -v 27
> > 
> >  27: Sample parsing                                                  :
> > --- start ---
> > 
> > Program received signal SIGSEGV, Segmentation fault.
> > 0x00000000004e4aa6 in evsel.parse_sample ()
> > Missing separate debuginfos, use: dnf debuginfo-install bzip2-libs-1.0.8-13.fc38.x86_64 cyrus-sasl-lib-2.1.28-9.fc38.x86_64 elfutils-debuginfod-client-0.189-3.fc38.x86_64 elfutils-libelf-0.189-3.fc38.x86_64 elfutils-libs-0.189-3.fc38.x86_64 glib2-2.76.5-2.fc38.x86_64 glibc-2.37-13.fc38.x86_64 keyutils-libs-1.6.1-6.fc38.x86_64 krb5-libs-1.21-3.fc38.x86_64 libbabeltrace-1.5.11-2.fc38.x86_64 libbrotli-1.0.9-11.fc38.x86_64 libcap-2.48-6.fc38.x86_64 libcom_err-1.46.5-4.fc38.x86_64 libcurl-8.0.1-5.fc38.x86_64 libevent-2.1.12-8.fc38.x86_64 libgcc-13.2.1-4.fc38.x86_64 libidn2-2.3.4-2.fc38.x86_64 libnghttp2-1.52.0-2.fc38.x86_64 libpfm-4.11.0-11.fc38.x86_64 libpsl-0.21.2-2.fc38.x86_64 libselinux-3.5-1.fc38.x86_64 libssh-0.10.5-1.fc38.x86_64 libstdc++-13.2.1-4.fc38.x86_64 libtraceevent-1.7.2-1.fc38.x86_64 libunistring1.0-1.0-1.fc38.x86_64 libunwind-1.6.2-7.fc38.x86_64 libuuid-2.38.1-4.fc38.x86_64 libxcrypt-4.4.36-1.fc38.x86_64 libzstd-1.5.5-1.fc38.x86_64 opencsd-1.3.3-1.fc38.x86_64 openldap-2.6.6-1.fc38.x86_64 openssl-libs-3.0.9-2.fc38.x86_64 perl-libs-5.36.1-497.fc38.x86_64 popt-1.19-2.fc38.x86_64 python3-libs-3.11.6-1.fc38.x86_64 slang-2.3.3-3.fc38.x86_64 zlib-1.2.13-3.fc38.x86_64
> > (gdb) bt
> > #0  0x00000000004e4aa6 in evsel.parse_sample ()
> > #1  0x00000000004b28dc in do_test ()
> > #2  0x00000000004b2acd in test.sample_parsing ()
> > #3  0x0000000000495348 in test_and_print.isra ()
> > #4  0x0000000000495f5d in cmd_test ()
> > #5  0x00000000004c2a29 in run_builtin ()
> > #6  0x000000000041053f in main ()
> > (gdb)
> >
  
Liang, Kan Nov. 9, 2023, 5:05 p.m. UTC | #7
On 2023-11-09 11:45 a.m., Arnaldo Carvalho de Melo wrote:
> Em Thu, Nov 09, 2023 at 11:14:31AM -0500, Liang, Kan escreveu:
>>
>>
>> On 2023-11-08 4:31 p.m., Arnaldo Carvalho de Melo wrote:
>>> Em Tue, Nov 07, 2023 at 12:11:50PM -0300, Arnaldo Carvalho de Melo escreveu:
>>>> Em Mon, Nov 06, 2023 at 04:19:13PM -0500, Liang, Kan escreveu:
>>>>> On 2023-11-06 4:12 p.m., Arnaldo Carvalho de Melo wrote:
>>>>>> Em Wed, Oct 25, 2023 at 01:16:20PM -0700, kan.liang@linux.intel.com escreveu:
>>>>> Ian has already reviewed the tool parts.
>>>
>>>>> But I still owe a test case for the feature. I will post a patch later.
>>>>> https://lore.kernel.org/lkml/acbb895a-475e-4679-98fc-6b90c05a00af@linux.intel.com/
>>>
>>>> I saw Ian's suggestion, and agree with it, we need to pair new features
>>>> with regression tests in 'perf test', thanks for working on it!
>>>
>>> Kan,
>>>
>>> 	I still have to bisect, but can you check if this works for you?
>>
>> The branch counters feature requires all the events to belong to a
>> group. There is no problem for the normal perf usage which usually
>> initializes an evlist even for a single evsel.
>> But perf test is special, which may not initialize an evlist. The Sample
>> parsing test case is one of the examples. It crashes with the
>> !evsel->evlist.
>>
>> The below change should fix it. I will post a complete patch shortly.
> 
> Thanks for the quick response, if all that is needed are the checks
> below, I'll fold it into your original patch:
> 
> 2ae01908298426e4 perf tools: Add branch counter knob
> 
> So that we don't regress, ok?

Sure.

I also post the patch to
https://lore.kernel.org/lkml/20231109164007.2037721-1-kan.liang@linux.intel.com/
Either folding it or using the new patch is fine for me.

BTW: the new perf test case for the feature is posted here.
I think Ian is reviewing it.
https://lore.kernel.org/lkml/20231107184020.1497571-1-kan.liang@linux.intel.com/

Thanks,
Kan
> 
> I'll add a note and the Link tag points to this discussion in case
> people want to do historical digs in the future :-)
> 
> - Arnaldo
>  
>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>> index 58a9b8c82790..7a6a2d1f96db 100644
>> --- a/tools/perf/util/evsel.c
>> +++ b/tools/perf/util/evsel.c
>> @@ -2355,6 +2355,10 @@ static inline bool
>> evsel__has_branch_counters(const struct evsel *evsel)
>>  {
>>         struct evsel *cur, *leader = evsel__leader(evsel);
>>
>> +       /* The branch counters feature only supports group */
>> +       if (!leader || !evsel->evlist)
>> +               return false;
>> +
>>         evlist__for_each_entry(evsel->evlist, cur) {
>>                 if ((leader == evsel__leader(cur)) &&
>>                     (cur->core.attr.branch_sample_type &
>> PERF_SAMPLE_BRANCH_COUNTERS))
>>
>> Thanks,
>> Kan
>>
>>>
>>>
>>> (gdb) run test -F -v 27
>>> Starting program: /root/bin/perf test -F -v 27
>>>
>>>  27: Sample parsing                                                  :
>>> --- start ---
>>>
>>> Program received signal SIGSEGV, Segmentation fault.
>>> 0x00000000004e4aa6 in evsel.parse_sample ()
>>> Missing separate debuginfos, use: dnf debuginfo-install bzip2-libs-1.0.8-13.fc38.x86_64 cyrus-sasl-lib-2.1.28-9.fc38.x86_64 elfutils-debuginfod-client-0.189-3.fc38.x86_64 elfutils-libelf-0.189-3.fc38.x86_64 elfutils-libs-0.189-3.fc38.x86_64 glib2-2.76.5-2.fc38.x86_64 glibc-2.37-13.fc38.x86_64 keyutils-libs-1.6.1-6.fc38.x86_64 krb5-libs-1.21-3.fc38.x86_64 libbabeltrace-1.5.11-2.fc38.x86_64 libbrotli-1.0.9-11.fc38.x86_64 libcap-2.48-6.fc38.x86_64 libcom_err-1.46.5-4.fc38.x86_64 libcurl-8.0.1-5.fc38.x86_64 libevent-2.1.12-8.fc38.x86_64 libgcc-13.2.1-4.fc38.x86_64 libidn2-2.3.4-2.fc38.x86_64 libnghttp2-1.52.0-2.fc38.x86_64 libpfm-4.11.0-11.fc38.x86_64 libpsl-0.21.2-2.fc38.x86_64 libselinux-3.5-1.fc38.x86_64 libssh-0.10.5-1.fc38.x86_64 libstdc++-13.2.1-4.fc38.x86_64 libtraceevent-1.7.2-1.fc38.x86_64 libunistring1.0-1.0-1.fc38.x86_64 libunwind-1.6.2-7.fc38.x86_64 libuuid-2.38.1-4.fc38.x86_64 libxcrypt-4.4.36-1.fc38.x86_64 libzstd-1.5.5-1.fc38.x86_64 opencsd-1.3.3-1.fc38.x86_64 openldap-2.6.6-1.fc38.x86_64 openssl-libs-3.0.9-2.fc38.x86_64 perl-libs-5.36.1-497.fc38.x86_64 popt-1.19-2.fc38.x86_64 python3-libs-3.11.6-1.fc38.x86_64 slang-2.3.3-3.fc38.x86_64 zlib-1.2.13-3.fc38.x86_64
>>> (gdb) bt
>>> #0  0x00000000004e4aa6 in evsel.parse_sample ()
>>> #1  0x00000000004b28dc in do_test ()
>>> #2  0x00000000004b2acd in test.sample_parsing ()
>>> #3  0x0000000000495348 in test_and_print.isra ()
>>> #4  0x0000000000495f5d in cmd_test ()
>>> #5  0x00000000004c2a29 in run_builtin ()
>>> #6  0x000000000041053f in main ()
>>> (gdb)
>>>
>
  
Arnaldo Carvalho de Melo Nov. 9, 2023, 6:46 p.m. UTC | #8
Em Thu, Nov 09, 2023 at 12:05:27PM -0500, Liang, Kan escreveu:
> On 2023-11-09 11:45 a.m., Arnaldo Carvalho de Melo wrote:
> > Em Thu, Nov 09, 2023 at 11:14:31AM -0500, Liang, Kan escreveu:
> >> The below change should fix it. I will post a complete patch shortly.

> > Thanks for the quick response, if all that is needed are the checks
> > below, I'll fold it into your original patch:

> > 2ae01908298426e4 perf tools: Add branch counter knob

> > So that we don't regress, ok?

> Sure.

> I also post the patch to
> https://lore.kernel.org/lkml/20231109164007.2037721-1-kan.liang@linux.intel.com/
> Either folding it or using the new patch is fine for me.

I folded it, retested, pushed out perf-tools-next.
 
> BTW: the new perf test case for the feature is posted here.
> I think Ian is reviewing it.
> https://lore.kernel.org/lkml/20231107184020.1497571-1-kan.liang@linux.intel.com/

Ok, lets wait some more.

Hey, what is SFR/GRR? Sapphire Rapids/Granite Rapids? I thought about
testing this somehow, if possible.

- Arnaldo
  
Liang, Kan Nov. 9, 2023, 7:07 p.m. UTC | #9
On 2023-11-09 1:46 p.m., Arnaldo Carvalho de Melo wrote:
> Em Thu, Nov 09, 2023 at 12:05:27PM -0500, Liang, Kan escreveu:
>> On 2023-11-09 11:45 a.m., Arnaldo Carvalho de Melo wrote:
>>> Em Thu, Nov 09, 2023 at 11:14:31AM -0500, Liang, Kan escreveu:
>>>> The below change should fix it. I will post a complete patch shortly.
> 
>>> Thanks for the quick response, if all that is needed are the checks
>>> below, I'll fold it into your original patch:
> 
>>> 2ae01908298426e4 perf tools: Add branch counter knob
> 
>>> So that we don't regress, ok?
> 
>> Sure.
> 
>> I also post the patch to
>> https://lore.kernel.org/lkml/20231109164007.2037721-1-kan.liang@linux.intel.com/
>> Either folding it or using the new patch is fine for me.
> 
> I folded it, retested, pushed out perf-tools-next.

Thanks!

>  
>> BTW: the new perf test case for the feature is posted here.
>> I think Ian is reviewing it.
>> https://lore.kernel.org/lkml/20231107184020.1497571-1-kan.liang@linux.intel.com/
> 
> Ok, lets wait some more.
> 
> Hey, what is SFR/GRR? Sapphire Rapids/Granite Rapids? I thought about
> testing this somehow, if possible.
> 

Sierra Forest/Grand Ridge. The feature is only available on the E-core
based server for now.

Thanks,
Kan
  

Patch

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 41a164764a84..a99449c0d77c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2527,9 +2527,14 @@  static void intel_pmu_assign_event(struct perf_event *event, int idx)
 		perf_report_aux_output_id(event, idx);
 }
 
+static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
+{
+	return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+}
+
 static void intel_pmu_del_event(struct perf_event *event)
 {
-	if (needs_branch_stack(event))
+	if (intel_pmu_needs_branch_stack(event))
 		intel_pmu_lbr_del(event);
 	if (event->attr.precise_ip)
 		intel_pmu_pebs_del(event);
@@ -2820,7 +2825,7 @@  static void intel_pmu_add_event(struct perf_event *event)
 {
 	if (event->attr.precise_ip)
 		intel_pmu_pebs_add(event);
-	if (needs_branch_stack(event))
+	if (intel_pmu_needs_branch_stack(event))
 		intel_pmu_lbr_add(event);
 }
 
@@ -3897,7 +3902,10 @@  static int intel_pmu_hw_config(struct perf_event *event)
 			x86_pmu.pebs_aliases(event);
 	}
 
-	if (needs_branch_stack(event)) {
+	if (needs_branch_stack(event) && is_sampling_event(event))
+		event->hw.flags  |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+	if (intel_pmu_needs_branch_stack(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
 		if (ret)
 			return ret;
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 1dc19b9b4426..a1685981c520 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -20,3 +20,4 @@  PERF_ARCH(TOPDOWN,		0x04000) /* Count Topdown slots/metrics events */
 PERF_ARCH(PEBS_STLAT,		0x08000) /* st+stlat data address sampling */
 PERF_ARCH(AMD_BRS,		0x10000) /* AMD Branch Sampling */
 PERF_ARCH(PEBS_LAT_HYBRID,	0x20000) /* ld and st lat for hybrid */
+PERF_ARCH(NEEDS_BRANCH_STACK,	0x40000) /* require branch stack setup */