[2/8] perf evsel: Fix the annotation for hardware events on hybrid

Message ID 20230607162700.3234712-3-kan.liang@linux.intel.com
State New
Headers
Series New metricgroup output in perf stat default mode |

Commit Message

Liang, Kan June 7, 2023, 4:26 p.m. UTC
  From: Kan Liang <kan.liang@linux.intel.com>

The annotation for hardware events is wrong on hybrid. For example,

 # ./perf stat -a sleep 1

 Performance counter stats for 'system wide':

         32,148.85 msec cpu-clock                        #   32.000 CPUs utilized
               374      context-switches                 #   11.633 /sec
                33      cpu-migrations                   #    1.026 /sec
               295      page-faults                      #    9.176 /sec
        18,979,960      cpu_core/cycles/                 #  590.378 K/sec
       261,230,783      cpu_atom/cycles/                 #    8.126 M/sec                       (54.21%)
        17,019,732      cpu_core/instructions/           #  529.404 K/sec
        38,020,470      cpu_atom/instructions/           #    1.183 M/sec                       (63.36%)
         3,296,743      cpu_core/branches/               #  102.546 K/sec
         6,692,338      cpu_atom/branches/               #  208.167 K/sec                       (63.40%)
            96,421      cpu_core/branch-misses/          #    2.999 K/sec
         1,016,336      cpu_atom/branch-misses/          #   31.613 K/sec                       (63.38%)

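On hybrid systems, hardware events carry an extended PMU type in the upper
bits of attr.config, but evsel__match() compares the whole config value and
doesn't take the extended type into account, so the events are never matched.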

Add a mask to filter the extended type on hybrid when checking the config.

With the patch,

 # ./perf stat -a sleep 1

 Performance counter stats for 'system wide':

         32,139.90 msec cpu-clock                        #   32.003 CPUs utilized
               343      context-switches                 #   10.672 /sec
                32      cpu-migrations                   #    0.996 /sec
                73      page-faults                      #    2.271 /sec
        13,712,841      cpu_core/cycles/                 #    0.000 GHz
       258,301,691      cpu_atom/cycles/                 #    0.008 GHz                         (54.20%)
        12,428,163      cpu_core/instructions/           #    0.91  insn per cycle
        37,786,557      cpu_atom/instructions/           #    2.76  insn per cycle              (63.35%)
         2,418,826      cpu_core/branches/               #   75.259 K/sec
         6,965,962      cpu_atom/branches/               #  216.739 K/sec                       (63.38%)
            72,150      cpu_core/branch-misses/          #    2.98% of all branches
         1,032,746      cpu_atom/branch-misses/          #   42.70% of all branches             (63.35%)

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
 tools/perf/util/evsel.h       | 12 ++++++-----
 tools/perf/util/stat-shadow.c | 39 +++++++++++++++++++----------------
 2 files changed, 28 insertions(+), 23 deletions(-)
  

Comments

Ian Rogers June 13, 2023, 7:35 p.m. UTC | #1
On Wed, Jun 7, 2023 at 9:27 AM <kan.liang@linux.intel.com> wrote:
>
> From: Kan Liang <kan.liang@linux.intel.com>
>
> The annotation for hardware events is wrong on hybrid. For example,
>
>  # ./perf stat -a sleep 1
>
>  Performance counter stats for 'system wide':
>
>          32,148.85 msec cpu-clock                        #   32.000 CPUs utilized
>                374      context-switches                 #   11.633 /sec
>                 33      cpu-migrations                   #    1.026 /sec
>                295      page-faults                      #    9.176 /sec
>         18,979,960      cpu_core/cycles/                 #  590.378 K/sec
>        261,230,783      cpu_atom/cycles/                 #    8.126 M/sec                       (54.21%)
>         17,019,732      cpu_core/instructions/           #  529.404 K/sec
>         38,020,470      cpu_atom/instructions/           #    1.183 M/sec                       (63.36%)
>          3,296,743      cpu_core/branches/               #  102.546 K/sec
>          6,692,338      cpu_atom/branches/               #  208.167 K/sec                       (63.40%)
>             96,421      cpu_core/branch-misses/          #    2.999 K/sec
>          1,016,336      cpu_atom/branch-misses/          #   31.613 K/sec                       (63.38%)
>
> The hardware events have extended type on hybrid, but the evsel__match()
> doesn't take it into account.
>
> Add a mask to filter the extended type on hybrid when checking the config.
>
> With the patch,
>
>  # ./perf stat -a sleep 1
>
>  Performance counter stats for 'system wide':
>
>          32,139.90 msec cpu-clock                        #   32.003 CPUs utilized
>                343      context-switches                 #   10.672 /sec
>                 32      cpu-migrations                   #    0.996 /sec
>                 73      page-faults                      #    2.271 /sec
>         13,712,841      cpu_core/cycles/                 #    0.000 GHz
>        258,301,691      cpu_atom/cycles/                 #    0.008 GHz                         (54.20%)
>         12,428,163      cpu_core/instructions/           #    0.91  insn per cycle
>         37,786,557      cpu_atom/instructions/           #    2.76  insn per cycle              (63.35%)
>          2,418,826      cpu_core/branches/               #   75.259 K/sec
>          6,965,962      cpu_atom/branches/               #  216.739 K/sec                       (63.38%)
>             72,150      cpu_core/branch-misses/          #    2.98% of all branches
>          1,032,746      cpu_atom/branch-misses/          #   42.70% of all branches             (63.35%)
>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> ---
>  tools/perf/util/evsel.h       | 12 ++++++-----
>  tools/perf/util/stat-shadow.c | 39 +++++++++++++++++++----------------
>  2 files changed, 28 insertions(+), 23 deletions(-)
>
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index b365b449c6ea..36a32e4ca168 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -350,9 +350,11 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
>
>  struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
>
> -#define evsel__match(evsel, t, c)              \
> +#define EVSEL_EVENT_MASK                       (~0ULL)
> +
> +#define evsel__match(evsel, t, c, m)                   \
>         (evsel->core.attr.type == PERF_TYPE_##t &&      \
> -        evsel->core.attr.config == PERF_COUNT_##c)
> +        (evsel->core.attr.config & m) == PERF_COUNT_##c)

The EVSEL_EVENT_MASK name here isn't very intention-revealing; perhaps we
can remove it and do something like:

static inline bool __evsel__match(const struct evsel *evsel, u32 type,
u64 config)
{
  if ((type == PERF_TYPE_HARDWARE || type ==PERF_TYPE_HW_CACHE)  &&
perf_pmus__supports_extended_type())
     return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;

  return evsel->core.attr.config == config;
}
#define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t,
PERF_COUNT_##c)

Thanks,
Ian

>
>  static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
>  {
> @@ -438,13 +440,13 @@ bool evsel__is_function_event(struct evsel *evsel);
>
>  static inline bool evsel__is_bpf_output(struct evsel *evsel)
>  {
> -       return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
> +       return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT, EVSEL_EVENT_MASK);
>  }
>
>  static inline bool evsel__is_clock(const struct evsel *evsel)
>  {
> -       return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
> -              evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
> +       return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK, EVSEL_EVENT_MASK) ||
> +              evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK, EVSEL_EVENT_MASK);
>  }
>
>  bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize);
> diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
> index 1566a206ba42..074f38b57e2d 100644
> --- a/tools/perf/util/stat-shadow.c
> +++ b/tools/perf/util/stat-shadow.c
> @@ -6,6 +6,7 @@
>  #include "color.h"
>  #include "debug.h"
>  #include "pmu.h"
> +#include "pmus.h"
>  #include "rblist.h"
>  #include "evlist.h"
>  #include "expr.h"
> @@ -78,6 +79,8 @@ void perf_stat__reset_shadow_stats(void)
>
>  static enum stat_type evsel__stat_type(const struct evsel *evsel)
>  {
> +       u64 mask = perf_pmus__supports_extended_type() ? PERF_HW_EVENT_MASK : EVSEL_EVENT_MASK;
> +
>         /* Fake perf_hw_cache_op_id values for use with evsel__match. */
>         u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D |
>                 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
> @@ -97,41 +100,41 @@ static enum stat_type evsel__stat_type(const struct evsel *evsel)
>
>         if (evsel__is_clock(evsel))
>                 return STAT_NSECS;
> -       else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
> +       else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES, mask))
>                 return STAT_CYCLES;
> -       else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS))
> +       else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS, mask))
>                 return STAT_INSTRUCTIONS;
> -       else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
> +       else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND, mask))
>                 return STAT_STALLED_CYCLES_FRONT;
> -       else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND))
> +       else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND, mask))
>                 return STAT_STALLED_CYCLES_BACK;
> -       else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS))
> +       else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS, mask))
>                 return STAT_BRANCHES;
> -       else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES))
> +       else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES, mask))
>                 return STAT_BRANCH_MISS;
> -       else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES))
> +       else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES, mask))
>                 return STAT_CACHE_REFS;
> -       else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES))
> +       else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES, mask))
>                 return STAT_CACHE_MISSES;
> -       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D))
> +       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D, mask))
>                 return STAT_L1_DCACHE;
> -       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I))
> +       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I, mask))
>                 return STAT_L1_ICACHE;
> -       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL))
> +       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL, mask))
>                 return STAT_LL_CACHE;
> -       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB))
> +       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB, mask))
>                 return STAT_DTLB_CACHE;
> -       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB))
> +       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB, mask))
>                 return STAT_ITLB_CACHE;
> -       else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss))
> +       else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss, mask))
>                 return STAT_L1D_MISS;
> -       else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss))
> +       else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss, mask))
>                 return STAT_L1I_MISS;
> -       else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss))
> +       else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss, mask))
>                 return STAT_LL_MISS;
> -       else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss))
> +       else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss, mask))
>                 return STAT_DTLB_MISS;
> -       else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss))
> +       else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss, mask))
>                 return STAT_ITLB_MISS;
>         return STAT_NONE;
>  }
> --
> 2.35.1
>
  
Liang, Kan June 13, 2023, 8:06 p.m. UTC | #2
On 2023-06-13 3:35 p.m., Ian Rogers wrote:
> On Wed, Jun 7, 2023 at 9:27 AM <kan.liang@linux.intel.com> wrote:
>>
>> From: Kan Liang <kan.liang@linux.intel.com>
>>
>> The annotation for hardware events is wrong on hybrid. For example,
>>
>>  # ./perf stat -a sleep 1
>>
>>  Performance counter stats for 'system wide':
>>
>>          32,148.85 msec cpu-clock                        #   32.000 CPUs utilized
>>                374      context-switches                 #   11.633 /sec
>>                 33      cpu-migrations                   #    1.026 /sec
>>                295      page-faults                      #    9.176 /sec
>>         18,979,960      cpu_core/cycles/                 #  590.378 K/sec
>>        261,230,783      cpu_atom/cycles/                 #    8.126 M/sec                       (54.21%)
>>         17,019,732      cpu_core/instructions/           #  529.404 K/sec
>>         38,020,470      cpu_atom/instructions/           #    1.183 M/sec                       (63.36%)
>>          3,296,743      cpu_core/branches/               #  102.546 K/sec
>>          6,692,338      cpu_atom/branches/               #  208.167 K/sec                       (63.40%)
>>             96,421      cpu_core/branch-misses/          #    2.999 K/sec
>>          1,016,336      cpu_atom/branch-misses/          #   31.613 K/sec                       (63.38%)
>>
>> The hardware events have extended type on hybrid, but the evsel__match()
>> doesn't take it into account.
>>
>> Add a mask to filter the extended type on hybrid when checking the config.
>>
>> With the patch,
>>
>>  # ./perf stat -a sleep 1
>>
>>  Performance counter stats for 'system wide':
>>
>>          32,139.90 msec cpu-clock                        #   32.003 CPUs utilized
>>                343      context-switches                 #   10.672 /sec
>>                 32      cpu-migrations                   #    0.996 /sec
>>                 73      page-faults                      #    2.271 /sec
>>         13,712,841      cpu_core/cycles/                 #    0.000 GHz
>>        258,301,691      cpu_atom/cycles/                 #    0.008 GHz                         (54.20%)
>>         12,428,163      cpu_core/instructions/           #    0.91  insn per cycle
>>         37,786,557      cpu_atom/instructions/           #    2.76  insn per cycle              (63.35%)
>>          2,418,826      cpu_core/branches/               #   75.259 K/sec
>>          6,965,962      cpu_atom/branches/               #  216.739 K/sec                       (63.38%)
>>             72,150      cpu_core/branch-misses/          #    2.98% of all branches
>>          1,032,746      cpu_atom/branch-misses/          #   42.70% of all branches             (63.35%)
>>
>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>> ---
>>  tools/perf/util/evsel.h       | 12 ++++++-----
>>  tools/perf/util/stat-shadow.c | 39 +++++++++++++++++++----------------
>>  2 files changed, 28 insertions(+), 23 deletions(-)
>>
>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>> index b365b449c6ea..36a32e4ca168 100644
>> --- a/tools/perf/util/evsel.h
>> +++ b/tools/perf/util/evsel.h
>> @@ -350,9 +350,11 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
>>
>>  struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
>>
>> -#define evsel__match(evsel, t, c)              \
>> +#define EVSEL_EVENT_MASK                       (~0ULL)
>> +
>> +#define evsel__match(evsel, t, c, m)                   \
>>         (evsel->core.attr.type == PERF_TYPE_##t &&      \
>> -        evsel->core.attr.config == PERF_COUNT_##c)
>> +        (evsel->core.attr.config & m) == PERF_COUNT_##c)
> 
> The EVSEL_EVENT_MASK here isn't very intention revealing, perhaps we
> can remove it and do something like:
> 
> static inline bool __evsel__match(const struct evsel *evsel, u32 type,
> u64 config)
> {
>   if ((type == PERF_TYPE_HARDWARE || type ==PERF_TYPE_HW_CACHE)  &&
> perf_pmus__supports_extended_type())
>      return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
> 
>   return evsel->core.attr.config == config;
> }
> #define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t,
> PERF_COUNT_##c)

Yes, the above code looks better. I will apply it in V2.

Thanks,
Kan
> 
> Thanks,
> Ian
> 
>>
>>  static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
>>  {
>> @@ -438,13 +440,13 @@ bool evsel__is_function_event(struct evsel *evsel);
>>
>>  static inline bool evsel__is_bpf_output(struct evsel *evsel)
>>  {
>> -       return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
>> +       return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT, EVSEL_EVENT_MASK);
>>  }
>>
>>  static inline bool evsel__is_clock(const struct evsel *evsel)
>>  {
>> -       return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
>> -              evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
>> +       return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK, EVSEL_EVENT_MASK) ||
>> +              evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK, EVSEL_EVENT_MASK);
>>  }
>>
>>  bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize);
>> diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
>> index 1566a206ba42..074f38b57e2d 100644
>> --- a/tools/perf/util/stat-shadow.c
>> +++ b/tools/perf/util/stat-shadow.c
>> @@ -6,6 +6,7 @@
>>  #include "color.h"
>>  #include "debug.h"
>>  #include "pmu.h"
>> +#include "pmus.h"
>>  #include "rblist.h"
>>  #include "evlist.h"
>>  #include "expr.h"
>> @@ -78,6 +79,8 @@ void perf_stat__reset_shadow_stats(void)
>>
>>  static enum stat_type evsel__stat_type(const struct evsel *evsel)
>>  {
>> +       u64 mask = perf_pmus__supports_extended_type() ? PERF_HW_EVENT_MASK : EVSEL_EVENT_MASK;
>> +
>>         /* Fake perf_hw_cache_op_id values for use with evsel__match. */
>>         u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D |
>>                 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
>> @@ -97,41 +100,41 @@ static enum stat_type evsel__stat_type(const struct evsel *evsel)
>>
>>         if (evsel__is_clock(evsel))
>>                 return STAT_NSECS;
>> -       else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
>> +       else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES, mask))
>>                 return STAT_CYCLES;
>> -       else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS))
>> +       else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS, mask))
>>                 return STAT_INSTRUCTIONS;
>> -       else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
>> +       else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND, mask))
>>                 return STAT_STALLED_CYCLES_FRONT;
>> -       else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND))
>> +       else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND, mask))
>>                 return STAT_STALLED_CYCLES_BACK;
>> -       else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS))
>> +       else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS, mask))
>>                 return STAT_BRANCHES;
>> -       else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES))
>> +       else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES, mask))
>>                 return STAT_BRANCH_MISS;
>> -       else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES))
>> +       else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES, mask))
>>                 return STAT_CACHE_REFS;
>> -       else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES))
>> +       else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES, mask))
>>                 return STAT_CACHE_MISSES;
>> -       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D))
>> +       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D, mask))
>>                 return STAT_L1_DCACHE;
>> -       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I))
>> +       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I, mask))
>>                 return STAT_L1_ICACHE;
>> -       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL))
>> +       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL, mask))
>>                 return STAT_LL_CACHE;
>> -       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB))
>> +       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB, mask))
>>                 return STAT_DTLB_CACHE;
>> -       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB))
>> +       else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB, mask))
>>                 return STAT_ITLB_CACHE;
>> -       else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss))
>> +       else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss, mask))
>>                 return STAT_L1D_MISS;
>> -       else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss))
>> +       else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss, mask))
>>                 return STAT_L1I_MISS;
>> -       else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss))
>> +       else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss, mask))
>>                 return STAT_LL_MISS;
>> -       else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss))
>> +       else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss, mask))
>>                 return STAT_DTLB_MISS;
>> -       else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss))
>> +       else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss, mask))
>>                 return STAT_ITLB_MISS;
>>         return STAT_NONE;
>>  }
>> --
>> 2.35.1
>>
  
Arnaldo Carvalho de Melo June 13, 2023, 9:18 p.m. UTC | #3
Em Tue, Jun 13, 2023 at 04:06:59PM -0400, Liang, Kan escreveu:
> 
> 
> On 2023-06-13 3:35 p.m., Ian Rogers wrote:
> > On Wed, Jun 7, 2023 at 9:27 AM <kan.liang@linux.intel.com> wrote:
> >>
> >> From: Kan Liang <kan.liang@linux.intel.com>
> >>
> >> The annotation for hardware events is wrong on hybrid. For example,
> >>
> >>  # ./perf stat -a sleep 1
> >>
> >>  Performance counter stats for 'system wide':
> >>
> >>          32,148.85 msec cpu-clock                        #   32.000 CPUs utilized
> >>                374      context-switches                 #   11.633 /sec
> >>                 33      cpu-migrations                   #    1.026 /sec
> >>                295      page-faults                      #    9.176 /sec
> >>         18,979,960      cpu_core/cycles/                 #  590.378 K/sec
> >>        261,230,783      cpu_atom/cycles/                 #    8.126 M/sec                       (54.21%)
> >>         17,019,732      cpu_core/instructions/           #  529.404 K/sec
> >>         38,020,470      cpu_atom/instructions/           #    1.183 M/sec                       (63.36%)
> >>          3,296,743      cpu_core/branches/               #  102.546 K/sec
> >>          6,692,338      cpu_atom/branches/               #  208.167 K/sec                       (63.40%)
> >>             96,421      cpu_core/branch-misses/          #    2.999 K/sec
> >>          1,016,336      cpu_atom/branch-misses/          #   31.613 K/sec                       (63.38%)
> >>
> >> The hardware events have extended type on hybrid, but the evsel__match()
> >> doesn't take it into account.
> >>
> >> Add a mask to filter the extended type on hybrid when checking the config.
> >>
> >> With the patch,
> >>
> >>  # ./perf stat -a sleep 1
> >>
> >>  Performance counter stats for 'system wide':
> >>
> >>          32,139.90 msec cpu-clock                        #   32.003 CPUs utilized
> >>                343      context-switches                 #   10.672 /sec
> >>                 32      cpu-migrations                   #    0.996 /sec
> >>                 73      page-faults                      #    2.271 /sec
> >>         13,712,841      cpu_core/cycles/                 #    0.000 GHz
> >>        258,301,691      cpu_atom/cycles/                 #    0.008 GHz                         (54.20%)
> >>         12,428,163      cpu_core/instructions/           #    0.91  insn per cycle
> >>         37,786,557      cpu_atom/instructions/           #    2.76  insn per cycle              (63.35%)
> >>          2,418,826      cpu_core/branches/               #   75.259 K/sec
> >>          6,965,962      cpu_atom/branches/               #  216.739 K/sec                       (63.38%)
> >>             72,150      cpu_core/branch-misses/          #    2.98% of all branches
> >>          1,032,746      cpu_atom/branch-misses/          #   42.70% of all branches             (63.35%)
> >>
> >> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> >> ---
> >>  tools/perf/util/evsel.h       | 12 ++++++-----
> >>  tools/perf/util/stat-shadow.c | 39 +++++++++++++++++++----------------
> >>  2 files changed, 28 insertions(+), 23 deletions(-)
> >>
> >> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> >> index b365b449c6ea..36a32e4ca168 100644
> >> --- a/tools/perf/util/evsel.h
> >> +++ b/tools/perf/util/evsel.h
> >> @@ -350,9 +350,11 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
> >>
> >>  struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
> >>
> >> -#define evsel__match(evsel, t, c)              \
> >> +#define EVSEL_EVENT_MASK                       (~0ULL)
> >> +
> >> +#define evsel__match(evsel, t, c, m)                   \
> >>         (evsel->core.attr.type == PERF_TYPE_##t &&      \
> >> -        evsel->core.attr.config == PERF_COUNT_##c)
> >> +        (evsel->core.attr.config & m) == PERF_COUNT_##c)
> > 
> > The EVSEL_EVENT_MASK here isn't very intention revealing, perhaps we
> > can remove it and do something like:
> > 
> > static inline bool __evsel__match(const struct evsel *evsel, u32 type,
> > u64 config)
> > {
> >   if ((type == PERF_TYPE_HARDWARE || type ==PERF_TYPE_HW_CACHE)  &&
> > perf_pmus__supports_extended_type())
> >      return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
> > 
> >   return evsel->core.attr.config == config;
> > }
> > #define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t,
> > PERF_COUNT_##c)
> 
> Yes, the above code looks better. I will apply it in V2.

Please base v2 on tmp.perf-tools-next, tests are running and that branch
will become perf-tools-next.

Some patches from your series were cherry-picked there.

- Arnaldo
  
Liang, Kan June 13, 2023, 11:57 p.m. UTC | #4
On 2023-06-13 5:18 p.m., Arnaldo Carvalho de Melo wrote:
> Em Tue, Jun 13, 2023 at 04:06:59PM -0400, Liang, Kan escreveu:
>>
>>
>> On 2023-06-13 3:35 p.m., Ian Rogers wrote:
>>> On Wed, Jun 7, 2023 at 9:27 AM <kan.liang@linux.intel.com> wrote:
>>>>
>>>> From: Kan Liang <kan.liang@linux.intel.com>
>>>>
>>>> The annotation for hardware events is wrong on hybrid. For example,
>>>>
>>>>  # ./perf stat -a sleep 1
>>>>
>>>>  Performance counter stats for 'system wide':
>>>>
>>>>          32,148.85 msec cpu-clock                        #   32.000 CPUs utilized
>>>>                374      context-switches                 #   11.633 /sec
>>>>                 33      cpu-migrations                   #    1.026 /sec
>>>>                295      page-faults                      #    9.176 /sec
>>>>         18,979,960      cpu_core/cycles/                 #  590.378 K/sec
>>>>        261,230,783      cpu_atom/cycles/                 #    8.126 M/sec                       (54.21%)
>>>>         17,019,732      cpu_core/instructions/           #  529.404 K/sec
>>>>         38,020,470      cpu_atom/instructions/           #    1.183 M/sec                       (63.36%)
>>>>          3,296,743      cpu_core/branches/               #  102.546 K/sec
>>>>          6,692,338      cpu_atom/branches/               #  208.167 K/sec                       (63.40%)
>>>>             96,421      cpu_core/branch-misses/          #    2.999 K/sec
>>>>          1,016,336      cpu_atom/branch-misses/          #   31.613 K/sec                       (63.38%)
>>>>
>>>> The hardware events have extended type on hybrid, but the evsel__match()
>>>> doesn't take it into account.
>>>>
>>>> Add a mask to filter the extended type on hybrid when checking the config.
>>>>
>>>> With the patch,
>>>>
>>>>  # ./perf stat -a sleep 1
>>>>
>>>>  Performance counter stats for 'system wide':
>>>>
>>>>          32,139.90 msec cpu-clock                        #   32.003 CPUs utilized
>>>>                343      context-switches                 #   10.672 /sec
>>>>                 32      cpu-migrations                   #    0.996 /sec
>>>>                 73      page-faults                      #    2.271 /sec
>>>>         13,712,841      cpu_core/cycles/                 #    0.000 GHz
>>>>        258,301,691      cpu_atom/cycles/                 #    0.008 GHz                         (54.20%)
>>>>         12,428,163      cpu_core/instructions/           #    0.91  insn per cycle
>>>>         37,786,557      cpu_atom/instructions/           #    2.76  insn per cycle              (63.35%)
>>>>          2,418,826      cpu_core/branches/               #   75.259 K/sec
>>>>          6,965,962      cpu_atom/branches/               #  216.739 K/sec                       (63.38%)
>>>>             72,150      cpu_core/branch-misses/          #    2.98% of all branches
>>>>          1,032,746      cpu_atom/branch-misses/          #   42.70% of all branches             (63.35%)
>>>>
>>>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>>>> ---
>>>>  tools/perf/util/evsel.h       | 12 ++++++-----
>>>>  tools/perf/util/stat-shadow.c | 39 +++++++++++++++++++----------------
>>>>  2 files changed, 28 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>>>> index b365b449c6ea..36a32e4ca168 100644
>>>> --- a/tools/perf/util/evsel.h
>>>> +++ b/tools/perf/util/evsel.h
>>>> @@ -350,9 +350,11 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
>>>>
>>>>  struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
>>>>
>>>> -#define evsel__match(evsel, t, c)              \
>>>> +#define EVSEL_EVENT_MASK                       (~0ULL)
>>>> +
>>>> +#define evsel__match(evsel, t, c, m)                   \
>>>>         (evsel->core.attr.type == PERF_TYPE_##t &&      \
>>>> -        evsel->core.attr.config == PERF_COUNT_##c)
>>>> +        (evsel->core.attr.config & m) == PERF_COUNT_##c)
>>>
>>> The EVSEL_EVENT_MASK here isn't very intention revealing, perhaps we
>>> can remove it and do something like:
>>>
>>> static inline bool __evsel__match(const struct evsel *evsel, u32 type,
>>> u64 config)
>>> {
>>>   if ((type == PERF_TYPE_HARDWARE || type ==PERF_TYPE_HW_CACHE)  &&
>>> perf_pmus__supports_extended_type())
>>>      return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
>>>
>>>   return evsel->core.attr.config == config;
>>> }
>>> #define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t,
>>> PERF_COUNT_##c)
>>
>> Yes, the above code looks better. I will apply it in V2.
> 
> Please base v2 on tmp.perf-tools-next, tests are running and that branch
> will become perf-tools-next.
> 

Sure.

> Some patches from your series were cherry-picked there.

Thanks.

Kan
  

Patch

diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b365b449c6ea..36a32e4ca168 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -350,9 +350,11 @@  u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
 
 struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
 
-#define evsel__match(evsel, t, c)		\
+#define EVSEL_EVENT_MASK			(~0ULL)
+
+#define evsel__match(evsel, t, c, m)			\
 	(evsel->core.attr.type == PERF_TYPE_##t &&	\
-	 evsel->core.attr.config == PERF_COUNT_##c)
+	 (evsel->core.attr.config & m) == PERF_COUNT_##c)
 
 static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
 {
@@ -438,13 +440,13 @@  bool evsel__is_function_event(struct evsel *evsel);
 
 static inline bool evsel__is_bpf_output(struct evsel *evsel)
 {
-	return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
+	return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT, EVSEL_EVENT_MASK);
 }
 
 static inline bool evsel__is_clock(const struct evsel *evsel)
 {
-	return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
-	       evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
+	return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK, EVSEL_EVENT_MASK) ||
+	       evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK, EVSEL_EVENT_MASK);
 }
 
 bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 1566a206ba42..074f38b57e2d 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -6,6 +6,7 @@ 
 #include "color.h"
 #include "debug.h"
 #include "pmu.h"
+#include "pmus.h"
 #include "rblist.h"
 #include "evlist.h"
 #include "expr.h"
@@ -78,6 +79,8 @@  void perf_stat__reset_shadow_stats(void)
 
 static enum stat_type evsel__stat_type(const struct evsel *evsel)
 {
+	u64 mask = perf_pmus__supports_extended_type() ? PERF_HW_EVENT_MASK : EVSEL_EVENT_MASK;
+
 	/* Fake perf_hw_cache_op_id values for use with evsel__match. */
 	u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D |
 		((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
@@ -97,41 +100,41 @@  static enum stat_type evsel__stat_type(const struct evsel *evsel)
 
 	if (evsel__is_clock(evsel))
 		return STAT_NSECS;
-	else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
+	else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES, mask))
 		return STAT_CYCLES;
-	else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS))
+	else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS, mask))
 		return STAT_INSTRUCTIONS;
-	else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+	else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND, mask))
 		return STAT_STALLED_CYCLES_FRONT;
-	else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+	else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND, mask))
 		return STAT_STALLED_CYCLES_BACK;
-	else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+	else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS, mask))
 		return STAT_BRANCHES;
-	else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES))
+	else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES, mask))
 		return STAT_BRANCH_MISS;
-	else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES))
+	else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES, mask))
 		return STAT_CACHE_REFS;
-	else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES))
+	else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES, mask))
 		return STAT_CACHE_MISSES;
-	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D))
+	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D, mask))
 		return STAT_L1_DCACHE;
-	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I))
+	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I, mask))
 		return STAT_L1_ICACHE;
-	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL))
+	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL, mask))
 		return STAT_LL_CACHE;
-	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB))
+	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB, mask))
 		return STAT_DTLB_CACHE;
-	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB))
+	else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB, mask))
 		return STAT_ITLB_CACHE;
-	else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss))
+	else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss, mask))
 		return STAT_L1D_MISS;
-	else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss))
+	else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss, mask))
 		return STAT_L1I_MISS;
-	else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss))
+	else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss, mask))
 		return STAT_LL_MISS;
-	else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss))
+	else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss, mask))
 		return STAT_DTLB_MISS;
-	else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss))
+	else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss, mask))
 		return STAT_ITLB_MISS;
 	return STAT_NONE;
 }