[V2,1/8] perf evsel: Fix the annotation for hardware events on hybrid

Message ID 20230615001735.3643996-2-kan.liang@linux.intel.com
State New
Headers
Series New metricgroup output in perf stat default mode |

Commit Message

Liang, Kan June 15, 2023, 12:17 a.m. UTC
  From: Kan Liang <kan.liang@linux.intel.com>

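The annotation for hardware events is wrong on hybrid platforms. For
example, cycles, instructions and branch-misses only get a generic
per-second rate instead of their GHz, insn per cycle and branch-miss
ratio annotations: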

 # ./perf stat -a sleep 1

 Performance counter stats for 'system wide':

         32,148.85 msec cpu-clock                        #   32.000 CPUs utilized
               374      context-switches                 #   11.633 /sec
                33      cpu-migrations                   #    1.026 /sec
               295      page-faults                      #    9.176 /sec
        18,979,960      cpu_core/cycles/                 #  590.378 K/sec
       261,230,783      cpu_atom/cycles/                 #    8.126 M/sec                       (54.21%)
        17,019,732      cpu_core/instructions/           #  529.404 K/sec
        38,020,470      cpu_atom/instructions/           #    1.183 M/sec                       (63.36%)
         3,296,743      cpu_core/branches/               #  102.546 K/sec
         6,692,338      cpu_atom/branches/               #  208.167 K/sec                       (63.40%)
            96,421      cpu_core/branch-misses/          #    2.999 K/sec
         1,016,336      cpu_atom/branch-misses/          #   31.613 K/sec                       (63.38%)

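On hybrid platforms the hardware events carry an extended type in
attr.config, but evsel__match() compares the whole config value and
doesn't take the extended type into account, so the match fails.

Mask the config with PERF_HW_EVENT_MASK before the comparison when the
PMUs support the extended type.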

With the patch,

 # ./perf stat -a sleep 1

 Performance counter stats for 'system wide':

         32,139.90 msec cpu-clock                        #   32.003 CPUs utilized
               343      context-switches                 #   10.672 /sec
                32      cpu-migrations                   #    0.996 /sec
                73      page-faults                      #    2.271 /sec
        13,712,841      cpu_core/cycles/                 #    0.000 GHz
       258,301,691      cpu_atom/cycles/                 #    0.008 GHz                         (54.20%)
        12,428,163      cpu_core/instructions/           #    0.91  insn per cycle
        37,786,557      cpu_atom/instructions/           #    2.76  insn per cycle              (63.35%)
         2,418,826      cpu_core/branches/               #   75.259 K/sec
         6,965,962      cpu_atom/branches/               #  216.739 K/sec                       (63.38%)
            72,150      cpu_core/branch-misses/          #    2.98% of all branches
         1,032,746      cpu_atom/branch-misses/          #   42.70% of all branches             (63.35%)

Suggested-by: Ian Rogers <irogers@google.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
 tools/perf/util/evsel.h | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)
  

Comments

Ian Rogers June 15, 2023, 5:49 a.m. UTC | #1
On Wed, Jun 14, 2023 at 5:18 PM <kan.liang@linux.intel.com> wrote:
>
> From: Kan Liang <kan.liang@linux.intel.com>
>
> The annotation for hardware events is wrong on hybrid. For example,
>
>  # ./perf stat -a sleep 1
>
>  Performance counter stats for 'system wide':
>
>          32,148.85 msec cpu-clock                        #   32.000 CPUs utilized
>                374      context-switches                 #   11.633 /sec
>                 33      cpu-migrations                   #    1.026 /sec
>                295      page-faults                      #    9.176 /sec
>         18,979,960      cpu_core/cycles/                 #  590.378 K/sec
>        261,230,783      cpu_atom/cycles/                 #    8.126 M/sec                       (54.21%)
>         17,019,732      cpu_core/instructions/           #  529.404 K/sec
>         38,020,470      cpu_atom/instructions/           #    1.183 M/sec                       (63.36%)
>          3,296,743      cpu_core/branches/               #  102.546 K/sec
>          6,692,338      cpu_atom/branches/               #  208.167 K/sec                       (63.40%)
>             96,421      cpu_core/branch-misses/          #    2.999 K/sec
>          1,016,336      cpu_atom/branch-misses/          #   31.613 K/sec                       (63.38%)
>
> The hardware events have extended type on hybrid, but the evsel__match()
> doesn't take it into account.
>
> Filter the config on hybrid before checking.
>
> With the patch,
>
>  # ./perf stat -a sleep 1
>
>  Performance counter stats for 'system wide':
>
>          32,139.90 msec cpu-clock                        #   32.003 CPUs utilized
>                343      context-switches                 #   10.672 /sec
>                 32      cpu-migrations                   #    0.996 /sec
>                 73      page-faults                      #    2.271 /sec
>         13,712,841      cpu_core/cycles/                 #    0.000 GHz
>        258,301,691      cpu_atom/cycles/                 #    0.008 GHz                         (54.20%)
>         12,428,163      cpu_core/instructions/           #    0.91  insn per cycle
>         37,786,557      cpu_atom/instructions/           #    2.76  insn per cycle              (63.35%)
>          2,418,826      cpu_core/branches/               #   75.259 K/sec
>          6,965,962      cpu_atom/branches/               #  216.739 K/sec                       (63.38%)
>             72,150      cpu_core/branch-misses/          #    2.98% of all branches
>          1,032,746      cpu_atom/branch-misses/          #   42.70% of all branches             (63.35%)
>
> Suggested-by: Ian Rogers <irogers@google.com>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>

Reviewed-by: Ian Rogers <irogers@google.com>

Thanks,
Ian

> ---
>  tools/perf/util/evsel.h | 17 ++++++++++++++---
>  1 file changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index b365b449c6ea..cc6fb3049b99 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -10,6 +10,7 @@
>  #include <internal/evsel.h>
>  #include <perf/evsel.h>
>  #include "symbol_conf.h"
> +#include "pmus.h"
>
>  struct bpf_object;
>  struct cgroup;
> @@ -350,9 +351,19 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
>
>  struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
>
> -#define evsel__match(evsel, t, c)              \
> -       (evsel->core.attr.type == PERF_TYPE_##t &&      \
> -        evsel->core.attr.config == PERF_COUNT_##c)
> +static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
> +{
> +       if (evsel->core.attr.type != type)
> +               return false;
> +
> +       if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE)  &&
> +           perf_pmus__supports_extended_type())
> +               return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
> +
> +       return evsel->core.attr.config == config;
> +}
> +
> +#define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c)
>
>  static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
>  {
> --
> 2.35.1
>
  

Patch

diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b365b449c6ea..cc6fb3049b99 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -10,6 +10,7 @@ 
 #include <internal/evsel.h>
 #include <perf/evsel.h>
 #include "symbol_conf.h"
+#include "pmus.h"
 
 struct bpf_object;
 struct cgroup;
@@ -350,9 +351,19 @@  u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
 
 struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
 
-#define evsel__match(evsel, t, c)		\
-	(evsel->core.attr.type == PERF_TYPE_##t &&	\
-	 evsel->core.attr.config == PERF_COUNT_##c)
+static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
+{
+	if (evsel->core.attr.type != type)
+		return false;
+
+	if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE)  &&
+	    perf_pmus__supports_extended_type())
+		return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config;
+
+	return evsel->core.attr.config == config;
+}
+
+#define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c)
 
 static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
 {