[v6,11/20] KVM: selftests: Test Intel PMU architectural events on fixed counters

Message ID 20231104000239.367005-12-seanjc@google.com
State New
Headers
Series KVM: x86/pmu: selftests: Fixes and new tests

Commit Message

Sean Christopherson Nov. 4, 2023, 12:02 a.m. UTC
  From: Jinrong Liang <cloudliang@tencent.com>

Extend the PMU counters test to validate architectural events using fixed
counters.  The core logic is largely the same, the biggest difference
being that if a fixed counter exists, its associated event is available
(the SDM doesn't explicitly state this to be true, but it's KVM's ABI and
letting software program a fixed counter that doesn't actually count would
be quite bizarre).

Note, fixed counters rely on PERF_GLOBAL_CTRL.

Co-developed-by: Like Xu <likexu@tencent.com>
Signed-off-by: Like Xu <likexu@tencent.com>
Signed-off-by: Jinrong Liang <cloudliang@tencent.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../selftests/kvm/x86_64/pmu_counters_test.c  | 53 ++++++++++++++++---
 1 file changed, 45 insertions(+), 8 deletions(-)
  

Comments

Jim Mattson Nov. 4, 2023, 1:46 p.m. UTC | #1
On Fri, Nov 3, 2023 at 5:03 PM Sean Christopherson <seanjc@google.com> wrote:
>
> From: Jinrong Liang <cloudliang@tencent.com>
>
> Extend the PMU counters test to validate architectural events using fixed
> counters.  The core logic is largely the same, the biggest difference
> being that if a fixed counter exists, its associated event is available
> (the SDM doesn't explicitly state this to be true, but it's KVM's ABI and
> letting software program a fixed counter that doesn't actually count would
> be quite bizarre).
>
> Note, fixed counters rely on PERF_GLOBAL_CTRL.
>
> Co-developed-by: Like Xu <likexu@tencent.com>
> Signed-off-by: Like Xu <likexu@tencent.com>
> Signed-off-by: Jinrong Liang <cloudliang@tencent.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Reviewed-by: Jim Mattson <jmattson@google.com>

> ---
>  .../selftests/kvm/x86_64/pmu_counters_test.c  | 53 ++++++++++++++++---
>  1 file changed, 45 insertions(+), 8 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> index dd9a7864410c..4d3a5c94b8ba 100644
> --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> @@ -150,25 +150,46 @@ static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature even
>         guest_assert_event_count(idx, event, pmc, pmc_msr);
>  }
>
> +#define X86_PMU_FEATURE_NULL                                           \
> +({                                                                     \
> +       struct kvm_x86_pmu_feature feature = {};                        \
> +                                                                       \
> +       feature;                                                        \
> +})
> +
> +static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
> +{
> +       return !(*(u64 *)&event);
> +}
> +
>  static void guest_test_arch_event(uint8_t idx)
>  {
>         const struct {
>                 struct kvm_x86_pmu_feature gp_event;
> +               struct kvm_x86_pmu_feature fixed_event;
>         } intel_event_to_feature[] = {
> -               [INTEL_ARCH_CPU_CYCLES]            = { X86_PMU_FEATURE_CPU_CYCLES },
> -               [INTEL_ARCH_INSTRUCTIONS_RETIRED]  = { X86_PMU_FEATURE_INSNS_RETIRED },
> -               [INTEL_ARCH_REFERENCE_CYCLES]      = { X86_PMU_FEATURE_REFERENCE_CYCLES },
> -               [INTEL_ARCH_LLC_REFERENCES]        = { X86_PMU_FEATURE_LLC_REFERENCES },
> -               [INTEL_ARCH_LLC_MISSES]            = { X86_PMU_FEATURE_LLC_MISSES },
> -               [INTEL_ARCH_BRANCHES_RETIRED]      = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED },
> -               [INTEL_ARCH_BRANCHES_MISPREDICTED] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED },
> +               [INTEL_ARCH_CPU_CYCLES]            = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
> +               [INTEL_ARCH_INSTRUCTIONS_RETIRED]  = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
> +               /*
> +                * Note, the fixed counter for reference cycles is NOT the same
> +                * as the general purpose architectural event (because the GP
> +                * event is garbage).  The fixed counter explicitly counts at
> +                * the same frequency as the TSC, whereas the GP event counts
> +                * at a fixed, but uarch specific, frequency.  Bundle them here
> +                * for simplicity.
> +                */

Implementation-specific is not necessarily garbage, though it would be
nice if there was a way to query the frequency rather than calibrating
against another clock.
Note that tools/perf/pmu-events/arch/x86/*/pipeline.json does
typically indicate the {0x3c, 1} frequency for the CPU in question.

> +               [INTEL_ARCH_REFERENCE_CYCLES]      = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_CYCLES_FIXED },
> +               [INTEL_ARCH_LLC_REFERENCES]        = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
> +               [INTEL_ARCH_LLC_MISSES]            = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
> +               [INTEL_ARCH_BRANCHES_RETIRED]      = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
> +               [INTEL_ARCH_BRANCHES_MISPREDICTED] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
>         };
>
>         uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
>         uint32_t pmu_version = guest_get_pmu_version();
>         /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
>         bool guest_has_perf_global_ctrl = pmu_version >= 2;
> -       struct kvm_x86_pmu_feature gp_event;
> +       struct kvm_x86_pmu_feature gp_event, fixed_event;
>         uint32_t base_pmc_msr;
>         unsigned int i;
>
> @@ -198,6 +219,22 @@ static void guest_test_arch_event(uint8_t idx)
>                 __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
>                                         MSR_P6_EVNTSEL0 + i, eventsel);
>         }
> +
> +       if (!guest_has_perf_global_ctrl)
> +               return;
> +
> +       fixed_event = intel_event_to_feature[idx].fixed_event;
> +       if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
> +               return;
> +
> +       i = fixed_event.f.bit;
> +
> +       wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * i));
> +
> +       __guest_test_arch_event(idx, fixed_event, PMC_FIXED_RDPMC_BASE | i,
> +                               MSR_CORE_PERF_FIXED_CTR0 + i,
> +                               MSR_CORE_PERF_GLOBAL_CTRL,
> +                               BIT_ULL(PMC_IDX_FIXED + i));
>  }
>
>  static void guest_test_arch_events(void)
> --
> 2.42.0.869.gea05f2083d-goog
>
  
Sean Christopherson Nov. 6, 2023, 4:39 p.m. UTC | #2
On Sat, Nov 04, 2023, Jim Mattson wrote:
> On Fri, Nov 3, 2023 at 5:03 PM Sean Christopherson <seanjc@google.com> wrote:
> >  static void guest_test_arch_event(uint8_t idx)
> >  {
> >         const struct {
> >                 struct kvm_x86_pmu_feature gp_event;
> > +               struct kvm_x86_pmu_feature fixed_event;
> >         } intel_event_to_feature[] = {
> > -               [INTEL_ARCH_CPU_CYCLES]            = { X86_PMU_FEATURE_CPU_CYCLES },
> > -               [INTEL_ARCH_INSTRUCTIONS_RETIRED]  = { X86_PMU_FEATURE_INSNS_RETIRED },
> > -               [INTEL_ARCH_REFERENCE_CYCLES]      = { X86_PMU_FEATURE_REFERENCE_CYCLES },
> > -               [INTEL_ARCH_LLC_REFERENCES]        = { X86_PMU_FEATURE_LLC_REFERENCES },
> > -               [INTEL_ARCH_LLC_MISSES]            = { X86_PMU_FEATURE_LLC_MISSES },
> > -               [INTEL_ARCH_BRANCHES_RETIRED]      = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED },
> > -               [INTEL_ARCH_BRANCHES_MISPREDICTED] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED },
> > +               [INTEL_ARCH_CPU_CYCLES]            = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
> > +               [INTEL_ARCH_INSTRUCTIONS_RETIRED]  = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
> > +               /*
> > +                * Note, the fixed counter for reference cycles is NOT the same
> > +                * as the general purpose architectural event (because the GP
> > +                * event is garbage).  The fixed counter explicitly counts at
> > +                * the same frequency as the TSC, whereas the GP event counts
> > +                * at a fixed, but uarch specific, frequency.  Bundle them here
> > +                * for simplicity.
> > +                */
> 
> Implementation-specific is not necessarily garbage, though it would be
> nice if there was a way to query the frequency rather than calibrating
> against another clock.

Heh, I'll drop the editorial commentary, though I still think an architectural event
with implementation-specific behavior is garbage :-)
  

Patch

diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
index dd9a7864410c..4d3a5c94b8ba 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
@@ -150,25 +150,46 @@  static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature even
 	guest_assert_event_count(idx, event, pmc, pmc_msr);
 }
 
+#define X86_PMU_FEATURE_NULL						\
+({									\
+	struct kvm_x86_pmu_feature feature = {};			\
+									\
+	feature;							\
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+	return !(*(u64 *)&event);
+}
+
 static void guest_test_arch_event(uint8_t idx)
 {
 	const struct {
 		struct kvm_x86_pmu_feature gp_event;
+		struct kvm_x86_pmu_feature fixed_event;
 	} intel_event_to_feature[] = {
-		[INTEL_ARCH_CPU_CYCLES]		   = { X86_PMU_FEATURE_CPU_CYCLES },
-		[INTEL_ARCH_INSTRUCTIONS_RETIRED]  = { X86_PMU_FEATURE_INSNS_RETIRED },
-		[INTEL_ARCH_REFERENCE_CYCLES]	   = { X86_PMU_FEATURE_REFERENCE_CYCLES },
-		[INTEL_ARCH_LLC_REFERENCES]	   = { X86_PMU_FEATURE_LLC_REFERENCES },
-		[INTEL_ARCH_LLC_MISSES]		   = { X86_PMU_FEATURE_LLC_MISSES },
-		[INTEL_ARCH_BRANCHES_RETIRED]	   = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED },
-		[INTEL_ARCH_BRANCHES_MISPREDICTED] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED },
+		[INTEL_ARCH_CPU_CYCLES]		   = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+		[INTEL_ARCH_INSTRUCTIONS_RETIRED]  = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+		/*
+		 * Note, the fixed counter for reference cycles is NOT the same
+		 * as the general purpose architectural event (because the GP
+		 * event is garbage).  The fixed counter explicitly counts at
+		 * the same frequency as the TSC, whereas the GP event counts
+		 * at a fixed, but uarch specific, frequency.  Bundle them here
+		 * for simplicity.
+		 */
+		[INTEL_ARCH_REFERENCE_CYCLES]	   = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_CYCLES_FIXED },
+		[INTEL_ARCH_LLC_REFERENCES]	   = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_LLC_MISSES]		   = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_BRANCHES_RETIRED]	   = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_BRANCHES_MISPREDICTED] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
 	};
 
 	uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
 	uint32_t pmu_version = guest_get_pmu_version();
 	/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
 	bool guest_has_perf_global_ctrl = pmu_version >= 2;
-	struct kvm_x86_pmu_feature gp_event;
+	struct kvm_x86_pmu_feature gp_event, fixed_event;
 	uint32_t base_pmc_msr;
 	unsigned int i;
 
@@ -198,6 +219,22 @@  static void guest_test_arch_event(uint8_t idx)
 		__guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
 					MSR_P6_EVNTSEL0 + i, eventsel);
 	}
+
+	if (!guest_has_perf_global_ctrl)
+		return;
+
+	fixed_event = intel_event_to_feature[idx].fixed_event;
+	if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
+		return;
+
+	i = fixed_event.f.bit;
+
+	wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * i));
+
+	__guest_test_arch_event(idx, fixed_event, PMC_FIXED_RDPMC_BASE | i,
+				MSR_CORE_PERF_FIXED_CTR0 + i,
+				MSR_CORE_PERF_GLOBAL_CTRL,
+				BIT_ULL(PMC_IDX_FIXED + i));
 }
 
 static void guest_test_arch_events(void)