[v6,11/20] KVM: selftests: Test Intel PMU architectural events on fixed counters
Commit Message
From: Jinrong Liang <cloudliang@tencent.com>
Extend the PMU counters test to validate architectural events using fixed
counters. The core logic is largely the same, the biggest difference
being that if a fixed counter exists, its associated event is available
(the SDM doesn't explicitly state this to be true, but it's KVM's ABI and
letting software program a fixed counter that doesn't actually count would
be quite bizarre).
Note, fixed counters rely on PERF_GLOBAL_CTRL.
Co-developed-by: Like Xu <likexu@tencent.com>
Signed-off-by: Like Xu <likexu@tencent.com>
Signed-off-by: Jinrong Liang <cloudliang@tencent.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
.../selftests/kvm/x86_64/pmu_counters_test.c | 53 ++++++++++++++++---
1 file changed, 45 insertions(+), 8 deletions(-)
Comments
On Fri, Nov 3, 2023 at 5:03 PM Sean Christopherson <seanjc@google.com> wrote:
>
> From: Jinrong Liang <cloudliang@tencent.com>
>
> Extend the PMU counters test to validate architectural events using fixed
> counters. The core logic is largely the same, the biggest difference
> being that if a fixed counter exists, its associated event is available
> (the SDM doesn't explicitly state this to be true, but it's KVM's ABI and
> letting software program a fixed counter that doesn't actually count would
> be quite bizarre).
>
> Note, fixed counters rely on PERF_GLOBAL_CTRL.
>
> Co-developed-by: Like Xu <likexu@tencent.com>
> Signed-off-by: Like Xu <likexu@tencent.com>
> Signed-off-by: Jinrong Liang <cloudliang@tencent.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
> ---
> .../selftests/kvm/x86_64/pmu_counters_test.c | 53 ++++++++++++++++---
> 1 file changed, 45 insertions(+), 8 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> index dd9a7864410c..4d3a5c94b8ba 100644
> --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> @@ -150,25 +150,46 @@ static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature even
> guest_assert_event_count(idx, event, pmc, pmc_msr);
> }
>
> +#define X86_PMU_FEATURE_NULL \
> +({ \
> + struct kvm_x86_pmu_feature feature = {}; \
> + \
> + feature; \
> +})
> +
> +static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
> +{
> + return !(*(u64 *)&event);
> +}
> +
> static void guest_test_arch_event(uint8_t idx)
> {
> const struct {
> struct kvm_x86_pmu_feature gp_event;
> + struct kvm_x86_pmu_feature fixed_event;
> } intel_event_to_feature[] = {
> - [INTEL_ARCH_CPU_CYCLES] = { X86_PMU_FEATURE_CPU_CYCLES },
> - [INTEL_ARCH_INSTRUCTIONS_RETIRED] = { X86_PMU_FEATURE_INSNS_RETIRED },
> - [INTEL_ARCH_REFERENCE_CYCLES] = { X86_PMU_FEATURE_REFERENCE_CYCLES },
> - [INTEL_ARCH_LLC_REFERENCES] = { X86_PMU_FEATURE_LLC_REFERENCES },
> - [INTEL_ARCH_LLC_MISSES] = { X86_PMU_FEATURE_LLC_MISSES },
> - [INTEL_ARCH_BRANCHES_RETIRED] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED },
> - [INTEL_ARCH_BRANCHES_MISPREDICTED] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED },
> + [INTEL_ARCH_CPU_CYCLES] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
> + [INTEL_ARCH_INSTRUCTIONS_RETIRED] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
> + /*
> + * Note, the fixed counter for reference cycles is NOT the same
> + * as the general purpose architectural event (because the GP
> + * event is garbage). The fixed counter explicitly counts at
> + * the same frequency as the TSC, whereas the GP event counts
> + * at a fixed, but uarch specific, frequency. Bundle them here
> + * for simplicity.
> + */
Implementation-specific is not necessarily garbage, though it would be
nice if there was a way to query the frequency rather than calibrating
against another clock.
Note that tools/perf/pmu-events/arch/x86/*/pipeline.json does
typically indicate the {0x3c, 1} frequency for the CPU in question.
> + [INTEL_ARCH_REFERENCE_CYCLES] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_CYCLES_FIXED },
> + [INTEL_ARCH_LLC_REFERENCES] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
> + [INTEL_ARCH_LLC_MISSES] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
> + [INTEL_ARCH_BRANCHES_RETIRED] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
> + [INTEL_ARCH_BRANCHES_MISPREDICTED] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
> };
>
> uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
> uint32_t pmu_version = guest_get_pmu_version();
> /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
> bool guest_has_perf_global_ctrl = pmu_version >= 2;
> - struct kvm_x86_pmu_feature gp_event;
> + struct kvm_x86_pmu_feature gp_event, fixed_event;
> uint32_t base_pmc_msr;
> unsigned int i;
>
> @@ -198,6 +219,22 @@ static void guest_test_arch_event(uint8_t idx)
> __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
> MSR_P6_EVNTSEL0 + i, eventsel);
> }
> +
> + if (!guest_has_perf_global_ctrl)
> + return;
> +
> + fixed_event = intel_event_to_feature[idx].fixed_event;
> + if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
> + return;
> +
> + i = fixed_event.f.bit;
> +
> + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * i));
> +
> + __guest_test_arch_event(idx, fixed_event, PMC_FIXED_RDPMC_BASE | i,
> + MSR_CORE_PERF_FIXED_CTR0 + i,
> + MSR_CORE_PERF_GLOBAL_CTRL,
> + BIT_ULL(PMC_IDX_FIXED + i));
> }
>
> static void guest_test_arch_events(void)
> --
> 2.42.0.869.gea05f2083d-goog
>
On Sat, Nov 04, 2023, Jim Mattson wrote:
> On Fri, Nov 3, 2023 at 5:03 PM Sean Christopherson <seanjc@google.com> wrote:
> > static void guest_test_arch_event(uint8_t idx)
> > {
> > const struct {
> > struct kvm_x86_pmu_feature gp_event;
> > + struct kvm_x86_pmu_feature fixed_event;
> > } intel_event_to_feature[] = {
> > - [INTEL_ARCH_CPU_CYCLES] = { X86_PMU_FEATURE_CPU_CYCLES },
> > - [INTEL_ARCH_INSTRUCTIONS_RETIRED] = { X86_PMU_FEATURE_INSNS_RETIRED },
> > - [INTEL_ARCH_REFERENCE_CYCLES] = { X86_PMU_FEATURE_REFERENCE_CYCLES },
> > - [INTEL_ARCH_LLC_REFERENCES] = { X86_PMU_FEATURE_LLC_REFERENCES },
> > - [INTEL_ARCH_LLC_MISSES] = { X86_PMU_FEATURE_LLC_MISSES },
> > - [INTEL_ARCH_BRANCHES_RETIRED] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED },
> > - [INTEL_ARCH_BRANCHES_MISPREDICTED] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED },
> > + [INTEL_ARCH_CPU_CYCLES] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
> > + [INTEL_ARCH_INSTRUCTIONS_RETIRED] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
> > + /*
> > + * Note, the fixed counter for reference cycles is NOT the same
> > + * as the general purpose architectural event (because the GP
> > + * event is garbage). The fixed counter explicitly counts at
> > + * the same frequency as the TSC, whereas the GP event counts
> > + * at a fixed, but uarch specific, frequency. Bundle them here
> > + * for simplicity.
> > + */
>
> Implementation-specific is not necessarily garbage, though it would be
> nice if there was a way to query the frequency rather than calibrating
> against another clock.
Heh, I'll drop the editorial commentry, though I still think an architectural event
with implementation-specific behavior is garbage :-)
@@ -150,25 +150,46 @@ static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature even
guest_assert_event_count(idx, event, pmc, pmc_msr);
}
+#define X86_PMU_FEATURE_NULL \
+({ \
+ struct kvm_x86_pmu_feature feature = {}; \
+ \
+ feature; \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+ return !(*(u64 *)&event);
+}
+
static void guest_test_arch_event(uint8_t idx)
{
const struct {
struct kvm_x86_pmu_feature gp_event;
+ struct kvm_x86_pmu_feature fixed_event;
} intel_event_to_feature[] = {
- [INTEL_ARCH_CPU_CYCLES] = { X86_PMU_FEATURE_CPU_CYCLES },
- [INTEL_ARCH_INSTRUCTIONS_RETIRED] = { X86_PMU_FEATURE_INSNS_RETIRED },
- [INTEL_ARCH_REFERENCE_CYCLES] = { X86_PMU_FEATURE_REFERENCE_CYCLES },
- [INTEL_ARCH_LLC_REFERENCES] = { X86_PMU_FEATURE_LLC_REFERENCES },
- [INTEL_ARCH_LLC_MISSES] = { X86_PMU_FEATURE_LLC_MISSES },
- [INTEL_ARCH_BRANCHES_RETIRED] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED },
- [INTEL_ARCH_BRANCHES_MISPREDICTED] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED },
+ [INTEL_ARCH_CPU_CYCLES] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+ [INTEL_ARCH_INSTRUCTIONS_RETIRED] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+ /*
+ * Note, the fixed counter for reference cycles is NOT the same
+ * as the general purpose architectural event (because the GP
+ * event is garbage). The fixed counter explicitly counts at
+ * the same frequency as the TSC, whereas the GP event counts
+ * at a fixed, but uarch specific, frequency. Bundle them here
+ * for simplicity.
+ */
+ [INTEL_ARCH_REFERENCE_CYCLES] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_CYCLES_FIXED },
+ [INTEL_ARCH_LLC_REFERENCES] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LLC_MISSES] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_RETIRED] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_MISPREDICTED] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
};
uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
uint32_t pmu_version = guest_get_pmu_version();
/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
bool guest_has_perf_global_ctrl = pmu_version >= 2;
- struct kvm_x86_pmu_feature gp_event;
+ struct kvm_x86_pmu_feature gp_event, fixed_event;
uint32_t base_pmc_msr;
unsigned int i;
@@ -198,6 +219,22 @@ static void guest_test_arch_event(uint8_t idx)
__guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
MSR_P6_EVNTSEL0 + i, eventsel);
}
+
+ if (!guest_has_perf_global_ctrl)
+ return;
+
+ fixed_event = intel_event_to_feature[idx].fixed_event;
+ if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
+ return;
+
+ i = fixed_event.f.bit;
+
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * i));
+
+ __guest_test_arch_event(idx, fixed_event, PMC_FIXED_RDPMC_BASE | i,
+ MSR_CORE_PERF_FIXED_CTR0 + i,
+ MSR_CORE_PERF_GLOBAL_CTRL,
+ BIT_ULL(PMC_IDX_FIXED + i));
}
static void guest_test_arch_events(void)