[2/7] KVM: selftests: Test Intel PMU architectural events on fixed counters
Commit Message
From: Jinrong Liang <cloudliang@tencent.com>
Update the test to cover Intel PMU architectural events on fixed counters.
Per the Intel SDM, PMU users can also count architectural performance
events on fixed counters (specifically, FIXED_CTR0 for the instructions
retired event and FIXED_CTR1 for the CPU core cycles event). Therefore, if
the guest's CPUID indicates that an architectural event is not available,
the corresponding fixed counter should not count that event either.
Co-developed-by: Like Xu <likexu@tencent.com>
Signed-off-by: Like Xu <likexu@tencent.com>
Signed-off-by: Jinrong Liang <cloudliang@tencent.com>
---
.../selftests/kvm/x86_64/pmu_cpuid_test.c | 37 +++++++++++++++++--
1 file changed, 33 insertions(+), 4 deletions(-)
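In code terms, the availability rule described above reduces to two checks
against CPUID.0AH. A minimal sketch, mirroring the masks the test defines
(EVT_LEN_MASK and EVT_LEN_OFS_BIT appear in the diff below); the helper
name is illustrative, not part of the patch:

/*
 * Arch event 'idx' is available iff its CPUID.0AH:EBX bit is clear
 * (1 == unavailable) and 'idx' falls within the enumerated event
 * vector length (CPUID.0AH:EAX[31:24]).
 */
static bool arch_event_is_supported(struct kvm_cpuid_entry2 *entry,
				    uint8_t idx)
{
	return !(entry->ebx & BIT_ULL(idx)) &&
	       (((entry->eax & EVT_LEN_MASK) >> EVT_LEN_OFS_BIT) > idx);
}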
Comments
On Thu, Mar 23, 2023, Like Xu wrote:
> From: Jinrong Liang <cloudliang@tencent.com>
>
> Update the test to cover Intel PMU architectural events on fixed counters.
> Per the Intel SDM, PMU users can also count architectural performance
> events on fixed counters (specifically, FIXED_CTR0 for the instructions
> retired event and FIXED_CTR1 for the CPU core cycles event). Therefore, if
> the guest's CPUID indicates that an architectural event is not available,
> the corresponding fixed counter should not count that event either.
>
> Co-developed-by: Like Xu <likexu@tencent.com>
> Signed-off-by: Like Xu <likexu@tencent.com>
> Signed-off-by: Jinrong Liang <cloudliang@tencent.com>
> ---
> .../selftests/kvm/x86_64/pmu_cpuid_test.c | 37 +++++++++++++++++--
> 1 file changed, 33 insertions(+), 4 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/pmu_cpuid_test.c
> index faab0a91e191..75434aa2a0ec 100644
> --- a/tools/testing/selftests/kvm/x86_64/pmu_cpuid_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/pmu_cpuid_test.c
> @@ -25,6 +25,9 @@
> #define GP_CTR_NUM_MASK GENMASK_ULL(15, GP_CTR_NUM_OFS_BIT)
> #define EVT_LEN_OFS_BIT 24
> #define EVT_LEN_MASK GENMASK_ULL(31, EVT_LEN_OFS_BIT)
> +#define INTEL_PMC_IDX_FIXED 32
> +#define RDPMC_FIXED_BASE BIT_ULL(30)
> +#define FIXED_CTR_NUM_MASK GENMASK_ULL(4, 0)
>
> #define ARCH_EVENT(select, umask) (((select) & 0xff) | ((umask) & 0xff) << 8)
>
> @@ -43,6 +46,14 @@ static const uint64_t arch_events[] = {
> [7] = ARCH_EVENT(0xa4, 0x1), /* Topdown Slots */
> };
>
> +/* Association of Fixed Counters with Architectural Performance Events */
> +static int fixed_events[] = {1, 0, 7};
> +
> +static uint64_t evt_code_for_fixed_ctr(uint8_t idx)
> +{
> + return arch_events[fixed_events[idx]];
> +}
This appears to be more fodder for common code.
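For illustration only, a common-code version could look something like the
sketch below; the header location and names are hypothetical, and it assumes
arch_events[] moves to common code as well:

/* e.g. in a shared selftests PMU header (hypothetical) */

/* Arch event (index into arch_events[]) counted by FIXED_CTR0..2. */
static const int intel_fixed_pmc_events[] = {1, 0, 7};

static inline uint64_t intel_fixed_pmc_evt_code(uint8_t fixed_idx)
{
	GUEST_ASSERT(fixed_idx < ARRAY_SIZE(intel_fixed_pmc_events));
	return arch_events[intel_fixed_pmc_events[fixed_idx]];
}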
> static struct kvm_vcpu *new_vcpu(void *guest_code)
> {
> struct kvm_vm *vm;
> @@ -88,8 +99,8 @@ static bool first_uc_arg_non_zero(struct ucall *uc, void *data)
> }
>
> static void intel_guest_run_arch_event(uint8_t version, uint8_t max_gp_num,
> - bool supported, uint32_t ctr_base_msr,
> - uint64_t evt_code)
> + uint8_t max_fixed_num, bool supported,
> + uint32_t ctr_base_msr, uint64_t evt_code)
> {
> uint32_t global_msr = MSR_CORE_PERF_GLOBAL_CTRL;
> unsigned int i;
> @@ -108,6 +119,23 @@ static void intel_guest_run_arch_event(uint8_t version, uint8_t max_gp_num,
> GUEST_SYNC(supported == !!_rdpmc(i));
> }
>
> + /* Only test arch events that have an associated fixed counter. */
> + if (version > 1 && max_fixed_num > 1 &&
> + (evt_code == evt_code_for_fixed_ctr(0) ||
> + evt_code == evt_code_for_fixed_ctr(1))) {
> + i = (evt_code == evt_code_for_fixed_ctr(0)) ? 0 : 1;
The ternary operator on top of a duplicate comparison isn't super intuitive.
Maybe use gotos? Definitely just an idea, not a requirement.
	if (version <= 1 || max_fixed_num <= 1)
		goto done;

	if (evt_code == evt_code_for_fixed_ctr(0))
		i = 0;
	else if (evt_code == evt_code_for_fixed_ctr(1))
		i = 1;
	else
		goto done;
> + wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
> + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * i));
> + wrmsr(global_msr, BIT_ULL(INTEL_PMC_IDX_FIXED + i));
> +
> + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
> +
> + wrmsr(global_msr, 0);
> +
> + GUEST_SYNC(supported == !!_rdpmc(RDPMC_FIXED_BASE | i));
> + }
> +
> GUEST_DONE();
> }
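For reference, in the guest code above (and in the full diff below),
MSR_CORE_PERF_FIXED_CTR_CTRL packs one 4-bit control field per fixed
counter, so BIT_ULL(4 * i) enables OS-only (ring 0) counting on fixed
counter i, and RDPMC reaches the fixed counters by setting bit 30 of the
index (RDPMC_FIXED_BASE). A hedged sketch of more self-documenting helpers;
the names are illustrative:

/* Per-counter fields in IA32_FIXED_CTR_CTRL (4 bits per counter). */
#define FIXED_CTR_CTRL_EN_OS(i)		BIT_ULL(4 * (i))	/* count in ring 0 */
#define FIXED_CTR_CTRL_EN_USR(i)	BIT_ULL(4 * (i) + 1)	/* count in rings > 0 */
#define FIXED_CTR_CTRL_EN_PMI(i)	BIT_ULL(4 * (i) + 3)	/* PMI on overflow */

/* Read fixed counter 'i'; ECX[30] selects the fixed-counter space. */
static inline uint64_t rdpmc_fixed(uint8_t i)
{
	return _rdpmc(RDPMC_FIXED_BASE | i);
}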
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/pmu_cpuid_test.c
index faab0a91e191..75434aa2a0ec 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_cpuid_test.c
@@ -25,6 +25,9 @@
#define GP_CTR_NUM_MASK GENMASK_ULL(15, GP_CTR_NUM_OFS_BIT)
#define EVT_LEN_OFS_BIT 24
#define EVT_LEN_MASK GENMASK_ULL(31, EVT_LEN_OFS_BIT)
+#define INTEL_PMC_IDX_FIXED 32
+#define RDPMC_FIXED_BASE BIT_ULL(30)
+#define FIXED_CTR_NUM_MASK GENMASK_ULL(4, 0)
#define ARCH_EVENT(select, umask) (((select) & 0xff) | ((umask) & 0xff) << 8)
@@ -43,6 +46,14 @@ static const uint64_t arch_events[] = {
[7] = ARCH_EVENT(0xa4, 0x1), /* Topdown Slots */
};
+/* Association of Fixed Counters with Architectural Performance Events */
+static int fixed_events[] = {1, 0, 7};
+
+static uint64_t evt_code_for_fixed_ctr(uint8_t idx)
+{
+ return arch_events[fixed_events[idx]];
+}
+
static struct kvm_vcpu *new_vcpu(void *guest_code)
{
struct kvm_vm *vm;
@@ -88,8 +99,8 @@ static bool first_uc_arg_non_zero(struct ucall *uc, void *data)
}
static void intel_guest_run_arch_event(uint8_t version, uint8_t max_gp_num,
- bool supported, uint32_t ctr_base_msr,
- uint64_t evt_code)
+ uint8_t max_fixed_num, bool supported,
+ uint32_t ctr_base_msr, uint64_t evt_code)
{
uint32_t global_msr = MSR_CORE_PERF_GLOBAL_CTRL;
unsigned int i;
@@ -108,6 +119,23 @@ static void intel_guest_run_arch_event(uint8_t version, uint8_t max_gp_num,
GUEST_SYNC(supported == !!_rdpmc(i));
}
+ /* Only test arch events that have an associated fixed counter. */
+ if (version > 1 && max_fixed_num > 1 &&
+ (evt_code == evt_code_for_fixed_ctr(0) ||
+ evt_code == evt_code_for_fixed_ctr(1))) {
+ i = (evt_code == evt_code_for_fixed_ctr(0)) ? 0 : 1;
+
+ wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * i));
+ wrmsr(global_msr, BIT_ULL(INTEL_PMC_IDX_FIXED + i));
+
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+
+ wrmsr(global_msr, 0);
+
+ GUEST_SYNC(supported == !!_rdpmc(RDPMC_FIXED_BASE | i));
+ }
+
GUEST_DONE();
}
@@ -131,9 +159,10 @@ static void test_arch_events_setup(struct kvm_vcpu *vcpu, uint8_t evt_vector,
is_supported = !(entry->ebx & BIT_ULL(idx)) &&
(((entry->eax & EVT_LEN_MASK) >> EVT_LEN_OFS_BIT) > idx);
- vcpu_args_set(vcpu, 5, entry->eax & PMU_VERSION_MASK,
+ vcpu_args_set(vcpu, 6, entry->eax & PMU_VERSION_MASK,
(entry->eax & GP_CTR_NUM_MASK) >> GP_CTR_NUM_OFS_BIT,
- is_supported, ctr_msr, arch_events[idx]);
+ (entry->edx & FIXED_CTR_NUM_MASK), is_supported,
+ ctr_msr, arch_events[idx]);
}
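For context, all six arguments in the vcpu_args_set() call above come
straight out of CPUID.0AH. A hedged decoding sketch, reusing the test's
masks; the struct and helper are hypothetical:

struct pmu_caps {
	uint8_t version;	/* CPUID.0AH:EAX[7:0] */
	uint8_t nr_gp_ctrs;	/* CPUID.0AH:EAX[15:8] */
	uint8_t nr_fixed_ctrs;	/* CPUID.0AH:EDX[4:0] */
	uint32_t unavail_mask;	/* CPUID.0AH:EBX, 1 == event unavailable */
};

static struct pmu_caps get_pmu_caps(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, 0xa);

	return (struct pmu_caps) {
		.version = entry->eax & PMU_VERSION_MASK,
		.nr_gp_ctrs = (entry->eax & GP_CTR_NUM_MASK) >> GP_CTR_NUM_OFS_BIT,
		.nr_fixed_ctrs = entry->edx & FIXED_CTR_NUM_MASK,
		.unavail_mask = entry->ebx,
	};
}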
static void intel_check_arch_event_is_unavl(uint8_t idx)