[v7,03/19] KVM: x86/pmu: Remove KVM's enumeration of Intel's architectural encodings

Message ID 20231108003135.546002-4-seanjc@google.com
State New
Series KVM: x86/pmu: selftests: Fixes and new tests

Commit Message

Sean Christopherson Nov. 8, 2023, 12:31 a.m. UTC
  Drop KVM's enumeration of Intel's architectural event encodings, and
instead open code the three encodings (of which only two are real) that
KVM uses to emulate fixed counters.  Now that KVM doesn't incorrectly
enforce the availability of architectural encodings, there is no reason
for KVM to ever care about the encodings themselves, at least not in the
current format of an array indexed by the encoding's position in CPUID.

Opportunistically add a comment to explain why KVM cares about eventsel
values for fixed counters.

Suggested-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/pmu_intel.c | 72 ++++++++++++------------------------
 1 file changed, 23 insertions(+), 49 deletions(-)
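
[ Editor's illustration, not part of the patch.  The changelog's point about
  "an array indexed by the encoding's position in CPUID" refers to CPUID.0AH,
  which enumerates architectural event availability by index: EBX is a bit
  vector whose length is reported in EAX[31:24], and a set bit N means
  architectural event N is NOT available.  A minimal userspace-style sketch
  of that check (the helper name is illustrative): ]

#include <stdbool.h>
#include <stdint.h>
#include <cpuid.h>

static bool intel_arch_event_available(unsigned int event_idx)
{
	uint32_t eax, ebx, ecx, edx;

	if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
		return false;

	/* Events beyond the enumerated bit vector length aren't enumerated. */
	if (event_idx >= ((eax >> 24) & 0xff))
		return false;

	/* A set bit in EBX means the event is unavailable. */
	return !(ebx & (1u << event_idx));
}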
  

Comments

Liang, Kan Nov. 8, 2023, 4:06 p.m. UTC | #1
On 2023-11-07 7:31 p.m., Sean Christopherson wrote:
> Drop KVM's enumeration of Intel's architectural event encodings, and
> instead open code the three encodings (of which only two are real) that
> KVM uses to emulate fixed counters.  Now that KVM doesn't incorrectly
> enforce the availability of architectural encodings, there is no reason
> for KVM to ever care about the encodings themselves, at least not in the
> current format of an array indexed by the encoding's position in CPUID.
> 
> Opportunistically add a comment to explain why KVM cares about eventsel
> values for fixed counters.
> 
> Suggested-by: Jim Mattson <jmattson@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/vmx/pmu_intel.c | 72 ++++++++++++------------------------
>  1 file changed, 23 insertions(+), 49 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 7737ee2fc62f..c4f2c6a268e7 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -22,52 +22,6 @@
>  
>  #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
>  
> -enum intel_pmu_architectural_events {
> -	/*
> -	 * The order of the architectural events matters as support for each
> -	 * event is enumerated via CPUID using the index of the event.
> -	 */
> -	INTEL_ARCH_CPU_CYCLES,
> -	INTEL_ARCH_INSTRUCTIONS_RETIRED,
> -	INTEL_ARCH_REFERENCE_CYCLES,
> -	INTEL_ARCH_LLC_REFERENCES,
> -	INTEL_ARCH_LLC_MISSES,
> -	INTEL_ARCH_BRANCHES_RETIRED,
> -	INTEL_ARCH_BRANCHES_MISPREDICTED,
> -
> -	NR_REAL_INTEL_ARCH_EVENTS,
> -
> -	/*
> -	 * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
> -	 * TSC reference cycles.  The architectural reference cycles event may
> -	 * or may not actually use the TSC as the reference, e.g. might use the
> -	 * core crystal clock or the bus clock (yeah, "architectural").
> -	 */
> -	PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS,
> -	NR_INTEL_ARCH_EVENTS,
> -};
> -
> -static struct {
> -	u8 eventsel;
> -	u8 unit_mask;
> -} const intel_arch_events[] = {
> -	[INTEL_ARCH_CPU_CYCLES]			= { 0x3c, 0x00 },
> -	[INTEL_ARCH_INSTRUCTIONS_RETIRED]	= { 0xc0, 0x00 },
> -	[INTEL_ARCH_REFERENCE_CYCLES]		= { 0x3c, 0x01 },
> -	[INTEL_ARCH_LLC_REFERENCES]		= { 0x2e, 0x4f },
> -	[INTEL_ARCH_LLC_MISSES]			= { 0x2e, 0x41 },
> -	[INTEL_ARCH_BRANCHES_RETIRED]		= { 0xc4, 0x00 },
> -	[INTEL_ARCH_BRANCHES_MISPREDICTED]	= { 0xc5, 0x00 },
> -	[PSEUDO_ARCH_REFERENCE_CYCLES]		= { 0x00, 0x03 },
> -};
> -
> -/* mapping between fixed pmc index and intel_arch_events array */
> -static int fixed_pmc_events[] = {
> -	[0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
> -	[1] = INTEL_ARCH_CPU_CYCLES,
> -	[2] = PSEUDO_ARCH_REFERENCE_CYCLES,
> -};
> -
>  static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
>  {
>  	struct kvm_pmc *pmc;
> @@ -442,8 +396,29 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	return 0;
>  }
>  
> +/*
> + * Map fixed counter events to architectural general purpose event encodings.
> + * Perf doesn't provide APIs to allow KVM to directly program a fixed counter,
> + * and so KVM instead programs the architectural event to effectively request
> + * the fixed counter.  Perf isn't guaranteed to use a fixed counter and may
> + * instead program the encoding into a general purpose counter, e.g. if a
> + * different perf_event is already utilizing the requested counter, but the end
> + * result is the same (ignoring the fact that using a general purpose counter
> + * will likely exacerbate counter contention).
> + *
> + * Note, reference cycles is counted using a perf-defined "pseudo-encoding",
> + * as there is no architectural general purpose encoding for reference cycles.

It's not the case for the latest Intel platforms anymore. Please see
ffbe4ab0beda ("perf/x86/intel: Extend the ref-cycles event to GP counters").

Maybe perf should export .event_map to KVM somehow.

Thanks,
Kan
> + */
>  static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
>  {
> +	const struct {
> +		u8 eventsel;
> +		u8 unit_mask;
> +	} fixed_pmc_events[] = {
> +		[0] = { 0xc0, 0x00 }, /* Instruction Retired / PERF_COUNT_HW_INSTRUCTIONS. */
> +		[1] = { 0x3c, 0x00 }, /* CPU Cycles/ PERF_COUNT_HW_CPU_CYCLES. */
> +		[2] = { 0x00, 0x03 }, /* Reference Cycles / PERF_COUNT_HW_REF_CPU_CYCLES*/
> +	};
>  	int i;
>  
>  	BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
> @@ -451,10 +426,9 @@ static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
>  	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
>  		int index = array_index_nospec(i, KVM_PMC_MAX_FIXED);
>  		struct kvm_pmc *pmc = &pmu->fixed_counters[index];
> -		u32 event = fixed_pmc_events[index];
>  
> -		pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
> -				 intel_arch_events[event].eventsel;
> +		pmc->eventsel = (fixed_pmc_events[index].unit_mask << 8) |
> +				 fixed_pmc_events[index].eventsel;
>  	}
>  }
>
  
Sean Christopherson Nov. 8, 2023, 7:35 p.m. UTC | #2
On Wed, Nov 08, 2023, Kan Liang wrote:
> On 2023-11-07 7:31 p.m., Sean Christopherson wrote:
> > @@ -442,8 +396,29 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  	return 0;
> >  }
> >  
> > +/*
> > + * Map fixed counter events to architectural general purpose event encodings.
> > + * Perf doesn't provide APIs to allow KVM to directly program a fixed counter,
> > + * and so KVM instead programs the architectural event to effectively request
> > + * the fixed counter.  Perf isn't guaranteed to use a fixed counter and may
> > + * instead program the encoding into a general purpose counter, e.g. if a
> > + * different perf_event is already utilizing the requested counter, but the end
> > + * result is the same (ignoring the fact that using a general purpose counter
> > + * will likely exacerbate counter contention).
> > + *
> > + * Note, reference cycles is counted using a perf-defined "pseudo-encoding",
> > + * as there is no architectural general purpose encoding for reference cycles.
> 
> It's not the case for the latest Intel platforms anymore. Please see
> ffbe4ab0beda ("perf/x86/intel: Extend the ref-cycles event to GP counters").

Ugh, yeah.  But that should actually be easier to do on top.

> Maybe perf should export .event_map to KVM somehow.

Oh for ***** sake, perf already does export this for KVM.  Untested, but the below
should do the trick.  If I need to spin another version of this series then I'll
fold it in, otherwise I'll post it as something on top.

There's also an optimization to be had for kvm_pmu_trigger_event(), which incurs
an indirect branch not only on every invocation, but on every iteration.  I'll post
this one separately.

diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 5fc5a62af428..a02e13c2e5e6 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -405,25 +405,32 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  * different perf_event is already utilizing the requested counter, but the end
  * result is the same (ignoring the fact that using a general purpose counter
  * will likely exacerbate counter contention).
- *
- * Note, reference cycles is counted using a perf-defined "pseudo-encoding",
- * as there is no architectural general purpose encoding for reference cycles.
  */
 static u64 intel_get_fixed_pmc_eventsel(int index)
 {
-       const struct {
-               u8 eventsel;
-               u8 unit_mask;
-       } fixed_pmc_events[] = {
-               [0] = { 0xc0, 0x00 }, /* Instruction Retired / PERF_COUNT_HW_INSTRUCTIONS. */
-               [1] = { 0x3c, 0x00 }, /* CPU Cycles/ PERF_COUNT_HW_CPU_CYCLES. */
-               [2] = { 0x00, 0x03 }, /* Reference Cycles / PERF_COUNT_HW_REF_CPU_CYCLES*/
+       enum perf_hw_id perf_id;
+       u64 eventsel;
+
+       BUILD_BUG_ON(KVM_PMC_MAX_FIXED != 3);
+
+       switch (index) {
+       case 0:
+               perf_id = PERF_COUNT_HW_INSTRUCTIONS;
+               break;
+       case 1:
+               perf_id = PERF_COUNT_HW_CPU_CYCLES;
+               break;
+       case 2:
+               perf_id = PERF_COUNT_HW_REF_CPU_CYCLES;
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               return 0;
        };
 
-       BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
-
-       return (fixed_pmc_events[index].unit_mask << 8) |
-               fixed_pmc_events[index].eventsel;
+       eventsel = perf_get_hw_event_config(perf_id);
+       WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
+       return eventsel;
 }
 
 static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
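
[ The kvm_pmu_trigger_event() change mentioned above isn't part of this
  thread.  Purely as a generic illustration of the pattern being described --
  resolving an indirect call once, up front, instead of once per counter
  inside the loop -- with entirely hypothetical names and a grossly
  simplified body: ]

/* Hypothetical sketch, not KVM code: hoist the indirect resolution. */
static void trigger_event_sketch(struct kvm_pmu *pmu, int hw_event,
				 u64 (*resolve_eventsel)(int hw_event))
{
	u64 eventsel = resolve_eventsel(hw_event);	/* one indirect call */
	int i;

	for (i = 0; i < pmu->nr_arch_gp_counters; i++) {
		struct kvm_pmc *pmc = &pmu->gp_counters[i];

		/* Compare against the precomputed eventsel in the loop. */
		if ((pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT) ==
		    (eventsel & ARCH_PERFMON_EVENTSEL_EVENT))
			pmc->counter++;	/* stand-in for emulating the event */
	}
}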
  
Liang, Kan Nov. 8, 2023, 8:38 p.m. UTC | #3
On 2023-11-08 2:35 p.m., Sean Christopherson wrote:
> On Wed, Nov 08, 2023, Kan Liang wrote:
>> On 2023-11-07 7:31 p.m., Sean Christopherson wrote:
>>> @@ -442,8 +396,29 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>>  	return 0;
>>>  }
>>>  
>>> +/*
>>> + * Map fixed counter events to architectural general purpose event encodings.
>>> + * Perf doesn't provide APIs to allow KVM to directly program a fixed counter,
>>> + * and so KVM instead programs the architectural event to effectively request
>>> + * the fixed counter.  Perf isn't guaranteed to use a fixed counter and may
>>> + * instead program the encoding into a general purpose counter, e.g. if a
>>> + * different perf_event is already utilizing the requested counter, but the end
>>> + * result is the same (ignoring the fact that using a general purpose counter
>>> + * will likely exacerbate counter contention).
>>> + *
>>> + * Note, reference cycles is counted using a perf-defined "pseudo-encoding",
>>> + * as there is no architectural general purpose encoding for reference cycles.
>>
>> It's not the case for the latest Intel platforms anymore. Please see
>> ffbe4ab0beda ("perf/x86/intel: Extend the ref-cycles event to GP counters").
> 
> Ugh, yeah.  But that should actually be easier to do on top.
> 
>> Maybe perf should export .event_map to KVM somehow.
> 
> Oh for ***** sake, perf already does export this for KVM.  Untested, but the below
> should do the trick.  If I need to spin another version of this series then I'll
> fold it in, otherwise I'll post it as something on top.
> 
> There's also an optimization to be had for kvm_pmu_trigger_event(), which incurs
> an indirect branch not only on every invocation, but on every iteration.  I'll post
> this one separately.
> 
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 5fc5a62af428..a02e13c2e5e6 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -405,25 +405,32 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   * different perf_event is already utilizing the requested counter, but the end
>   * result is the same (ignoring the fact that using a general purpose counter
>   * will likely exacerbate counter contention).
> - *
> - * Note, reference cycles is counted using a perf-defined "pseudo-encoding",
> - * as there is no architectural general purpose encoding for reference cycles.
>   */
>  static u64 intel_get_fixed_pmc_eventsel(int index)
>  {
> -       const struct {
> -               u8 eventsel;
> -               u8 unit_mask;
> -       } fixed_pmc_events[] = {
> -               [0] = { 0xc0, 0x00 }, /* Instruction Retired / PERF_COUNT_HW_INSTRUCTIONS. */
> -               [1] = { 0x3c, 0x00 }, /* CPU Cycles/ PERF_COUNT_HW_CPU_CYCLES. */
> -               [2] = { 0x00, 0x03 }, /* Reference Cycles / PERF_COUNT_HW_REF_CPU_CYCLES*/
> +       enum perf_hw_id perf_id;
> +       u64 eventsel;
> +
> +       BUILD_BUG_ON(KVM_PMC_MAX_FIXED != 3);
> +
> +       switch (index) {
> +       case 0:
> +               perf_id = PERF_COUNT_HW_INSTRUCTIONS;
> +               break;
> +       case 1:
> +               perf_id = PERF_COUNT_HW_CPU_CYCLES;
> +               break;
> +       case 2:
> +               perf_id = PERF_COUNT_HW_REF_CPU_CYCLES;
> +               break;
> +       default:
> +               WARN_ON_ONCE(1);
> +               return 0;
>         };
>  
> -       BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
> -
> -       return (fixed_pmc_events[index].unit_mask << 8) |
> -               fixed_pmc_events[index].eventsel;
> +       eventsel = perf_get_hw_event_config(perf_id);

Yes, the perf_get_hw_event_config() can tell the updated event encoding.

Thanks,
Kan

> +       WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
> +       return eventsel;
>  }
>  
>  static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
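
[ For reference: perf_get_hw_event_config(), which Kan confirms above, is
  exported by perf for exactly this purpose and, roughly, at the time of this
  thread (reproduced from memory -- check arch/x86/events/core.c in your
  tree), looks like the following, so it naturally picks up per-generation
  changes to the event map such as ffbe4ab0beda: ]

u64 perf_get_hw_event_config(int hw_event)
{
	int max = x86_pmu.max_events;

	/* Resolve the generic perf hw event ID via the vendor event map. */
	if (hw_event < max)
		return x86_pmu.event_map(array_index_nospec(hw_event, max));

	return 0;
}
EXPORT_SYMBOL_GPL(perf_get_hw_event_config);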
  

Patch

diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 7737ee2fc62f..c4f2c6a268e7 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -22,52 +22,6 @@ 
 
 #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
 
-enum intel_pmu_architectural_events {
-	/*
-	 * The order of the architectural events matters as support for each
-	 * event is enumerated via CPUID using the index of the event.
-	 */
-	INTEL_ARCH_CPU_CYCLES,
-	INTEL_ARCH_INSTRUCTIONS_RETIRED,
-	INTEL_ARCH_REFERENCE_CYCLES,
-	INTEL_ARCH_LLC_REFERENCES,
-	INTEL_ARCH_LLC_MISSES,
-	INTEL_ARCH_BRANCHES_RETIRED,
-	INTEL_ARCH_BRANCHES_MISPREDICTED,
-
-	NR_REAL_INTEL_ARCH_EVENTS,
-
-	/*
-	 * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
-	 * TSC reference cycles.  The architectural reference cycles event may
-	 * or may not actually use the TSC as the reference, e.g. might use the
-	 * core crystal clock or the bus clock (yeah, "architectural").
-	 */
-	PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS,
-	NR_INTEL_ARCH_EVENTS,
-};
-
-static struct {
-	u8 eventsel;
-	u8 unit_mask;
-} const intel_arch_events[] = {
-	[INTEL_ARCH_CPU_CYCLES]			= { 0x3c, 0x00 },
-	[INTEL_ARCH_INSTRUCTIONS_RETIRED]	= { 0xc0, 0x00 },
-	[INTEL_ARCH_REFERENCE_CYCLES]		= { 0x3c, 0x01 },
-	[INTEL_ARCH_LLC_REFERENCES]		= { 0x2e, 0x4f },
-	[INTEL_ARCH_LLC_MISSES]			= { 0x2e, 0x41 },
-	[INTEL_ARCH_BRANCHES_RETIRED]		= { 0xc4, 0x00 },
-	[INTEL_ARCH_BRANCHES_MISPREDICTED]	= { 0xc5, 0x00 },
-	[PSEUDO_ARCH_REFERENCE_CYCLES]		= { 0x00, 0x03 },
-};
-
-/* mapping between fixed pmc index and intel_arch_events array */
-static int fixed_pmc_events[] = {
-	[0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
-	[1] = INTEL_ARCH_CPU_CYCLES,
-	[2] = PSEUDO_ARCH_REFERENCE_CYCLES,
-};
-
 static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 {
 	struct kvm_pmc *pmc;
@@ -442,8 +396,29 @@  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	return 0;
 }
 
+/*
+ * Map fixed counter events to architectural general purpose event encodings.
+ * Perf doesn't provide APIs to allow KVM to directly program a fixed counter,
+ * and so KVM instead programs the architectural event to effectively request
+ * the fixed counter.  Perf isn't guaranteed to use a fixed counter and may
+ * instead program the encoding into a general purpose counter, e.g. if a
+ * different perf_event is already utilizing the requested counter, but the end
+ * result is the same (ignoring the fact that using a general purpose counter
+ * will likely exacerbate counter contention).
+ *
+ * Note, reference cycles is counted using a perf-defined "pseudo-encoding",
+ * as there is no architectural general purpose encoding for reference cycles.
+ */
 static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
 {
+	const struct {
+		u8 eventsel;
+		u8 unit_mask;
+	} fixed_pmc_events[] = {
+		[0] = { 0xc0, 0x00 }, /* Instruction Retired / PERF_COUNT_HW_INSTRUCTIONS. */
+		[1] = { 0x3c, 0x00 }, /* CPU Cycles/ PERF_COUNT_HW_CPU_CYCLES. */
+		[2] = { 0x00, 0x03 }, /* Reference Cycles / PERF_COUNT_HW_REF_CPU_CYCLES*/
+	};
 	int i;
 
 	BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
@@ -451,10 +426,9 @@  static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
 	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
 		int index = array_index_nospec(i, KVM_PMC_MAX_FIXED);
 		struct kvm_pmc *pmc = &pmu->fixed_counters[index];
-		u32 event = fixed_pmc_events[index];
 
-		pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
-				 intel_arch_events[event].eventsel;
+		pmc->eventsel = (fixed_pmc_events[index].unit_mask << 8) |
+				 fixed_pmc_events[index].eventsel;
 	}
 }