[v2,10/15] KVM: x86/vmx: Check Arch LBR config when return perf capabilities
Commit Message
Two new bit fields(VM_EXIT_CLEAR_IA32_LBR_CTL, VM_ENTRY_LOAD_IA32_LBR_CTL)
are added to support guest Arch LBR. These two bits should be set in order
to make Arch LBR workable in both guest and host. Since we don't support
Arch LBR in nested guest, clear the two bits before run L2 VM.
Co-developed-by: Like Xu <like.xu@linux.intel.com>
Signed-off-by: Like Xu <like.xu@linux.intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
arch/x86/include/asm/vmx.h | 2 ++
arch/x86/kvm/vmx/capabilities.h | 5 +++++
arch/x86/kvm/vmx/nested.c | 8 ++++++++
arch/x86/kvm/vmx/vmx.c | 11 +++++++++++
arch/x86/kvm/vmx/vmx.h | 6 ++++--
5 files changed, 30 insertions(+), 2 deletions(-)
Comments
On 25/11/2022 12:05 pm, Yang Weijiang wrote:
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index b28be793de29..59bdd9873fb5 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2360,6 +2360,10 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
> if (guest_efer != host_efer)
> exec_control |= VM_ENTRY_LOAD_IA32_EFER;
> }
> +
> + if (cpu_has_vmx_arch_lbr())
> + exec_control &= ~VM_ENTRY_LOAD_IA32_LBR_CTL;
Please verify that (arch) lbr is not available in the nested test case.
Thus when we support nested lbr, the developer will be aware of the need for
test case updates
> +
> vm_entry_controls_set(vmx, exec_control);
>
> /*
On 12/22/2022 7:06 PM, Like Xu wrote:
> On 25/11/2022 12:05 pm, Yang Weijiang wrote:
>> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
>> index b28be793de29..59bdd9873fb5 100644
>> --- a/arch/x86/kvm/vmx/nested.c
>> +++ b/arch/x86/kvm/vmx/nested.c
>> @@ -2360,6 +2360,10 @@ static void prepare_vmcs02_early(struct
>> vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
>> if (guest_efer != host_efer)
>> exec_control |= VM_ENTRY_LOAD_IA32_EFER;
>> }
>> +
>> + if (cpu_has_vmx_arch_lbr())
>> + exec_control &= ~VM_ENTRY_LOAD_IA32_LBR_CTL;
>
> Please verify that (arch) lbr is not available in the nested test case.
> Thus when we support nested lbr, the developer will be aware of the
> need for test case updates
It's not available in either KUT or selftest cases now.
>
>> +
>> vm_entry_controls_set(vmx, exec_control);
>> /*
On Thu, Nov 24, 2022, Yang Weijiang wrote:
> return vmcs_config.cpu_based_2nd_exec_ctrl &
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index b28be793de29..59bdd9873fb5 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2360,6 +2360,10 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
> if (guest_efer != host_efer)
> exec_control |= VM_ENTRY_LOAD_IA32_EFER;
> }
> +
> + if (cpu_has_vmx_arch_lbr())
> + exec_control &= ~VM_ENTRY_LOAD_IA32_LBR_CTL;
> +
> vm_entry_controls_set(vmx, exec_control);
>
> /*
> @@ -2374,6 +2378,10 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
> exec_control |= VM_EXIT_LOAD_IA32_EFER;
> else
> exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
> +
> + if (cpu_has_vmx_arch_lbr())
> + exec_control &= ~VM_EXIT_CLEAR_IA32_LBR_CTL;
This is wrong. If KVM doesn't enumerate suport to L1, then L1's value needs to
be preserved on entry/exit to/from L1<->L2. I.e. just delete these lines.
> vm_exit_controls_set(vmx, exec_control);
>
> /*
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 2ab4c33b5008..359da38a19a1 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2599,6 +2599,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
> { VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER },
> { VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS },
> { VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL },
> + { VM_ENTRY_LOAD_IA32_LBR_CTL, VM_EXIT_CLEAR_IA32_LBR_CTL },
> };
>
> memset(vmcs_conf, 0, sizeof(*vmcs_conf));
> @@ -4794,6 +4795,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> vpid_sync_context(vmx->vpid);
>
> vmx_update_fb_clear_dis(vcpu, vmx);
> +
> + if (!init_event && cpu_has_vmx_arch_lbr())
> + vmcs_write64(GUEST_IA32_LBR_CTL, 0);
This belongs in init_vmcs().
> }
>
> static void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
> @@ -6191,6 +6195,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
> vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
> pr_err("PerfGlobCtl = 0x%016llx\n",
> vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
> + if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR) &&
Follow every other field and check the VMCS support, not the cap.
> + vmentry_ctl & VM_ENTRY_LOAD_IA32_LBR_CTL)
> + pr_err("ArchLBRCtl = 0x%016llx\n",
> + vmcs_read64(GUEST_IA32_LBR_CTL));
> if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
> pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
> pr_err("Interruptibility = %08x ActivityState = %08x\n",
...
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index a3da84f4ea45..f68c8a53a248 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -493,7 +493,8 @@ static inline u8 vmx_get_rvi(void)
> VM_ENTRY_LOAD_IA32_EFER | \
> VM_ENTRY_LOAD_BNDCFGS | \
> VM_ENTRY_PT_CONCEAL_PIP | \
> - VM_ENTRY_LOAD_IA32_RTIT_CTL)
> + VM_ENTRY_LOAD_IA32_RTIT_CTL | \
> + VM_ENTRY_LOAD_IA32_LBR_CTL)
>
> #define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
> (VM_EXIT_SAVE_DEBUG_CONTROLS | \
> @@ -515,7 +516,8 @@ static inline u8 vmx_get_rvi(void)
> VM_EXIT_LOAD_IA32_EFER | \
> VM_EXIT_CLEAR_BNDCFGS | \
> VM_EXIT_PT_CONCEAL_PIP | \
> - VM_EXIT_CLEAR_IA32_RTIT_CTL)
> + VM_EXIT_CLEAR_IA32_RTIT_CTL | \
> + VM_EXIT_CLEAR_IA32_LBR_CTL)
Enabling these flags by default is wrong. KVM will clear LBR_CTL on VM-Exit when
arch LBRs are supported, and AFAICT, never restore the host's values. And that
will happen regardless of whether or not the guest is using arch LBRs. I assume
the correct approach is to toggle these fields, but I'll be damned if I can figure
out what the intended behavior of the existing LBR suport is. I'll follow up in
the cover letter.
@@ -102,6 +102,7 @@
#define VM_EXIT_CLEAR_BNDCFGS 0x00800000
#define VM_EXIT_PT_CONCEAL_PIP 0x01000000
#define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000
+#define VM_EXIT_CLEAR_IA32_LBR_CTL 0x04000000
#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
@@ -115,6 +116,7 @@
#define VM_ENTRY_LOAD_BNDCFGS 0x00010000
#define VM_ENTRY_PT_CONCEAL_PIP 0x00020000
#define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000
+#define VM_ENTRY_LOAD_IA32_LBR_CTL 0x00200000
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
@@ -395,6 +395,11 @@ static inline bool vmx_pebs_supported(void)
return boot_cpu_has(X86_FEATURE_PEBS) && kvm_pmu_cap.pebs_ept;
}
+static inline bool cpu_has_vmx_arch_lbr(void)
+{
+ return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_LBR_CTL;
+}
+
static inline bool cpu_has_notify_vmexit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -2360,6 +2360,10 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
if (guest_efer != host_efer)
exec_control |= VM_ENTRY_LOAD_IA32_EFER;
}
+
+ if (cpu_has_vmx_arch_lbr())
+ exec_control &= ~VM_ENTRY_LOAD_IA32_LBR_CTL;
+
vm_entry_controls_set(vmx, exec_control);
/*
@@ -2374,6 +2378,10 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
exec_control |= VM_EXIT_LOAD_IA32_EFER;
else
exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
+
+ if (cpu_has_vmx_arch_lbr())
+ exec_control &= ~VM_EXIT_CLEAR_IA32_LBR_CTL;
+
vm_exit_controls_set(vmx, exec_control);
/*
@@ -2599,6 +2599,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
{ VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER },
{ VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS },
{ VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL },
+ { VM_ENTRY_LOAD_IA32_LBR_CTL, VM_EXIT_CLEAR_IA32_LBR_CTL },
};
memset(vmcs_conf, 0, sizeof(*vmcs_conf));
@@ -4794,6 +4795,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vpid_sync_context(vmx->vpid);
vmx_update_fb_clear_dis(vcpu, vmx);
+
+ if (!init_event && cpu_has_vmx_arch_lbr())
+ vmcs_write64(GUEST_IA32_LBR_CTL, 0);
}
static void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
@@ -6191,6 +6195,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
pr_err("PerfGlobCtl = 0x%016llx\n",
vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
+ if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR) &&
+ vmentry_ctl & VM_ENTRY_LOAD_IA32_LBR_CTL)
+ pr_err("ArchLBRCtl = 0x%016llx\n",
+ vmcs_read64(GUEST_IA32_LBR_CTL));
if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
pr_err("Interruptibility = %08x ActivityState = %08x\n",
@@ -7700,6 +7708,9 @@ static u64 vmx_get_perf_capabilities(void)
perf_cap &= ~PERF_CAP_PEBS_BASELINE;
}
+ if (boot_cpu_has(X86_FEATURE_ARCH_LBR) && !cpu_has_vmx_arch_lbr())
+ perf_cap &= ~PMU_CAP_LBR_FMT;
+
return perf_cap;
}
@@ -493,7 +493,8 @@ static inline u8 vmx_get_rvi(void)
VM_ENTRY_LOAD_IA32_EFER | \
VM_ENTRY_LOAD_BNDCFGS | \
VM_ENTRY_PT_CONCEAL_PIP | \
- VM_ENTRY_LOAD_IA32_RTIT_CTL)
+ VM_ENTRY_LOAD_IA32_RTIT_CTL | \
+ VM_ENTRY_LOAD_IA32_LBR_CTL)
#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
(VM_EXIT_SAVE_DEBUG_CONTROLS | \
@@ -515,7 +516,8 @@ static inline u8 vmx_get_rvi(void)
VM_EXIT_LOAD_IA32_EFER | \
VM_EXIT_CLEAR_BNDCFGS | \
VM_EXIT_PT_CONCEAL_PIP | \
- VM_EXIT_CLEAR_IA32_RTIT_CTL)
+ VM_EXIT_CLEAR_IA32_RTIT_CTL | \
+ VM_EXIT_CLEAR_IA32_LBR_CTL)
#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
(PIN_BASED_EXT_INTR_MASK | \