[v1,06/23] KVM: VMX: Defer enabling FRED MSRs save/load until after set CPUID

Message ID 20231108183003.5981-7-xin3.li@intel.com
State New
Headers
Series Enable FRED with KVM VMX |

Commit Message

Li, Xin3 Nov. 8, 2023, 6:29 p.m. UTC
  Clear FRED VM entry/exit controls when initializing a vCPU, and set
these controls only if FRED is enumerated after set CPUID.

FRED VM entry/exit controls need to be set to establish context
sufficient to support FRED event delivery immediately after VM entry
and exit.  However it is not required to save/load FRED MSRs for
a non-FRED guest, which aren't supposed to access FRED MSRs.

A non-FRED guest should get #GP upon accessing FRED MSRs, otherwise
it corrupts host FRED MSRs.

Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)
  

Comments

Chao Gao Nov. 9, 2023, 9:15 a.m. UTC | #1
On Wed, Nov 08, 2023 at 10:29:46AM -0800, Xin Li wrote:
>Clear FRED VM entry/exit controls when initializing a vCPU, and set
>these controls only if FRED is enumerated after set CPUID.
>
>FRED VM entry/exit controls need to be set to establish context
>sufficient to support FRED event delivery immediately after VM entry
>and exit.  However it is not required to save/load FRED MSRs for
>a non-FRED guest, which aren't supposed to access FRED MSRs.
>
>A non-FRED guest should get #GP upon accessing FRED MSRs, otherwise
>it corrupts host FRED MSRs.
>
>Tested-by: Shan Kang <shan.kang@intel.com>
>Signed-off-by: Xin Li <xin3.li@intel.com>
>---
> arch/x86/kvm/vmx/vmx.c | 34 +++++++++++++++++++++++++++++++++-
> 1 file changed, 33 insertions(+), 1 deletion(-)
>
>diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>index 9186f41974ab..5d4786812664 100644
>--- a/arch/x86/kvm/vmx/vmx.c
>+++ b/arch/x86/kvm/vmx/vmx.c
>@@ -4423,6 +4423,9 @@ static u32 vmx_vmentry_ctrl(void)
> 	if (cpu_has_perf_global_ctrl_bug())
> 		vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
> 
>+	/* Whether to load guest FRED MSRs is deferred until after set CPUID */
>+	vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_FRED;
>+
> 	return vmentry_ctrl;
> }
> 
>@@ -4458,7 +4461,13 @@ static u32 vmx_vmexit_ctrl(void)
> 
> static u64 vmx_secondary_vmexit_ctrl(void)
> {
>-	return vmcs_config.secondary_vmexit_ctrl;
>+	u64 secondary_vmexit_ctrl = vmcs_config.secondary_vmexit_ctrl;
>+
>+	/* Whether to save/load FRED MSRs is deferred until after set CPUID */
>+	secondary_vmexit_ctrl &= ~(SECONDARY_VM_EXIT_SAVE_IA32_FRED |
>+				   SECONDARY_VM_EXIT_LOAD_IA32_FRED);
>+
>+	return secondary_vmexit_ctrl;
> }
> 
> static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
>@@ -7785,10 +7794,33 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
> 		vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
> }
> 
>+static void vmx_vcpu_config_fred_after_set_cpuid(struct kvm_vcpu *vcpu)
>+{
>+	struct vcpu_vmx *vmx = to_vmx(vcpu);
>+
>+	if (!cpu_feature_enabled(X86_FEATURE_FRED) ||
>+	    !guest_cpuid_has(vcpu, X86_FEATURE_FRED))
>+		return;
>+
>+	/* Enable loading guest FRED MSRs from VMCS */
>+	vm_entry_controls_setbit(vmx, VM_ENTRY_LOAD_IA32_FRED);
>+
>+	/*
>+	 * Enable saving guest FRED MSRs into VMCS and loading host FRED MSRs
>+	 * from VMCS.
>+	 */
>+	vm_exit_controls_setbit(vmx, VM_EXIT_ACTIVATE_SECONDARY_CONTROLS);
>+	secondary_vm_exit_controls_setbit(vmx,
>+					  SECONDARY_VM_EXIT_SAVE_IA32_FRED |
>+					  SECONDARY_VM_EXIT_LOAD_IA32_FRED);

all above vmcs controls need to be cleared if guest doesn't enumerate FRED, see

https://lore.kernel.org/all/ZJYzPn7ipYfO0fLZ@google.com/

Clearing VM_EXIT_ACTIVATE_SECONDARY_CONTROLS may be problematic when
new bits are added to secondary vmcs controls. Why not keep
VM_EXIT_ACTIVATE_SECONDARY_CONTROLS always on if it is supported? or you see
any perf impact?

>+}
>+
> static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> {
> 	struct vcpu_vmx *vmx = to_vmx(vcpu);
> 
>+	vmx_vcpu_config_fred_after_set_cpuid(vcpu);
>+
> 	/*
> 	 * XSAVES is effectively enabled if and only if XSAVE is also exposed
> 	 * to the guest.  XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
>-- 
>2.42.0
>
>
  
Li, Xin3 Nov. 9, 2023, 11:50 p.m. UTC | #2
> >+static void vmx_vcpu_config_fred_after_set_cpuid(struct kvm_vcpu *vcpu)
> >+{
> >+	struct vcpu_vmx *vmx = to_vmx(vcpu);
> >+
> >+	if (!cpu_feature_enabled(X86_FEATURE_FRED) ||
> >+	    !guest_cpuid_has(vcpu, X86_FEATURE_FRED))
> >+		return;
> >+
> >+	/* Enable loading guest FRED MSRs from VMCS */
> >+	vm_entry_controls_setbit(vmx, VM_ENTRY_LOAD_IA32_FRED);
> >+
> >+	/*
> >+	 * Enable saving guest FRED MSRs into VMCS and loading host FRED MSRs
> >+	 * from VMCS.
> >+	 */
> >+	vm_exit_controls_setbit(vmx,
> VM_EXIT_ACTIVATE_SECONDARY_CONTROLS);
> >+	secondary_vm_exit_controls_setbit(vmx,
> >+					  SECONDARY_VM_EXIT_SAVE_IA32_FRED
> |
> >+
> SECONDARY_VM_EXIT_LOAD_IA32_FRED);
> 
> all above vmcs controls need to be cleared if guest doesn't enumerate FRED, see
> 
> https://lore.kernel.org/all/ZJYzPn7ipYfO0fLZ@google.com/

Good point, the user space could set cpuid multiple times...
 
> Clearing VM_EXIT_ACTIVATE_SECONDARY_CONTROLS may be problematic when
> new bits are added to secondary vmcs controls. Why not keep
> VM_EXIT_ACTIVATE_SECONDARY_CONTROLS always on if it is supported? or you
> see any perf impact?

I think it from the other way, why keeps hw loading it on every vmentry
even if it's not used by a guest?

Different CPUs may implement it in different ways, which we can't assume.

Other features needing it should set it separately, say with a refcount.
  
Sean Christopherson Nov. 10, 2023, 12:18 a.m. UTC | #3
On Thu, Nov 09, 2023, Xin3 Li wrote:
> > >+static void vmx_vcpu_config_fred_after_set_cpuid(struct kvm_vcpu *vcpu)
> > >+{
> > >+	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > >+
> > >+	if (!cpu_feature_enabled(X86_FEATURE_FRED) ||
> > >+	    !guest_cpuid_has(vcpu, X86_FEATURE_FRED))
> > >+		return;
> > >+
> > >+	/* Enable loading guest FRED MSRs from VMCS */
> > >+	vm_entry_controls_setbit(vmx, VM_ENTRY_LOAD_IA32_FRED);
> > >+
> > >+	/*
> > >+	 * Enable saving guest FRED MSRs into VMCS and loading host FRED MSRs
> > >+	 * from VMCS.
> > >+	 */
> > >+	vm_exit_controls_setbit(vmx,
> > VM_EXIT_ACTIVATE_SECONDARY_CONTROLS);
> > >+	secondary_vm_exit_controls_setbit(vmx,
> > >+					  SECONDARY_VM_EXIT_SAVE_IA32_FRED
> > |
> > >+
> > SECONDARY_VM_EXIT_LOAD_IA32_FRED);
> > 
> > all above vmcs controls need to be cleared if guest doesn't enumerate FRED, see
> > 
> > https://lore.kernel.org/all/ZJYzPn7ipYfO0fLZ@google.com/
> 
> Good point, the user space could set cpuid multiple times...
>  
> > Clearing VM_EXIT_ACTIVATE_SECONDARY_CONTROLS may be problematic when
> > new bits are added to secondary vmcs controls. Why not keep
> > VM_EXIT_ACTIVATE_SECONDARY_CONTROLS always on if it is supported? or you
> > see any perf impact?
> 
> I think it from the other way, why keeps hw loading it on every vmentry
> even if it's not used by a guest?

Oh, yeesh, this is clearing the activation control.  Yeah, NAK to that, just
leave it set.  If it weren't for the fact that there is apparently a metrict ton
of FRED state (I thought the whole point of FRED was to kill off legacy crap like
CPL1 and CPL2 stacks?) _and_ that KVM needs to toggle MSR intercepts, I'd probably
push back on toggling even the FRED controls.

> Different CPUs may implement it in different ways, which we can't assume.

Implement what in a different way?  The VMCS fields and FRED are architectural.
The internal layout of the VMCS is uarch specific, but the encodings and semantics
absolutely cannot change without breaking software.  And if Intel does something
asinine like make a control active-low then we have far, far bigger problems.

> Other features needing it should set it separately, say with a refcount.

Absolutely not.  Unless Intel screwed up the implementation, the cost of leaving
VM_EXIT_ACTIVATE_SECONDARY_CONTROLS set when it's supported shouldn't even be
measurable.
  
Li, Xin3 Nov. 14, 2023, 2:50 a.m. UTC | #4
> > > Clearing VM_EXIT_ACTIVATE_SECONDARY_CONTROLS may be problematic when
> > > new bits are added to secondary vmcs controls. Why not keep
> > > VM_EXIT_ACTIVATE_SECONDARY_CONTROLS always on if it is supported? or
> > > you see any perf impact?
> >
> > I think it from the other way, why keeps hw loading it on every
> > vmentry even if it's not used by a guest?
> 
> Oh, yeesh, this is clearing the activation control.  Yeah, NAK to that, just leave it
> set.  If it weren't for the fact that there is apparently a metrict ton of FRED state (I
> thought the whole point of FRED was to kill off legacy crap like
> CPL1 and CPL2 stacks?) _and_ that KVM needs to toggle MSR intercepts, I'd
> probably push back on toggling even the FRED controls.

To me, FRED is to _architecturally_ do the right thing for x86 event handling.

I don't think FRED's major purpose is to kill legacy craps, but longer term
it paves the way for that.

Yeah, I would like to discuss whether to toggle FRED controls.

> 
> > Different CPUs may implement it in different ways, which we can't assume.
> 
> Implement what in a different way?  The VMCS fields and FRED are architectural.
> The internal layout of the VMCS is uarch specific, but the encodings and semantics
> absolutely cannot change without breaking software.  And if Intel does something
> asinine like make a control active-low then we have far, far bigger problems.

I should have made it clear that I wasn't talking at the ISA level.  And
of course CPU uarch implementations should be transparent to software.

I mean a CPU uarch could choose to check the activation bit in the VM exit
controls first and then decide whether to load the 2nd VM exit controls.
While if resources allow, a CPU uarch could always load the 2nd VM exit
controls.

BTW, I believe the active-low controls are really gone with new features.
All new controls are all 0s by default.

> > Other features needing it should set it separately, say with a refcount.
> 
> Absolutely not.  Unless Intel screwed up the implementation, the cost of leaving
> VM_EXIT_ACTIVATE_SECONDARY_CONTROLS set when it's supported shouldn't
> even be measurable.

I do hope so.  However, I don't know whether this is guaranteed or not on
all uarch implementations.

A decision to leave it set is good enough for now.

Thanks!
    Xin
  
Sean Christopherson Nov. 15, 2023, 9:47 p.m. UTC | #5
On Tue, Nov 14, 2023, Xin3 Li wrote:
> > Implement what in a different way?  The VMCS fields and FRED are architectural.
> > The internal layout of the VMCS is uarch specific, but the encodings and semantics
> > absolutely cannot change without breaking software.  And if Intel does something
> > asinine like make a control active-low then we have far, far bigger problems.
> 
> I should have made it clear that I wasn't talking at the ISA level.  And
> of course CPU uarch implementations should be transparent to software.
> 
> I mean a CPU uarch could choose to check the activation bit in the VM exit
> controls first and then decide whether to load the 2nd VM exit controls.
> While if resources allow, a CPU uarch could always load the 2nd VM exit
> controls.

And why does that matter?  Loading a field speculatively/out-of-order is fine,
consuming it when it architecturally is supposed to be ignored is not.
  

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9186f41974ab..5d4786812664 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4423,6 +4423,9 @@  static u32 vmx_vmentry_ctrl(void)
 	if (cpu_has_perf_global_ctrl_bug())
 		vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
 
+	/* Whether to load guest FRED MSRs is deferred until after set CPUID */
+	vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_FRED;
+
 	return vmentry_ctrl;
 }
 
@@ -4458,7 +4461,13 @@  static u32 vmx_vmexit_ctrl(void)
 
 static u64 vmx_secondary_vmexit_ctrl(void)
 {
-	return vmcs_config.secondary_vmexit_ctrl;
+	u64 secondary_vmexit_ctrl = vmcs_config.secondary_vmexit_ctrl;
+
+	/* Whether to save/load FRED MSRs is deferred until after set CPUID */
+	secondary_vmexit_ctrl &= ~(SECONDARY_VM_EXIT_SAVE_IA32_FRED |
+				   SECONDARY_VM_EXIT_LOAD_IA32_FRED);
+
+	return secondary_vmexit_ctrl;
 }
 
 static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
@@ -7785,10 +7794,33 @@  static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
 		vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
 }
 
+static void vmx_vcpu_config_fred_after_set_cpuid(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!cpu_feature_enabled(X86_FEATURE_FRED) ||
+	    !guest_cpuid_has(vcpu, X86_FEATURE_FRED))
+		return;
+
+	/* Enable loading guest FRED MSRs from VMCS */
+	vm_entry_controls_setbit(vmx, VM_ENTRY_LOAD_IA32_FRED);
+
+	/*
+	 * Enable saving guest FRED MSRs into VMCS and loading host FRED MSRs
+	 * from VMCS.
+	 */
+	vm_exit_controls_setbit(vmx, VM_EXIT_ACTIVATE_SECONDARY_CONTROLS);
+	secondary_vm_exit_controls_setbit(vmx,
+					  SECONDARY_VM_EXIT_SAVE_IA32_FRED |
+					  SECONDARY_VM_EXIT_LOAD_IA32_FRED);
+}
+
 static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	vmx_vcpu_config_fred_after_set_cpuid(vcpu);
+
 	/*
 	 * XSAVES is effectively enabled if and only if XSAVE is also exposed
 	 * to the guest.  XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be