[v1,08/23] KVM: VMX: Initialize VMCS FRED fields

Message ID 20231108183003.5981-9-xin3.li@intel.com
State New
Headers
Series Enable FRED with KVM VMX |

Commit Message

Li, Xin3 Nov. 8, 2023, 6:29 p.m. UTC
  Initialize host VMCS FRED fields with host FRED MSRs' value and
guest VMCS FRED fields to 0.

FRED CPU states are managed in 9 new FRED MSRs, as well as a few
existing CPU registers and MSRs, e.g., CR4.FRED.  To support FRED
context management, new VMCS fields corresponding to most of FRED
CPU state MSRs are added to both the host-state and guest-state
areas of VMCS.

Specifically no VMCS fields are added for FRED RSP0 and SSP0 MSRs,
because the 2 FRED MSRs are used during ring 3 event delivery only,
thus KVM, running on ring 0, can run safely even with guest FRED
RSP0 and SSP0.  It can be deferred to load host FRED RSP0 and SSP0
until before returning to user level.

Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
---
 arch/x86/include/asm/vmx.h | 16 ++++++++++++++++
 arch/x86/kvm/vmx/vmx.c     | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)
  

Comments

Chao Gao Nov. 13, 2023, 3:04 a.m. UTC | #1
On Wed, Nov 08, 2023 at 10:29:48AM -0800, Xin Li wrote:
>Initialize host VMCS FRED fields with host FRED MSRs' value and
>guest VMCS FRED fields to 0.
>
>FRED CPU states are managed in 9 new FRED MSRs, as well as a few
>existing CPU registers and MSRs, e.g., CR4.FRED.  To support FRED
>context management, new VMCS fields corresponding to most of FRED
>CPU state MSRs are added to both the host-state and guest-state
>areas of VMCS.
>
>Specifically no VMCS fields are added for FRED RSP0 and SSP0 MSRs,
>because the 2 FRED MSRs are used during ring 3 event delivery only,
>thus KVM, running on ring 0, can run safely even with guest FRED
>RSP0 and SSP0.  It can be deferred to load host FRED RSP0 and SSP0
>until before returning to user level.
>
>Tested-by: Shan Kang <shan.kang@intel.com>
>Signed-off-by: Xin Li <xin3.li@intel.com>
>---
> arch/x86/include/asm/vmx.h | 16 ++++++++++++++++
> arch/x86/kvm/vmx/vmx.c     | 32 ++++++++++++++++++++++++++++++++
> 2 files changed, 48 insertions(+)
>
>diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>index 41796a733bc9..d54a1a1057b0 100644
>--- a/arch/x86/include/asm/vmx.h
>+++ b/arch/x86/include/asm/vmx.h
>@@ -277,12 +277,28 @@ enum vmcs_field {
> 	GUEST_BNDCFGS_HIGH              = 0x00002813,
> 	GUEST_IA32_RTIT_CTL		= 0x00002814,
> 	GUEST_IA32_RTIT_CTL_HIGH	= 0x00002815,
>+	GUEST_IA32_FRED_CONFIG		= 0x0000281a,
>+	GUEST_IA32_FRED_RSP1		= 0x0000281c,
>+	GUEST_IA32_FRED_RSP2		= 0x0000281e,
>+	GUEST_IA32_FRED_RSP3		= 0x00002820,
>+	GUEST_IA32_FRED_STKLVLS		= 0x00002822,
>+	GUEST_IA32_FRED_SSP1		= 0x00002824,
>+	GUEST_IA32_FRED_SSP2		= 0x00002826,
>+	GUEST_IA32_FRED_SSP3		= 0x00002828,
> 	HOST_IA32_PAT			= 0x00002c00,
> 	HOST_IA32_PAT_HIGH		= 0x00002c01,
> 	HOST_IA32_EFER			= 0x00002c02,
> 	HOST_IA32_EFER_HIGH		= 0x00002c03,
> 	HOST_IA32_PERF_GLOBAL_CTRL	= 0x00002c04,
> 	HOST_IA32_PERF_GLOBAL_CTRL_HIGH	= 0x00002c05,
>+	HOST_IA32_FRED_CONFIG		= 0x00002c08,
>+	HOST_IA32_FRED_RSP1		= 0x00002c0a,
>+	HOST_IA32_FRED_RSP2		= 0x00002c0c,
>+	HOST_IA32_FRED_RSP3		= 0x00002c0e,
>+	HOST_IA32_FRED_STKLVLS		= 0x00002c10,
>+	HOST_IA32_FRED_SSP1		= 0x00002c12,
>+	HOST_IA32_FRED_SSP2		= 0x00002c14,
>+	HOST_IA32_FRED_SSP3		= 0x00002c16,
> 	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
> 	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
> 	EXCEPTION_BITMAP                = 0x00004004,
>diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>index 327e052d90c1..41772ecdd368 100644
>--- a/arch/x86/kvm/vmx/vmx.c
>+++ b/arch/x86/kvm/vmx/vmx.c
>@@ -1477,6 +1477,18 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
> 				    (unsigned long)(cpu_entry_stack(cpu) + 1));
> 		}
> 
>+#ifdef CONFIG_X86_64
>+		/* Per-CPU FRED MSRs */
>+		if (cpu_feature_enabled(X86_FEATURE_FRED)) {

how about kvm_cpu_cap_has()? to decouple KVM's capability to virtualize a feature
and host's enabling a feature.

>+			vmcs_write64(HOST_IA32_FRED_RSP1, read_msr(MSR_IA32_FRED_RSP1));
>+			vmcs_write64(HOST_IA32_FRED_RSP2, read_msr(MSR_IA32_FRED_RSP2));
>+			vmcs_write64(HOST_IA32_FRED_RSP3, read_msr(MSR_IA32_FRED_RSP3));
>+			vmcs_write64(HOST_IA32_FRED_SSP1, read_msr(MSR_IA32_FRED_SSP1));
>+			vmcs_write64(HOST_IA32_FRED_SSP2, read_msr(MSR_IA32_FRED_SSP2));
>+			vmcs_write64(HOST_IA32_FRED_SSP3, read_msr(MSR_IA32_FRED_SSP3));
>+		}
>+#endif

why is this hunk enclosed in #ifdef CONFIG_X86_64 while the one below isn't?

>+
> 		vmx->loaded_vmcs->cpu = cpu;
> 	}
> }
>@@ -4375,6 +4387,15 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
> 
> 	if (cpu_has_load_ia32_efer())
> 		vmcs_write64(HOST_IA32_EFER, host_efer);
>+
>+	/*
>+	 * FRED MSRs are per-cpu, however FRED CONFIG and STKLVLS MSRs
>+	 * are the same on all CPUs, thus they are initialized here.
>+	 */
>+	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
>+		vmcs_write64(HOST_IA32_FRED_CONFIG, read_msr(MSR_IA32_FRED_CONFIG));
>+		vmcs_write64(HOST_IA32_FRED_STKLVLS, read_msr(MSR_IA32_FRED_STKLVLS));
>+	}
> }
> 
> void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
>@@ -4936,6 +4957,17 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> 	vmcs_writel(GUEST_IDTR_BASE, 0);
> 	vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
> 
>+	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
>+		vmcs_write64(GUEST_IA32_FRED_CONFIG, 0);
>+		vmcs_write64(GUEST_IA32_FRED_RSP1, 0);
>+		vmcs_write64(GUEST_IA32_FRED_RSP2, 0);
>+		vmcs_write64(GUEST_IA32_FRED_RSP3, 0);
>+		vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0);
>+		vmcs_write64(GUEST_IA32_FRED_SSP1, 0);
>+		vmcs_write64(GUEST_IA32_FRED_SSP2, 0);
>+		vmcs_write64(GUEST_IA32_FRED_SSP3, 0);
>+	}
>+

move this hunk to __vmx_vcpu_reset() because FRED spec says

"INIT does not change the value of the new MSRs."

> 	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
> 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
> 	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
>-- 
>2.42.0
>
>
  
Li, Xin3 Nov. 14, 2023, 6:02 a.m. UTC | #2
> >@@ -1477,6 +1477,18 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int
> cpu,
> > 				    (unsigned long)(cpu_entry_stack(cpu) + 1));
> > 		}
> >
> >+#ifdef CONFIG_X86_64
> >+		/* Per-CPU FRED MSRs */
> >+		if (cpu_feature_enabled(X86_FEATURE_FRED)) {
> 
> how about kvm_cpu_cap_has()? to decouple KVM's capability to virtualize a feature
> and host's enabling a feature.

Very likely I guess.

> >+			vmcs_write64(HOST_IA32_FRED_RSP1,
> read_msr(MSR_IA32_FRED_RSP1));
> >+			vmcs_write64(HOST_IA32_FRED_RSP2,
> read_msr(MSR_IA32_FRED_RSP2));
> >+			vmcs_write64(HOST_IA32_FRED_RSP3,
> read_msr(MSR_IA32_FRED_RSP3));
> >+			vmcs_write64(HOST_IA32_FRED_SSP1,
> read_msr(MSR_IA32_FRED_SSP1));
> >+			vmcs_write64(HOST_IA32_FRED_SSP2,
> read_msr(MSR_IA32_FRED_SSP2));
> >+			vmcs_write64(HOST_IA32_FRED_SSP3,
> read_msr(MSR_IA32_FRED_SSP3));
> >+		}
> >+#endif
> 
> why is this hunk enclosed in #ifdef CONFIG_X86_64 while the one below isn't?

As if the compiler doesn't complain, I should NOT add it.

> 
> >+	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
> >+		vmcs_write64(GUEST_IA32_FRED_CONFIG, 0);
> >+		vmcs_write64(GUEST_IA32_FRED_RSP1, 0);
> >+		vmcs_write64(GUEST_IA32_FRED_RSP2, 0);
> >+		vmcs_write64(GUEST_IA32_FRED_RSP3, 0);
> >+		vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0);
> >+		vmcs_write64(GUEST_IA32_FRED_SSP1, 0);
> >+		vmcs_write64(GUEST_IA32_FRED_SSP2, 0);
> >+		vmcs_write64(GUEST_IA32_FRED_SSP3, 0);
> >+	}
> >+
> 
> move this hunk to __vmx_vcpu_reset() because FRED spec says
> 
> "INIT does not change the value of the new MSRs."
> 

Yeah, will do.
  
Li, Xin3 Nov. 14, 2023, 6:51 a.m. UTC | #3
> 
> > >+			vmcs_write64(HOST_IA32_FRED_RSP1,
> > read_msr(MSR_IA32_FRED_RSP1));
> > >+			vmcs_write64(HOST_IA32_FRED_RSP2,
> > read_msr(MSR_IA32_FRED_RSP2));
> > >+			vmcs_write64(HOST_IA32_FRED_RSP3,
> > read_msr(MSR_IA32_FRED_RSP3));
> > >+			vmcs_write64(HOST_IA32_FRED_SSP1,
> > read_msr(MSR_IA32_FRED_SSP1));
> > >+			vmcs_write64(HOST_IA32_FRED_SSP2,
> > read_msr(MSR_IA32_FRED_SSP2));
> > >+			vmcs_write64(HOST_IA32_FRED_SSP3,
> > read_msr(MSR_IA32_FRED_SSP3));
> > >+		}
> > >+#endif
> >
> > why is this hunk enclosed in #ifdef CONFIG_X86_64 while the one below isn't?
> 
> As if the compiler doesn't complain, I should NOT add it.

I think I don't need to add CONFIG_X86_64 for the above, but somehow
this was left over.  Let me double check.
  

Patch

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 41796a733bc9..d54a1a1057b0 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -277,12 +277,28 @@  enum vmcs_field {
 	GUEST_BNDCFGS_HIGH              = 0x00002813,
 	GUEST_IA32_RTIT_CTL		= 0x00002814,
 	GUEST_IA32_RTIT_CTL_HIGH	= 0x00002815,
+	GUEST_IA32_FRED_CONFIG		= 0x0000281a,
+	GUEST_IA32_FRED_RSP1		= 0x0000281c,
+	GUEST_IA32_FRED_RSP2		= 0x0000281e,
+	GUEST_IA32_FRED_RSP3		= 0x00002820,
+	GUEST_IA32_FRED_STKLVLS		= 0x00002822,
+	GUEST_IA32_FRED_SSP1		= 0x00002824,
+	GUEST_IA32_FRED_SSP2		= 0x00002826,
+	GUEST_IA32_FRED_SSP3		= 0x00002828,
 	HOST_IA32_PAT			= 0x00002c00,
 	HOST_IA32_PAT_HIGH		= 0x00002c01,
 	HOST_IA32_EFER			= 0x00002c02,
 	HOST_IA32_EFER_HIGH		= 0x00002c03,
 	HOST_IA32_PERF_GLOBAL_CTRL	= 0x00002c04,
 	HOST_IA32_PERF_GLOBAL_CTRL_HIGH	= 0x00002c05,
+	HOST_IA32_FRED_CONFIG		= 0x00002c08,
+	HOST_IA32_FRED_RSP1		= 0x00002c0a,
+	HOST_IA32_FRED_RSP2		= 0x00002c0c,
+	HOST_IA32_FRED_RSP3		= 0x00002c0e,
+	HOST_IA32_FRED_STKLVLS		= 0x00002c10,
+	HOST_IA32_FRED_SSP1		= 0x00002c12,
+	HOST_IA32_FRED_SSP2		= 0x00002c14,
+	HOST_IA32_FRED_SSP3		= 0x00002c16,
 	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
 	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
 	EXCEPTION_BITMAP                = 0x00004004,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 327e052d90c1..41772ecdd368 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1477,6 +1477,18 @@  void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
 				    (unsigned long)(cpu_entry_stack(cpu) + 1));
 		}
 
+#ifdef CONFIG_X86_64
+		/* Per-CPU FRED MSRs */
+		if (cpu_feature_enabled(X86_FEATURE_FRED)) {
+			vmcs_write64(HOST_IA32_FRED_RSP1, read_msr(MSR_IA32_FRED_RSP1));
+			vmcs_write64(HOST_IA32_FRED_RSP2, read_msr(MSR_IA32_FRED_RSP2));
+			vmcs_write64(HOST_IA32_FRED_RSP3, read_msr(MSR_IA32_FRED_RSP3));
+			vmcs_write64(HOST_IA32_FRED_SSP1, read_msr(MSR_IA32_FRED_SSP1));
+			vmcs_write64(HOST_IA32_FRED_SSP2, read_msr(MSR_IA32_FRED_SSP2));
+			vmcs_write64(HOST_IA32_FRED_SSP3, read_msr(MSR_IA32_FRED_SSP3));
+		}
+#endif
+
 		vmx->loaded_vmcs->cpu = cpu;
 	}
 }
@@ -4375,6 +4387,15 @@  void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 
 	if (cpu_has_load_ia32_efer())
 		vmcs_write64(HOST_IA32_EFER, host_efer);
+
+	/*
+	 * FRED MSRs are per-cpu, however FRED CONFIG and STKLVLS MSRs
+	 * are the same on all CPUs, thus they are initialized here.
+	 */
+	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
+		vmcs_write64(HOST_IA32_FRED_CONFIG, read_msr(MSR_IA32_FRED_CONFIG));
+		vmcs_write64(HOST_IA32_FRED_STKLVLS, read_msr(MSR_IA32_FRED_STKLVLS));
+	}
 }
 
 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
@@ -4936,6 +4957,17 @@  static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmcs_writel(GUEST_IDTR_BASE, 0);
 	vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
 
+	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
+		vmcs_write64(GUEST_IA32_FRED_CONFIG, 0);
+		vmcs_write64(GUEST_IA32_FRED_RSP1, 0);
+		vmcs_write64(GUEST_IA32_FRED_RSP2, 0);
+		vmcs_write64(GUEST_IA32_FRED_RSP3, 0);
+		vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0);
+		vmcs_write64(GUEST_IA32_FRED_SSP1, 0);
+		vmcs_write64(GUEST_IA32_FRED_SSP2, 0);
+		vmcs_write64(GUEST_IA32_FRED_SSP3, 0);
+	}
+
 	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
 	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);