[v3,03/18] x86/reboot: KVM: Handle VMXOFF in KVM's reboot callback

Message ID 20230512235026.808058-4-seanjc@google.com
State New
Headers
Series x86/reboot: KVM: Clean up "emergency" virt code |

Commit Message

Sean Christopherson May 12, 2023, 11:50 p.m. UTC
  Use KVM VMX's reboot/crash callback to do VMXOFF in an emergency instead
of manually and blindly doing VMXOFF.  There's no need to attempt VMXOFF
if a hypervisor, i.e. KVM, isn't loaded/active, i.e. if the CPU can't
possibly be post-VMXON.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/virtext.h | 10 ----------
 arch/x86/kernel/reboot.c       | 29 +++++++++--------------------
 arch/x86/kvm/vmx/vmx.c         |  8 +++++---
 3 files changed, 14 insertions(+), 33 deletions(-)
  

Comments

Kai Huang May 22, 2023, 12:55 p.m. UTC | #1
On Fri, 2023-05-12 at 16:50 -0700, Sean Christopherson wrote:
> Use KVM VMX's reboot/crash callback to do VMXOFF in an emergency instead
> of manually and blindly doing VMXOFF.  There's no need to attempt VMXOFF
> if a hypervisor, i.e. KVM, isn't loaded/active, i.e. if the CPU can't
> possibly be post-VMXON.
> 
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/include/asm/virtext.h | 10 ----------
>  arch/x86/kernel/reboot.c       | 29 +++++++++--------------------
>  arch/x86/kvm/vmx/vmx.c         |  8 +++++---
>  3 files changed, 14 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
> index 3b12e6b99412..5bc29fab15da 100644
> --- a/arch/x86/include/asm/virtext.h
> +++ b/arch/x86/include/asm/virtext.h
> @@ -70,16 +70,6 @@ static inline void __cpu_emergency_vmxoff(void)
>  		cpu_vmxoff();
>  }
>  
> -/** Disable VMX if it is supported and enabled on the current CPU
> - */
> -static inline void cpu_emergency_vmxoff(void)
> -{
> -	if (cpu_has_vmx())
> -		__cpu_emergency_vmxoff();
> -}
> -
> -
> -
>  
>  /*
>   * SVM functions:
> diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
> index 739e09527dbb..0cf2261c2dec 100644
> --- a/arch/x86/kernel/reboot.c
> +++ b/arch/x86/kernel/reboot.c
> @@ -787,13 +787,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
>  }
>  #endif
>  
> -/*
> - * This is used to VMCLEAR all VMCSs loaded on the
> - * processor. And when loading kvm_intel module, the
> - * callback function pointer will be assigned.
> - *
> - * protected by rcu.
> - */
> +/* RCU-protected callback to disable virtualization prior to reboot. */
>  static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
>  
>  void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
> @@ -815,17 +809,6 @@ void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
>  }
>  EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
>  
> -static inline void cpu_crash_vmclear_loaded_vmcss(void)
> -{
> -	cpu_emergency_virt_cb *callback;
> -
> -	rcu_read_lock();
> -	callback = rcu_dereference(cpu_emergency_virt_callback);
> -	if (callback)
> -		callback();
> -	rcu_read_unlock();
> -}
> -
>  /* This is the CPU performing the emergency shutdown work. */
>  int crashing_cpu = -1;
>  
> @@ -836,9 +819,15 @@ int crashing_cpu = -1;
>   */
>  void cpu_emergency_disable_virtualization(void)
>  {
> -	cpu_crash_vmclear_loaded_vmcss();
> +	cpu_emergency_virt_cb *callback;
>  
> -	cpu_emergency_vmxoff();
> +	rcu_read_lock();
> +	callback = rcu_dereference(cpu_emergency_virt_callback);
> +	if (callback)
> +		callback();
> +	rcu_read_unlock();
> +
> +	/* KVM_AMD doesn't yet utilize the common callback. */
>  	cpu_emergency_svm_disable();
>  }
>  
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index fc9cdb4114cc..76cdb189f1b5 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -744,7 +744,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
>  	return ret;
>  }
>  
> -static void crash_vmclear_local_loaded_vmcss(void)
> +static void vmx_emergency_disable(void)
>  {
>  	int cpu = raw_smp_processor_id();
>  	struct loaded_vmcs *v;
> @@ -752,6 +752,8 @@ static void crash_vmclear_local_loaded_vmcss(void)
>  	list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
>  			    loaded_vmcss_on_cpu_link)
>  		vmcs_clear(v->vmcs);
> +
> +	__cpu_emergency_vmxoff();

__cpu_emergency_vmxoff() internally checks whether VMX is enabled in CR4.  
Logically, it looks more reasonable to do such a check before VMCLEARing active
VMCSes, although in practice there should be no problem I think.

But this problem is inherited from the existing upstream code, so I'm not sure
whether it is worth fixing.

>  }
>  
>  static void __loaded_vmcs_clear(void *arg)
> @@ -8547,7 +8549,7 @@ static void __vmx_exit(void)
>  {
>  	allow_smaller_maxphyaddr = false;
>  
> -	cpu_emergency_unregister_virt_callback(crash_vmclear_local_loaded_vmcss);
> +	cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
>  
>  	vmx_cleanup_l1d_flush();
>  }
> @@ -8597,7 +8599,7 @@ static int __init vmx_init(void)
>  		pi_init_cpu(cpu);
>  	}
>  
> -	cpu_emergency_register_virt_callback(crash_vmclear_local_loaded_vmcss);
> +	cpu_emergency_register_virt_callback(vmx_emergency_disable);
>  
>  	vmx_check_vmcs12_offsets();
>  
> -- 
> 2.40.1.606.ga4b1b128d6-goog
>
  
Sean Christopherson May 22, 2023, 5:58 p.m. UTC | #2
On Mon, May 22, 2023, Kai Huang wrote:
> On Fri, 2023-05-12 at 16:50 -0700, Sean Christopherson wrote:
> > Use KVM VMX's reboot/crash callback to do VMXOFF in an emergency instead
> > of manually and blindly doing VMXOFF.  There's no need to attempt VMXOFF
> > if a hypervisor, i.e. KVM, isn't loaded/active, i.e. if the CPU can't
> > possibly be post-VMXON.
> > 
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index fc9cdb4114cc..76cdb189f1b5 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -744,7 +744,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
> >  	return ret;
> >  }
> >  
> > -static void crash_vmclear_local_loaded_vmcss(void)
> > +static void vmx_emergency_disable(void)
> >  {
> >  	int cpu = raw_smp_processor_id();
> >  	struct loaded_vmcs *v;
> > @@ -752,6 +752,8 @@ static void crash_vmclear_local_loaded_vmcss(void)
> >  	list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
> >  			    loaded_vmcss_on_cpu_link)
> >  		vmcs_clear(v->vmcs);
> > +
> > +	__cpu_emergency_vmxoff();
> 
> __cpu_emergency_vmxoff() internally checks whether VMX is enabled in CR4.  
> Logically, it looks more reasonable to do such a check before VMCLEARing active
> VMCSes, although in practice there should be no problem I think.
> 
> But this problem is inherited from the existing upstream code, so I'm not sure
> whether it is worth fixing.

Hmm, I think it's worth fixing, if only to avoid confusing future readers.  Blindly
doing VMCLEAR but then conditionally executing VMXOFF is nonsensical.  I'll tack on
a patch, and also add a comment to call out that CR4.VMXE can be _cleared_
asynchronously by NMI, but can't be set after being checked.  I.e. explain that
checking CR4.VMXE is a "best effort" sort of thing.
  
Kai Huang May 22, 2023, 11:11 p.m. UTC | #3
On Mon, 2023-05-22 at 10:58 -0700, Sean Christopherson wrote:
> On Mon, May 22, 2023, Kai Huang wrote:
> > On Fri, 2023-05-12 at 16:50 -0700, Sean Christopherson wrote:
> > > Use KVM VMX's reboot/crash callback to do VMXOFF in an emergency instead
> > > of manually and blindly doing VMXOFF.  There's no need to attempt VMXOFF
> > > if a hypervisor, i.e. KVM, isn't loaded/active, i.e. if the CPU can't
> > > possibly be post-VMXON.
> > > 
> > > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > > ---
> > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > > index fc9cdb4114cc..76cdb189f1b5 100644
> > > --- a/arch/x86/kvm/vmx/vmx.c
> > > +++ b/arch/x86/kvm/vmx/vmx.c
> > > @@ -744,7 +744,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
> > >  	return ret;
> > >  }
> > >  
> > > -static void crash_vmclear_local_loaded_vmcss(void)
> > > +static void vmx_emergency_disable(void)
> > >  {
> > >  	int cpu = raw_smp_processor_id();
> > >  	struct loaded_vmcs *v;
> > > @@ -752,6 +752,8 @@ static void crash_vmclear_local_loaded_vmcss(void)
> > >  	list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
> > >  			    loaded_vmcss_on_cpu_link)
> > >  		vmcs_clear(v->vmcs);
> > > +
> > > +	__cpu_emergency_vmxoff();
> > 
> > __cpu_emergency_vmxoff() internally checks whether VMX is enabled in CR4.  
> > Logically, it looks more reasonable to do such a check before VMCLEARing active
> > VMCSes, although in practice there should be no problem I think.
> > 
> > But this problem is inherited from the existing upstream code, so I'm not sure
> > whether it is worth fixing.
> 
> Hmm, I think it's worth fixing, if only to avoid confusing future readers.  Blindly
> doing VMCLEAR but then conditionally executing VMXOFF is nonsensical.  I'll tack on
> a patch, and also add a comment to call out that CR4.VMXE can be _cleared_
> asynchronously by NMI, but can't be set after being checked.  I.e. explain that
> checking CR4.VMXE is a "best effort" sort of thing.

Yeah looks good.
  

Patch

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 3b12e6b99412..5bc29fab15da 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -70,16 +70,6 @@  static inline void __cpu_emergency_vmxoff(void)
 		cpu_vmxoff();
 }
 
-/** Disable VMX if it is supported and enabled on the current CPU
- */
-static inline void cpu_emergency_vmxoff(void)
-{
-	if (cpu_has_vmx())
-		__cpu_emergency_vmxoff();
-}
-
-
-
 
 /*
  * SVM functions:
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 739e09527dbb..0cf2261c2dec 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -787,13 +787,7 @@  void machine_crash_shutdown(struct pt_regs *regs)
 }
 #endif
 
-/*
- * This is used to VMCLEAR all VMCSs loaded on the
- * processor. And when loading kvm_intel module, the
- * callback function pointer will be assigned.
- *
- * protected by rcu.
- */
+/* RCU-protected callback to disable virtualization prior to reboot. */
 static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
 
 void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
@@ -815,17 +809,6 @@  void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
 }
 EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
 
-static inline void cpu_crash_vmclear_loaded_vmcss(void)
-{
-	cpu_emergency_virt_cb *callback;
-
-	rcu_read_lock();
-	callback = rcu_dereference(cpu_emergency_virt_callback);
-	if (callback)
-		callback();
-	rcu_read_unlock();
-}
-
 /* This is the CPU performing the emergency shutdown work. */
 int crashing_cpu = -1;
 
@@ -836,9 +819,15 @@  int crashing_cpu = -1;
  */
 void cpu_emergency_disable_virtualization(void)
 {
-	cpu_crash_vmclear_loaded_vmcss();
+	cpu_emergency_virt_cb *callback;
 
-	cpu_emergency_vmxoff();
+	rcu_read_lock();
+	callback = rcu_dereference(cpu_emergency_virt_callback);
+	if (callback)
+		callback();
+	rcu_read_unlock();
+
+	/* KVM_AMD doesn't yet utilize the common callback. */
 	cpu_emergency_svm_disable();
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index fc9cdb4114cc..76cdb189f1b5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -744,7 +744,7 @@  static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
 	return ret;
 }
 
-static void crash_vmclear_local_loaded_vmcss(void)
+static void vmx_emergency_disable(void)
 {
 	int cpu = raw_smp_processor_id();
 	struct loaded_vmcs *v;
@@ -752,6 +752,8 @@  static void crash_vmclear_local_loaded_vmcss(void)
 	list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
 			    loaded_vmcss_on_cpu_link)
 		vmcs_clear(v->vmcs);
+
+	__cpu_emergency_vmxoff();
 }
 
 static void __loaded_vmcs_clear(void *arg)
@@ -8547,7 +8549,7 @@  static void __vmx_exit(void)
 {
 	allow_smaller_maxphyaddr = false;
 
-	cpu_emergency_unregister_virt_callback(crash_vmclear_local_loaded_vmcss);
+	cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
 
 	vmx_cleanup_l1d_flush();
 }
@@ -8597,7 +8599,7 @@  static int __init vmx_init(void)
 		pi_init_cpu(cpu);
 	}
 
-	cpu_emergency_register_virt_callback(crash_vmclear_local_loaded_vmcss);
+	cpu_emergency_register_virt_callback(vmx_emergency_disable);
 
 	vmx_check_vmcs12_offsets();