[v3,03/18] x86/reboot: KVM: Handle VMXOFF in KVM's reboot callback
Commit Message
Use KVM VMX's reboot/crash callback to do VMXOFF in an emergency instead
of manually and blindly doing VMXOFF. There's no need to attempt VMXOFF
if a hypervisor, i.e. KVM, isn't loaded/active, i.e. if the CPU can't
possibly be post-VMXON.
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/include/asm/virtext.h | 10 ----------
arch/x86/kernel/reboot.c | 29 +++++++++--------------------
arch/x86/kvm/vmx/vmx.c | 8 +++++---
3 files changed, 14 insertions(+), 33 deletions(-)
Comments
On Fri, 2023-05-12 at 16:50 -0700, Sean Christopherson wrote:
> Use KVM VMX's reboot/crash callback to do VMXOFF in an emergency instead
> of manually and blindly doing VMXOFF. There's no need to attempt VMXOFF
> if a hypervisor, i.e. KVM, isn't loaded/active, i.e. if the CPU can't
> possibly be post-VMXON.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> arch/x86/include/asm/virtext.h | 10 ----------
> arch/x86/kernel/reboot.c | 29 +++++++++--------------------
> arch/x86/kvm/vmx/vmx.c | 8 +++++---
> 3 files changed, 14 insertions(+), 33 deletions(-)
>
> diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
> index 3b12e6b99412..5bc29fab15da 100644
> --- a/arch/x86/include/asm/virtext.h
> +++ b/arch/x86/include/asm/virtext.h
> @@ -70,16 +70,6 @@ static inline void __cpu_emergency_vmxoff(void)
> cpu_vmxoff();
> }
>
> -/** Disable VMX if it is supported and enabled on the current CPU
> - */
> -static inline void cpu_emergency_vmxoff(void)
> -{
> - if (cpu_has_vmx())
> - __cpu_emergency_vmxoff();
> -}
> -
> -
> -
>
> /*
> * SVM functions:
> diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
> index 739e09527dbb..0cf2261c2dec 100644
> --- a/arch/x86/kernel/reboot.c
> +++ b/arch/x86/kernel/reboot.c
> @@ -787,13 +787,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
> }
> #endif
>
> -/*
> - * This is used to VMCLEAR all VMCSs loaded on the
> - * processor. And when loading kvm_intel module, the
> - * callback function pointer will be assigned.
> - *
> - * protected by rcu.
> - */
> +/* RCU-protected callback to disable virtualization prior to reboot. */
> static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
>
> void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
> @@ -815,17 +809,6 @@ void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
> }
> EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
>
> -static inline void cpu_crash_vmclear_loaded_vmcss(void)
> -{
> - cpu_emergency_virt_cb *callback;
> -
> - rcu_read_lock();
> - callback = rcu_dereference(cpu_emergency_virt_callback);
> - if (callback)
> - callback();
> - rcu_read_unlock();
> -}
> -
> /* This is the CPU performing the emergency shutdown work. */
> int crashing_cpu = -1;
>
> @@ -836,9 +819,15 @@ int crashing_cpu = -1;
> */
> void cpu_emergency_disable_virtualization(void)
> {
> - cpu_crash_vmclear_loaded_vmcss();
> + cpu_emergency_virt_cb *callback;
>
> - cpu_emergency_vmxoff();
> + rcu_read_lock();
> + callback = rcu_dereference(cpu_emergency_virt_callback);
> + if (callback)
> + callback();
> + rcu_read_unlock();
> +
> + /* KVM_AMD doesn't yet utilize the common callback. */
> cpu_emergency_svm_disable();
> }
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index fc9cdb4114cc..76cdb189f1b5 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -744,7 +744,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
> return ret;
> }
>
> -static void crash_vmclear_local_loaded_vmcss(void)
> +static void vmx_emergency_disable(void)
> {
> int cpu = raw_smp_processor_id();
> struct loaded_vmcs *v;
> @@ -752,6 +752,8 @@ static void crash_vmclear_local_loaded_vmcss(void)
> list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
> loaded_vmcss_on_cpu_link)
> vmcs_clear(v->vmcs);
> +
> + __cpu_emergency_vmxoff();
__cpu_emergency_vmxoff() internally checks whether VMX is enabled in CR4.
Logically, it looks more reasonable to do such a check before VMCLEARing active
VMCSes, although in practice there should be no problem, I think.
But this problem is inherited from the existing code in upstream, so I am not
sure whether it is worth fixing.
> }
>
> static void __loaded_vmcs_clear(void *arg)
> @@ -8547,7 +8549,7 @@ static void __vmx_exit(void)
> {
> allow_smaller_maxphyaddr = false;
>
> - cpu_emergency_unregister_virt_callback(crash_vmclear_local_loaded_vmcss);
> + cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
>
> vmx_cleanup_l1d_flush();
> }
> @@ -8597,7 +8599,7 @@ static int __init vmx_init(void)
> pi_init_cpu(cpu);
> }
>
> - cpu_emergency_register_virt_callback(crash_vmclear_local_loaded_vmcss);
> + cpu_emergency_register_virt_callback(vmx_emergency_disable);
>
> vmx_check_vmcs12_offsets();
>
> --
> 2.40.1.606.ga4b1b128d6-goog
>
On Mon, May 22, 2023, Kai Huang wrote:
> On Fri, 2023-05-12 at 16:50 -0700, Sean Christopherson wrote:
> > Use KVM VMX's reboot/crash callback to do VMXOFF in an emergency instead
> > of manually and blindly doing VMXOFF. There's no need to attempt VMXOFF
> > if a hypervisor, i.e. KVM, isn't loaded/active, i.e. if the CPU can't
> > possibly be post-VMXON.
> >
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index fc9cdb4114cc..76cdb189f1b5 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -744,7 +744,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
> > return ret;
> > }
> >
> > -static void crash_vmclear_local_loaded_vmcss(void)
> > +static void vmx_emergency_disable(void)
> > {
> > int cpu = raw_smp_processor_id();
> > struct loaded_vmcs *v;
> > @@ -752,6 +752,8 @@ static void crash_vmclear_local_loaded_vmcss(void)
> > list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
> > loaded_vmcss_on_cpu_link)
> > vmcs_clear(v->vmcs);
> > +
> > + __cpu_emergency_vmxoff();
>
> __cpu_emergency_vmxoff() internally checks whether VMX is enabled in CR4.
> Logically, it looks more reasonable to do such a check before VMCLEARing active
> VMCSes, although in practice there should be no problem, I think.
>
> But this problem is inherited from the existing code in upstream, so I am not
> sure whether it is worth fixing.
Hmm, I think it's worth fixing, if only to avoid confusing future readers. Blindly
doing VMCLEAR but then conditionally executing VMXOFF is nonsensical. I'll tack on
a patch, and also add a comment to call out that CR4.VMXE can be _cleared_
asynchronously by NMI, but can't be set after being checked. I.e. explain that
checking CR4.VMXE is a "best effort" sort of thing.
On Mon, 2023-05-22 at 10:58 -0700, Sean Christopherson wrote:
> On Mon, May 22, 2023, Kai Huang wrote:
> > On Fri, 2023-05-12 at 16:50 -0700, Sean Christopherson wrote:
> > > Use KVM VMX's reboot/crash callback to do VMXOFF in an emergency instead
> > > of manually and blindly doing VMXOFF. There's no need to attempt VMXOFF
> > > if a hypervisor, i.e. KVM, isn't loaded/active, i.e. if the CPU can't
> > > possibly be post-VMXON.
> > >
> > > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > > ---
> > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > > index fc9cdb4114cc..76cdb189f1b5 100644
> > > --- a/arch/x86/kvm/vmx/vmx.c
> > > +++ b/arch/x86/kvm/vmx/vmx.c
> > > @@ -744,7 +744,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
> > > return ret;
> > > }
> > >
> > > -static void crash_vmclear_local_loaded_vmcss(void)
> > > +static void vmx_emergency_disable(void)
> > > {
> > > int cpu = raw_smp_processor_id();
> > > struct loaded_vmcs *v;
> > > @@ -752,6 +752,8 @@ static void crash_vmclear_local_loaded_vmcss(void)
> > > list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
> > > loaded_vmcss_on_cpu_link)
> > > vmcs_clear(v->vmcs);
> > > +
> > > + __cpu_emergency_vmxoff();
> >
> > __cpu_emergency_vmxoff() internally checks whether VMX is enabled in CR4.
> > Logically, it looks more reasonable to do such a check before VMCLEARing active
> > VMCSes, although in practice there should be no problem, I think.
> >
> > But this problem is inherited from the existing code in upstream, so I am not
> > sure whether it is worth fixing.
>
> Hmm, I think it's worth fixing, if only to avoid confusing future readers. Blindly
> doing VMCLEAR but then conditionally executing VMXOFF is nonsensical. I'll tack on
> a patch, and also add a comment to call out that CR4.VMXE can be _cleared_
> asynchronously by NMI, but can't be set after being checked. I.e. explain that
> checking CR4.VMXE is a "best effort" sort of thing.
Yeah looks good.
@@ -70,16 +70,6 @@ static inline void __cpu_emergency_vmxoff(void)
cpu_vmxoff();
}
-/** Disable VMX if it is supported and enabled on the current CPU
- */
-static inline void cpu_emergency_vmxoff(void)
-{
- if (cpu_has_vmx())
- __cpu_emergency_vmxoff();
-}
-
-
-
/*
* SVM functions:
@@ -787,13 +787,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
}
#endif
-/*
- * This is used to VMCLEAR all VMCSs loaded on the
- * processor. And when loading kvm_intel module, the
- * callback function pointer will be assigned.
- *
- * protected by rcu.
- */
+/* RCU-protected callback to disable virtualization prior to reboot. */
static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
@@ -815,17 +809,6 @@ void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
}
EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
-static inline void cpu_crash_vmclear_loaded_vmcss(void)
-{
- cpu_emergency_virt_cb *callback;
-
- rcu_read_lock();
- callback = rcu_dereference(cpu_emergency_virt_callback);
- if (callback)
- callback();
- rcu_read_unlock();
-}
-
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
@@ -836,9 +819,15 @@ int crashing_cpu = -1;
*/
void cpu_emergency_disable_virtualization(void)
{
- cpu_crash_vmclear_loaded_vmcss();
+ cpu_emergency_virt_cb *callback;
- cpu_emergency_vmxoff();
+ rcu_read_lock();
+ callback = rcu_dereference(cpu_emergency_virt_callback);
+ if (callback)
+ callback();
+ rcu_read_unlock();
+
+ /* KVM_AMD doesn't yet utilize the common callback. */
cpu_emergency_svm_disable();
}
@@ -744,7 +744,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
return ret;
}
-static void crash_vmclear_local_loaded_vmcss(void)
+static void vmx_emergency_disable(void)
{
int cpu = raw_smp_processor_id();
struct loaded_vmcs *v;
@@ -752,6 +752,8 @@ static void crash_vmclear_local_loaded_vmcss(void)
list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
loaded_vmcss_on_cpu_link)
vmcs_clear(v->vmcs);
+
+ __cpu_emergency_vmxoff();
}
static void __loaded_vmcs_clear(void *arg)
@@ -8547,7 +8549,7 @@ static void __vmx_exit(void)
{
allow_smaller_maxphyaddr = false;
- cpu_emergency_unregister_virt_callback(crash_vmclear_local_loaded_vmcss);
+ cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
vmx_cleanup_l1d_flush();
}
@@ -8597,7 +8599,7 @@ static int __init vmx_init(void)
pi_init_cpu(cpu);
}
- cpu_emergency_register_virt_callback(crash_vmclear_local_loaded_vmcss);
+ cpu_emergency_register_virt_callback(vmx_emergency_disable);
vmx_check_vmcs12_offsets();