[7/7] KVM: VMX: Handle NMI VM-Exits in noinstr region

Message ID: 20221213060912.654668-8-seanjc@google.com
State: New
Series: KVM: VMX: Handle NMI VM-Exits in noinstr section

Commit Message

Sean Christopherson Dec. 13, 2022, 6:09 a.m. UTC
Move VMX's handling of NMI VM-Exits into vmx_vcpu_enter_exit() so that
the NMI is handled prior to leaving the safety of noinstr.  Handling the
NMI after leaving noinstr exposes the kernel to potential ordering
problems, as an instrumentation-induced fault (e.g. #DB, #BP, or #PF)
will unblock NMIs when IRETing back to the faulting instruction.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/vmcs.h    |  4 ++--
 arch/x86/kvm/vmx/vmenter.S |  8 ++++----
 arch/x86/kvm/vmx/vmx.c     | 34 +++++++++++++++++++++-------------
 arch/x86/kvm/x86.h         |  6 +++---
 4 files changed, 30 insertions(+), 22 deletions(-)
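
As a rough sketch of the window the commit message describes (an
illustrative comment trace, not code from the patch; #BP below stands
in for any instrumentation-induced fault):

/*
 * 1. NMI arrives while the guest is running => NMI VM-Exit; the CPU
 *    blocks further NMIs, as if the NMI had been delivered.
 * 2. The host resumes in the noinstr region of vmx_vcpu_enter_exit().
 * 3. The host leaves noinstr before calling vmx_do_nmi_irqoff() and
 *    hits an instrumentation-induced #BP.
 * 4. The #BP handler IRETs back to the faulting instruction, and that
 *    IRET unblocks NMIs.
 * 5. A new NMI can now be delivered before the VM-Exit NMI has been
 *    handled, i.e. the NMI-blocked ordering is lost.
 *
 * Handling the NMI inside the noinstr region makes step 3 impossible.
 */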
  

Comments

Peter Zijlstra Jan. 19, 2023, 9:49 a.m. UTC | #1
On Tue, Dec 13, 2022 at 06:09:12AM +0000, Sean Christopherson wrote:

> @@ -7119,6 +7118,18 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
>  
>  	vmx_enable_fb_clear(vmx);
>  
> +	if (unlikely(vmx->fail))
> +		vmx->exit_reason.full = 0xdead;
> +	else
> +		vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
> +
> +	if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
> +	    is_nmi(vmx_get_intr_info(vcpu))) {
> +		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
> +		vmx_do_nmi_irqoff();
> +		kvm_after_interrupt(vcpu);
> +	}
> +
>  	guest_state_exit_irqoff();
>  }

I think we're going to have to sprinkle __always_inline on the
kvm_{before,after}_interrupt() things (or I missed the earlier patches
doing this already), sometimes compilers are just weird.
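
For reference, a minimal sketch of the failure mode being guarded
against (hypothetical helper names; an out-of-line copy reachable from
a noinstr path is what objtool's noinstr validation is meant to catch):

/* "inline" is only a hint; the compiler may still emit this
 * out-of-line, in instrumentable text. */
static inline void helper_maybe_outlined(int *p)
{
	*p = 1;
}

/* __always_inline forces the body into the caller, so no CALL
 * escapes the noinstr section. */
static __always_inline void helper_forced_inline(int *p)
{
	*p = 2;
}

static noinstr void noinstr_path_sketch(int *p)
{
	helper_maybe_outlined(p);	/* may become a call: bad */
	helper_forced_inline(p);	/* inlined here: OK */
}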
  
Sean Christopherson Jan. 19, 2023, 3:39 p.m. UTC | #2
On Thu, Jan 19, 2023, Peter Zijlstra wrote:
> On Tue, Dec 13, 2022 at 06:09:12AM +0000, Sean Christopherson wrote:
> 
> > @@ -7119,6 +7118,18 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
> >  
> >  	vmx_enable_fb_clear(vmx);
> >  
> > +	if (unlikely(vmx->fail))
> > +		vmx->exit_reason.full = 0xdead;
> > +	else
> > +		vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
> > +
> > +	if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
> > +	    is_nmi(vmx_get_intr_info(vcpu))) {
> > +		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
> > +		vmx_do_nmi_irqoff();
> > +		kvm_after_interrupt(vcpu);
> > +	}
> > +
> >  	guest_state_exit_irqoff();
> >  }
> 
> I think we're going to have to sprinkle __always_inline on the
> kvm_{before,after}_interrupt() things (or I missed the earlier patches
> doing this already), sometimes compilers are just weird.

It's in this patch, just lurking at the bottom.

> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 9de72586f406..44d1827f0a30 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -382,13 +382,13 @@ enum kvm_intr_type {
>         KVM_HANDLING_NMI,
>  };
>
> -static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
> -                                       enum kvm_intr_type intr)
> +static __always_inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
> +                                                enum kvm_intr_type intr)
>  {
>         WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr);
>  }
>
> -static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
> +static __always_inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
>  {
>         WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0);
>  }
  
Peter Zijlstra Jan. 19, 2023, 3:52 p.m. UTC | #3
On Thu, Jan 19, 2023 at 03:39:55PM +0000, Sean Christopherson wrote:

> It's in this patch, just lurking at the bottom.

Ha!, so much for reading ..
  

Patch

diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index ac290a44a693..7c1996b433e2 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -75,7 +75,7 @@ struct loaded_vmcs {
 	struct vmcs_controls_shadow controls_shadow;
 };
 
-static inline bool is_intr_type(u32 intr_info, u32 type)
+static __always_inline bool is_intr_type(u32 intr_info, u32 type)
 {
 	const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK;
 
@@ -146,7 +146,7 @@ static inline bool is_icebp(u32 intr_info)
 	return is_intr_type(intr_info, INTR_TYPE_PRIV_SW_EXCEPTION);
 }
 
-static inline bool is_nmi(u32 intr_info)
+static __always_inline bool is_nmi(u32 intr_info)
 {
 	return is_intr_type(intr_info, INTR_TYPE_NMI_INTR);
 }
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 9d987e7e48c4..059243085211 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -299,6 +299,10 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 
 SYM_FUNC_END(__vmx_vcpu_run)
 
+SYM_FUNC_START(vmx_do_nmi_irqoff)
+	VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+SYM_FUNC_END(vmx_do_nmi_irqoff)
+
 
 .section .text, "ax"
 
@@ -353,10 +357,6 @@ SYM_FUNC_START(vmread_error_trampoline)
 SYM_FUNC_END(vmread_error_trampoline)
 #endif
 
-SYM_FUNC_START(vmx_do_nmi_irqoff)
-	VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
-SYM_FUNC_END(vmx_do_nmi_irqoff)
-
 SYM_FUNC_START(vmx_do_interrupt_irqoff)
 	VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
 SYM_FUNC_END(vmx_do_interrupt_irqoff)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c242e2591896..b03020ca1840 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5095,8 +5095,13 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 	vect_info = vmx->idt_vectoring_info;
 	intr_info = vmx_get_intr_info(vcpu);
 
+	/*
+	 * Machine checks are handled by handle_exception_irqoff(), or by
+	 * vmx_vcpu_run() if a #MC occurs on VM-Entry.  NMIs are handled by
+	 * vmx_vcpu_enter_exit().
+	 */
 	if (is_machine_check(intr_info) || is_nmi(intr_info))
-		return 1; /* handled by handle_exception_nmi_irqoff() */
+		return 1;
 
 	/*
 	 * Queue the exception here instead of in handle_nm_fault_irqoff().
@@ -6809,7 +6814,7 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
 		rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
 }
 
-static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
+static void handle_exception_irqoff(struct vcpu_vmx *vmx)
 {
 	u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
 
@@ -6822,12 +6827,6 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
 	/* Handle machine checks before interrupts are enabled */
 	else if (is_machine_check(intr_info))
 		kvm_machine_check();
-	/* We need to handle NMIs before interrupts are enabled */
-	else if (is_nmi(intr_info)) {
-		kvm_before_interrupt(&vmx->vcpu, KVM_HANDLING_NMI);
-		vmx_do_nmi_irqoff();
-		kvm_after_interrupt(&vmx->vcpu);
-	}
 }
 
 static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
@@ -6857,7 +6856,7 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 	if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
 		handle_external_interrupt_irqoff(vcpu);
 	else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
-		handle_exception_nmi_irqoff(vmx);
+		handle_exception_irqoff(vmx);
 }
 
 /*
@@ -7119,6 +7118,18 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 
 	vmx_enable_fb_clear(vmx);
 
+	if (unlikely(vmx->fail))
+		vmx->exit_reason.full = 0xdead;
+	else
+		vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+
+	if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
+	    is_nmi(vmx_get_intr_info(vcpu))) {
+		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
+		vmx_do_nmi_irqoff();
+		kvm_after_interrupt(vcpu);
+	}
+
 	guest_state_exit_irqoff();
 }
 
@@ -7260,12 +7271,9 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	vmx->idt_vectoring_info = 0;
 
-	if (unlikely(vmx->fail)) {
-		vmx->exit_reason.full = 0xdead;
+	if (unlikely(vmx->fail))
 		return EXIT_FASTPATH_NONE;
-	}
 
-	vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
 	if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
 		kvm_machine_check();
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 9de72586f406..44d1827f0a30 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -382,13 +382,13 @@ enum kvm_intr_type {
 	KVM_HANDLING_NMI,
 };
 
-static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
-					enum kvm_intr_type intr)
+static __always_inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
+						 enum kvm_intr_type intr)
 {
 	WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr);
 }
 
-static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
+static __always_inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
 {
 	WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0);
 }