[6/7] KVM: VMX: Provide separate subroutines for invoking NMI vs. IRQ handlers

Message ID 20221213060912.654668-7-seanjc@google.com
State New
Series KVM: VMX: Handle NMI VM-Exits in noinstr section

Commit Message

Sean Christopherson Dec. 13, 2022, 6:09 a.m. UTC
Split the asm subroutines for handling NMIs versus IRQs that occur in the
guest so that the NMI handler can be called from a noinstr section.  As a
bonus, the NMI path doesn't need an indirect branch.
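
The resulting entry points (sketched here for orientation; the full patch
is below):

	/* NMI: direct call, reachable from a noinstr section */
	SYM_FUNC_START(vmx_do_nmi_irqoff)
		VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
	SYM_FUNC_END(vmx_do_nmi_irqoff)

	/* IRQ: still an indirect, retpoline-safe call to the IDT entry */
	SYM_FUNC_START(vmx_do_interrupt_irqoff)
		VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
	SYM_FUNC_END(vmx_do_interrupt_irqoff)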

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/vmenter.S | 70 +++++++++++++++++++++-----------------
 arch/x86/kvm/vmx/vmx.c     | 26 ++++++--------
 2 files changed, 50 insertions(+), 46 deletions(-)
  

Comments

Li, Xin3 Dec. 14, 2022, 9:23 p.m. UTC | #1
> +
> +	/*
> +	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
> +	 * the correct value.  objtool doesn't know the callee will IRET and,
> +	 * without the explicit restore, thinks the stack is getting walloped.
> +	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
> +	 */
> +	mov %_ASM_BP, %_ASM_SP
> +	pop %_ASM_BP
> +	RET

For NMI, after this RET instruction, we continue to block NMIs. IRET instead?

> +.endm
> +
>  .section .noinstr.text, "ax"
  
Sean Christopherson Dec. 15, 2022, 12:26 a.m. UTC | #2
On Wed, Dec 14, 2022, Li, Xin3 wrote:
> > +
> > +	/*
> > +	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
> > +	 * the correct value.  objtool doesn't know the callee will IRET and,
> > +	 * without the explicit restore, thinks the stack is getting walloped.
> > +	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
> > +	 */
> > +	mov %_ASM_BP, %_ASM_SP
> > +	pop %_ASM_BP
> > +	RET
> 
> For NMI, after this RET instruction, we continue to block NMIs. IRET instead?

No, IRET has already been done by the kernel-provided handler, e.g. asm_exc_nmi_kvm_vmx()
by way of error_return().  That's why the CALL above (that got snipped) is preceded
by the creation of a synthetic NMI/IRQ stack frame: the target returns from the CALL
via IRET, not RET.
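
For reference, a sketch of the synthetic frame VMX_DO_EVENT_IRQOFF builds
just before the CALL (x86-64 case, per the patch below), in push order:

	SS     = __KERNEL_DS        <- push $__KERNEL_DS
	RSP    = RBP value          <- push %rbp (points at the saved RBP)
	RFLAGS                      <- pushf
	CS     = __KERNEL_CS        <- push $__KERNEL_CS
	RIP    = return address     <- pushed by the CALL itself

The handler's IRET consumes all five entries and resumes at the instruction
after the CALL, and it's that IRET that unblocks NMIs.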
  
Li, Xin3 Dec. 15, 2022, 3:06 a.m. UTC | #3
> > > +	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
> > > +	 * the correct value.  objtool doesn't know the callee will IRET and,
> > > +	 * without the explicit restore, thinks the stack is getting walloped.
> > > +	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
> > > +	 */
> > > +	mov %_ASM_BP, %_ASM_SP
> > > +	pop %_ASM_BP
> > > +	RET
> >
> > For NMI, after this RET instruction, we continue to block NMIs. IRET instead?
> 
> No, IRET has already been done by the kernel-provided handler, e.g.
> asm_exc_nmi_kvm_vmx() by way of error_return().  That's why the CALL above
> (that got snipped) is preceded by the creation of a synthetic NMI/IRQ stack
> frame: the target returns from the CALL via IRET, not RET.


You're right, this assembly makes another call into the asm entry point, which
returns here with IRET.
  
Li, Xin3 Dec. 15, 2022, 5:18 a.m. UTC | #4
> > > > +	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
> > > > +	 * the correct value.  objtool doesn't know the callee will IRET and,
> > > > +	 * without the explicit restore, thinks the stack is getting walloped.
> > > > +	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
> > > > +	 */
> > > > +	mov %_ASM_BP, %_ASM_SP
> > > > +	pop %_ASM_BP
> > > > +	RET
> > >
> > > For NMI, after this RET instruction, we continue to block NMIs. IRET instead?
> >
> > No, IRET has already been done by the kernel-provided handler, e.g.
> > asm_exc_nmi_kvm_vmx() by way of error_return().  That's why the CALL above
> > (that got snipped) is preceded by the creation of a synthetic NMI/IRQ stack
> > frame: the target returns from the CALL via IRET, not RET.
> 
> 
> You're right, this assembly makes another call into the asm entry point, which
> returns here with IRET.

Like IRET for IDT, we _need_ ERETS for FRED to unblock NMIs ASAP. However,
a FRED stack frame carries much more information than an IDT stack frame,
so it's complicated to build a FRED stack frame in assembly.

So I prefer "INT $2" for FRED for now.

I will post the FRED patch set soon; let's sync up on this afterwards.
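
A minimal sketch of the "INT $2" idea (hypothetical helper, not part of
this series): let the CPU deliver a real vector-2 event itself, so the
return path (IRET today, ERETS under FRED) unblocks NMIs without KVM
hand-building a frame:

	static __always_inline void kvm_invoke_nmi_handler(void)
	{
		/*
		 * The CPU builds the full IDT (or FRED) NMI frame and
		 * dispatches to the vector-2 handler; the handler's
		 * IRET/ERETS then unblocks NMIs.
		 */
		asm volatile("int $2");
	}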

Xin
  

Patch

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 766c6b3ef5ed..9d987e7e48c4 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -31,6 +31,39 @@ 
 #define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
 #endif
 
+.macro VMX_DO_EVENT_IRQOFF call_insn call_target
+	/*
+	 * Unconditionally create a stack frame, getting the correct RSP on the
+	 * stack (for x86-64) would take two instructions anyways, and RBP can
+	 * be used to restore RSP to make objtool happy (see below).
+	 */
+	push %_ASM_BP
+	mov %_ASM_SP, %_ASM_BP
+
+#ifdef CONFIG_X86_64
+	/*
+	 * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
+	 * creating the synthetic interrupt stack frame for the IRQ/NMI.
+	 */
+	and  $-16, %rsp
+	push $__KERNEL_DS
+	push %rbp
+#endif
+	pushf
+	push $__KERNEL_CS
+	\call_insn \call_target
+
+	/*
+	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
+	 * the correct value.  objtool doesn't know the callee will IRET and,
+	 * without the explicit restore, thinks the stack is getting walloped.
+	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
+	 */
+	mov %_ASM_BP, %_ASM_SP
+	pop %_ASM_BP
+	RET
+.endm
+
 .section .noinstr.text, "ax"
 
 /**
@@ -320,35 +353,10 @@  SYM_FUNC_START(vmread_error_trampoline)
 SYM_FUNC_END(vmread_error_trampoline)
 #endif
 
-SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
-	/*
-	 * Unconditionally create a stack frame, getting the correct RSP on the
-	 * stack (for x86-64) would take two instructions anyways, and RBP can
-	 * be used to restore RSP to make objtool happy (see below).
-	 */
-	push %_ASM_BP
-	mov %_ASM_SP, %_ASM_BP
+SYM_FUNC_START(vmx_do_nmi_irqoff)
+	VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+SYM_FUNC_END(vmx_do_nmi_irqoff)
 
-#ifdef CONFIG_X86_64
-	/*
-	 * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
-	 * creating the synthetic interrupt stack frame for the IRQ/NMI.
-	 */
-	and  $-16, %rsp
-	push $__KERNEL_DS
-	push %rbp
-#endif
-	pushf
-	push $__KERNEL_CS
-	CALL_NOSPEC _ASM_ARG1
-
-	/*
-	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
-	 * the correct value.  objtool doesn't know the callee will IRET and,
-	 * without the explicit restore, thinks the stack is getting walloped.
-	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
-	 */
-	mov %_ASM_BP, %_ASM_SP
-	pop %_ASM_BP
-	RET
-SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff)
+SYM_FUNC_START(vmx_do_interrupt_irqoff)
+	VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
+SYM_FUNC_END(vmx_do_interrupt_irqoff)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7ace22ee240d..c242e2591896 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6786,17 +6786,8 @@  static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 	memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
 }
 
-void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
-
-static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
-					unsigned long entry)
-{
-	bool is_nmi = entry == (unsigned long)asm_exc_nmi_kvm_vmx;
-
-	kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : KVM_HANDLING_IRQ);
-	vmx_do_interrupt_nmi_irqoff(entry);
-	kvm_after_interrupt(vcpu);
-}
+void vmx_do_interrupt_irqoff(unsigned long entry);
+void vmx_do_nmi_irqoff(void);
 
 static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
 {
@@ -6820,7 +6811,6 @@  static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
 
 static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
 {
-	const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_kvm_vmx;
 	u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
 
 	/* if exit due to PF check for async PF */
@@ -6833,8 +6823,11 @@  static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
 	else if (is_machine_check(intr_info))
 		kvm_machine_check();
 	/* We need to handle NMIs before interrupts are enabled */
-	else if (is_nmi(intr_info))
-		handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry);
+	else if (is_nmi(intr_info)) {
+		kvm_before_interrupt(&vmx->vcpu, KVM_HANDLING_NMI);
+		vmx_do_nmi_irqoff();
+		kvm_after_interrupt(&vmx->vcpu);
+	}
 }
 
 static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
@@ -6847,7 +6840,10 @@  static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
 		return;
 
-	handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
+	kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
+	vmx_do_interrupt_irqoff(gate_offset(desc));
+	kvm_after_interrupt(vcpu);
+
 	vcpu->arch.at_instruction_boundary = true;
 }