[6/7] KVM: VMX: Provide separate subroutines for invoking NMI vs. IRQ handlers
Commit Message
Split the asm subroutines for handling NMIs versus IRQs that occur in the
guest so that the NMI handler can be called from a noinstr section. As a
bonus, the NMI path doesn't need an indirect branch.
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/vmx/vmenter.S | 70 +++++++++++++++++++++-----------------
arch/x86/kvm/vmx/vmx.c | 26 ++++++--------
2 files changed, 50 insertions(+), 46 deletions(-)
Comments
> +
> + /*
> + * "Restore" RSP from RBP, even though IRET has already unwound
> RSP to
> + * the correct value. objtool doesn't know the callee will IRET and,
> + * without the explicit restore, thinks the stack is getting walloped.
> + * Using an unwind hint is problematic due to x86-64's dynamic
> alignment.
> + */
> + mov %_ASM_BP, %_ASM_SP
> + pop %_ASM_BP
> + RET
For NMI, after this RET instruction, we continue to block NMIs. IRET instead?
> +.endm
> +
> .section .noinstr.text, "ax"
On Wed, Dec 14, 2022, Li, Xin3 wrote:
> > +
> > + /*
> > + * "Restore" RSP from RBP, even though IRET has already unwound
> > RSP to
> > + * the correct value. objtool doesn't know the callee will IRET and,
> > + * without the explicit restore, thinks the stack is getting walloped.
> > + * Using an unwind hint is problematic due to x86-64's dynamic
> > alignment.
> > + */
> > + mov %_ASM_BP, %_ASM_SP
> > + pop %_ASM_BP
> > + RET
>
> For NMI, after this RET instruction, we continue to block NMIs. IRET instead?
No, IRET has already been done by the kernel-provided handler, e.g. asm_exc_nmi_kvm_vmx()
by way of error_return(). That's why the CALL above (that got snipped) is preceded
by the creation of a synthetic NMI/IRQ stack frame: the target returns from the CALL
via IRET, not RET.
> > > + * "Restore" RSP from RBP, even though IRET has already unwound
> > > RSP to
> > > + * the correct value. objtool doesn't know the callee will IRET and,
> > > + * without the explicit restore, thinks the stack is getting walloped.
> > > + * Using an unwind hint is problematic due to x86-64's dynamic
> > > alignment.
> > > + */
> > > + mov %_ASM_BP, %_ASM_SP
> > > + pop %_ASM_BP
> > > + RET
> >
> > For NMI, after this RET instruction, we continue to block NMIs. IRET
> instead?
>
> No, IRET has already been done by the kernel-provided handler, e.g.
> asm_exc_nmi_kvm_vmx()
> by way of error_return(). That's why the CALL above (that got snipped) is
> preceded
> by the creation of a synthetic NMI/IRQ stack frame: the target returns from
> the CALL
> via IRET, not RET.
You're right, this assembly makes another call into the asm entry point, which
returns here with IRET.
> > > > + * "Restore" RSP from RBP, even though IRET has already unwound
> > > > RSP to
> > > > + * the correct value. objtool doesn't know the callee will IRET and,
> > > > + * without the explicit restore, thinks the stack is getting walloped.
> > > > + * Using an unwind hint is problematic due to x86-64's dynamic
> > > > alignment.
> > > > + */
> > > > + mov %_ASM_BP, %_ASM_SP
> > > > + pop %_ASM_BP
> > > > + RET
> > >
> > > For NMI, after this RET instruction, we continue to block NMIs. IRET
> > instead?
> >
> > No, IRET has already been done by the kernel-provided handler, e.g.
> > asm_exc_nmi_kvm_vmx()
> > by way of error_return(). That's why the CALL above (that got snipped) is
> > preceded
> > by the creation of a synthetic NMI/IRQ stack frame: the target returns from
> > the CALL
> > via IRET, not RET.
>
>
> You're right, this assembly makes another call into the asm entry point, which
> returns here with IRET.
Like IRET for IDT, we _need_ ERETS for FRED to unblock NMI ASAP. However
a FRED stack frame has way more information than an IDT stack frame,
thus it's complicated to create a FRED stack frame with assembly.
So I prefer "INT $2" for FRED now.
I will post the FRED patch set soon, lets sync up on this afterwards.
Xin
@@ -31,6 +31,39 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif
+.macro VMX_DO_EVENT_IRQOFF call_insn call_target
+ /*
+ * Unconditionally create a stack frame, getting the correct RSP on the
+ * stack (for x86-64) would take two instructions anyways, and RBP can
+ * be used to restore RSP to make objtool happy (see below).
+ */
+ push %_ASM_BP
+ mov %_ASM_SP, %_ASM_BP
+
+#ifdef CONFIG_X86_64
+ /*
+ * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
+ * creating the synthetic interrupt stack frame for the IRQ/NMI.
+ */
+ and $-16, %rsp
+ push $__KERNEL_DS
+ push %rbp
+#endif
+ pushf
+ push $__KERNEL_CS
+ \call_insn \call_target
+
+ /*
+ * "Restore" RSP from RBP, even though IRET has already unwound RSP to
+ * the correct value. objtool doesn't know the callee will IRET and,
+ * without the explicit restore, thinks the stack is getting walloped.
+ * Using an unwind hint is problematic due to x86-64's dynamic alignment.
+ */
+ mov %_ASM_BP, %_ASM_SP
+ pop %_ASM_BP
+ RET
+.endm
+
.section .noinstr.text, "ax"
/**
@@ -320,35 +353,10 @@ SYM_FUNC_START(vmread_error_trampoline)
SYM_FUNC_END(vmread_error_trampoline)
#endif
-SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
- /*
- * Unconditionally create a stack frame, getting the correct RSP on the
- * stack (for x86-64) would take two instructions anyways, and RBP can
- * be used to restore RSP to make objtool happy (see below).
- */
- push %_ASM_BP
- mov %_ASM_SP, %_ASM_BP
+SYM_FUNC_START(vmx_do_nmi_irqoff)
+ VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+SYM_FUNC_END(vmx_do_nmi_irqoff)
-#ifdef CONFIG_X86_64
- /*
- * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
- * creating the synthetic interrupt stack frame for the IRQ/NMI.
- */
- and $-16, %rsp
- push $__KERNEL_DS
- push %rbp
-#endif
- pushf
- push $__KERNEL_CS
- CALL_NOSPEC _ASM_ARG1
-
- /*
- * "Restore" RSP from RBP, even though IRET has already unwound RSP to
- * the correct value. objtool doesn't know the callee will IRET and,
- * without the explicit restore, thinks the stack is getting walloped.
- * Using an unwind hint is problematic due to x86-64's dynamic alignment.
- */
- mov %_ASM_BP, %_ASM_SP
- pop %_ASM_BP
- RET
-SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff)
+SYM_FUNC_START(vmx_do_interrupt_irqoff)
+ VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
+SYM_FUNC_END(vmx_do_interrupt_irqoff)
@@ -6786,17 +6786,8 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
}
-void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
-
-static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
- unsigned long entry)
-{
- bool is_nmi = entry == (unsigned long)asm_exc_nmi_kvm_vmx;
-
- kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : KVM_HANDLING_IRQ);
- vmx_do_interrupt_nmi_irqoff(entry);
- kvm_after_interrupt(vcpu);
-}
+void vmx_do_interrupt_irqoff(unsigned long entry);
+void vmx_do_nmi_irqoff(void);
static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
{
@@ -6820,7 +6811,6 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
{
- const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_kvm_vmx;
u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
/* if exit due to PF check for async PF */
@@ -6833,8 +6823,11 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
else if (is_machine_check(intr_info))
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
- else if (is_nmi(intr_info))
- handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry);
+ else if (is_nmi(intr_info)) {
+ kvm_before_interrupt(&vmx->vcpu, KVM_HANDLING_NMI);
+ vmx_do_nmi_irqoff();
+ kvm_after_interrupt(&vmx->vcpu);
+ }
}
static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
@@ -6847,7 +6840,10 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
"KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
- handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
+ kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
+ vmx_do_interrupt_irqoff(gate_offset(desc));
+ kvm_after_interrupt(vcpu);
+
vcpu->arch.at_instruction_boundary = true;
}