[v10,33/38] x86/entry: Add fred_entry_from_kvm() for VMX to handle IRQ/NMI

Message ID 20230914044805.301390-34-xin3.li@intel.com
State New
Headers
Series x86: enable FRED for x86-64 |

Commit Message

Li, Xin3 Sept. 14, 2023, 4:48 a.m. UTC
  In IRQ/NMI induced VM exits, KVM VMX needs to execute the respective
handlers, which requires the software to create a FRED stack frame,
and use it to invoke the handlers. Add fred_entry_from_kvm() for
this job.

Export fred_entry_from_kvm() because VMX can be compiled as a module.

Suggested-by: Sean Christopherson <seanjc@google.com>
Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Xin Li <xin3.li@intel.com>
---

Changes since v9:
* Shove the whole thing into arch/x86/entry/entry_64_fred.S for invoking
  external_interrupt() and fred_exc_nmi() (Sean Christopherson).
* Correct and improve a few comments (Sean Christopherson).
* Merge the two IRQ/NMI asm entries into one as it's fine to invoke
  noinstr code from regular code (Thomas Gleixner).
* Set up the long mode and NMI flags in the augmented SS field of the FRED
  stack frame in C instead of asm (Thomas Gleixner).
* Add UNWIND_HINT_{SAVE,RESTORE} to get rid of the warning: "objtool:
  asm_fred_entry_from_kvm+0x0: unreachable instruction" (Peter Zijlstra).

Changes since v8:
* Add a new macro VMX_DO_FRED_EVENT_IRQOFF for FRED instead of
  refactoring VMX_DO_EVENT_IRQOFF (Sean Christopherson).
* Do NOT use a trampoline, just LEA+PUSH the return RIP, PUSH the error
  code, and jump to the FRED kernel entry point for NMI or call
  external_interrupt() for IRQs (Sean Christopherson).
* Call external_interrupt() only when FRED is enabled, and convert the
  non-FRED handling to external_interrupt() after FRED lands (Sean
  Christopherson).
---
 arch/x86/entry/entry_64_fred.S | 73 ++++++++++++++++++++++++++++++++++
 arch/x86/entry/entry_fred.c    | 14 +++++++
 arch/x86/include/asm/fred.h    | 18 +++++++++
 3 files changed, 105 insertions(+)
  

Comments

Paolo Bonzini Sept. 20, 2023, 5:54 p.m. UTC | #1
On 9/14/23 06:48, Xin Li wrote:
> +	/*
> +	 * Don't check the FRED stack level, the call stack leading to this
> +	 * helper is effectively constant and shallow (relatively speaking).

It's more that we don't need to protect from reentrancy.  The external 
interrupt uses stack level 0 so no adjustment would be needed anyway, 
and NMI does not use an IST even in the non-FRED case.

> +	 * Emulate the FRED-defined redzone and stack alignment.
> +	 */
> +	sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
> +	and $FRED_STACK_FRAME_RSP_MASK, %rsp
  
Li, Xin3 Sept. 20, 2023, 11:10 p.m. UTC | #2
> > +	/*
> > +	 * Don't check the FRED stack level, the call stack leading to this
> > +	 * helper is effectively constant and shallow (relatively speaking).
> 
> It's more that we don't need to protect from reentrancy.  The external
> interrupt uses stack level 0 so no adjustment would be needed anyway,
> and NMI does not use an IST even in the non-FRED case.

I will incorporate this comment.

I think a VMX NMI is kind of like a user level NMI, and we don't need
to worry about nested NMIs.

> 
> > +	 * Emulate the FRED-defined redzone and stack alignment.
> > +	 */
> > +	sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
> > +	and $FRED_STACK_FRAME_RSP_MASK, %rsp
  
Nikolay Borisov Sept. 21, 2023, 12:11 p.m. UTC | #3
On 14.09.23 г. 7:48 ч., Xin Li wrote:
> In IRQ/NMI induced VM exits, KVM VMX needs to execute the respective
> handlers, which requires the software to create a FRED stack frame,
> and use it to invoke the handlers. Add fred_irq_entry_from_kvm() for
> this job.
> 
> Export fred_entry_from_kvm() because VMX can be compiled as a module.
> 
> Suggested-by: Sean Christopherson <seanjc@google.com>
> Tested-by: Shan Kang <shan.kang@intel.com>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Signed-off-by: Xin Li <xin3.li@intel.com>
> ---
> 
> Changes since v9:
> * Shove the whole thing into arch/x86/entry/entry_64_fred.S for invoking
>    external_interrupt() and fred_exc_nmi() (Sean Christopherson).
> * Correct and improve a few comments (Sean Christopherson).
> * Merge the two IRQ/NMI asm entries into one as it's fine to invoke
>    noinstr code from regular code (Thomas Gleixner).
> * Setup the long mode and NMI flags in the augmented SS field of FRED
>    stack frame in C instead of asm (Thomas Gleixner).
> * Add UNWIND_HINT_{SAVE,RESTORE} to get rid of the warning: "objtool:
>    asm_fred_entry_from_kvm+0x0: unreachable instruction" (Peter Zijlstra).
> 
> Changes since v8:
> * Add a new macro VMX_DO_FRED_EVENT_IRQOFF for FRED instead of
>    refactoring VMX_DO_EVENT_IRQOFF (Sean Christopherson).
> * Do NOT use a trampoline, just LEA+PUSH the return RIP, PUSH the error
>    code, and jump to the FRED kernel entry point for NMI or call
>    external_interrupt() for IRQs (Sean Christopherson).
> * Call external_interrupt() only when FRED is enabled, and convert the
>    non-FRED handling to external_interrupt() after FRED lands (Sean
>    Christopherson).
> ---
>   arch/x86/entry/entry_64_fred.S | 73 ++++++++++++++++++++++++++++++++++
>   arch/x86/entry/entry_fred.c    | 14 +++++++
>   arch/x86/include/asm/fred.h    | 18 +++++++++
>   3 files changed, 105 insertions(+)
> 
> diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
> index d1c2fc4af8ae..f1088d6f2054 100644
> --- a/arch/x86/entry/entry_64_fred.S
> +++ b/arch/x86/entry/entry_64_fred.S
> @@ -4,7 +4,9 @@
>    */
>   
>   #include <asm/asm.h>
> +#include <asm/export.h>
>   #include <asm/fred.h>
> +#include <asm/segment.h>
>   
>   #include "calling.h"
>   
> @@ -54,3 +56,74 @@ SYM_CODE_START_NOALIGN(asm_fred_entrypoint_kernel)
>   	FRED_EXIT
>   	ERETS
>   SYM_CODE_END(asm_fred_entrypoint_kernel)
> +
> +#if IS_ENABLED(CONFIG_KVM_INTEL)
> +SYM_FUNC_START(asm_fred_entry_from_kvm)
> +	push %rbp
> +	mov %rsp, %rbp

Use the FRAME_BEGIN/FRAME_END macros to omit this code if 
CONFIG_FRAME_POINTER is disabled.

> +
> +	UNWIND_HINT_SAVE
> +
> +	/*
> +	 * Don't check the FRED stack level, the call stack leading to this
> +	 * helper is effectively constant and shallow (relatively speaking).
> +	 *
> +	 * Emulate the FRED-defined redzone and stack alignment.
> +	 */
> +	sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
> +	and $FRED_STACK_FRAME_RSP_MASK, %rsp
> +
> +	/*
> +	 * Start to push a FRED stack frame, which is always 64 bytes:
> +	 *
> +	 * +--------+-----------------+
> +	 * | Bytes  | Usage           |
> +	 * +--------+-----------------+
> +	 * | 63:56  | Reserved        |
> +	 * | 55:48  | Event Data      |
> +	 * | 47:40  | SS + Event Info |
> +	 * | 39:32  | RSP             |
> +	 * | 31:24  | RFLAGS          |
> +	 * | 23:16  | CS + Aux Info   |
> +	 * |  15:8  | RIP             |
> +	 * |   7:0  | Error Code      |
> +	 * +--------+-----------------+
> +	 */
> +	push $0				/* Reserved, must be 0 */
> +	push $0				/* Event data, 0 for IRQ/NMI */
> +	push %rdi			/* fred_ss handed in by the caller */
> +	push %rbp
> +	pushf
> +	mov $__KERNEL_CS, %rax
> +	push %rax
> +
> +	/*
> +	 * Unlike the IDT event delivery, FRED _always_ pushes an error code
> +	 * after pushing the return RIP, thus the CALL instruction CANNOT be
> +	 * used here to push the return RIP, otherwise there is no chance to
> +	 * push an error code before invoking the IRQ/NMI handler.
> +	 *
> +	 * Use LEA to get the return RIP and push it, then push an error code.
> +	 */
> +	lea 1f(%rip), %rax
> +	push %rax				/* Return RIP */
> +	push $0					/* Error code, 0 for IRQ/NMI */
> +
> +	PUSH_AND_CLEAR_REGS clear_bp=0 unwind_hint=0
> +	movq %rsp, %rdi				/* %rdi -> pt_regs */
> +	call __fred_entry_from_kvm		/* Call the C entry point */
> +	POP_REGS
> +	ERETS
> +1:
> +	/*
> +	 * Objtool doesn't understand what ERETS does, this hint tells it that
> +	 * yes, we'll reach here and with what stack state. A save/restore pair
> +	 * isn't strictly needed, but it's the simplest form.
> +	 */
> +	UNWIND_HINT_RESTORE
> +	pop %rbp

FRAME_END

> +	RET
> +
> +SYM_FUNC_END(asm_fred_entry_from_kvm)
> +EXPORT_SYMBOL_GPL(asm_fred_entry_from_kvm);
> +#endif


<snip>
  
Paolo Bonzini Sept. 21, 2023, 12:38 p.m. UTC | #4
On 9/21/23 14:11, Nikolay Borisov wrote:
>>
>> +SYM_FUNC_START(asm_fred_entry_from_kvm)
>> +    push %rbp
>> +    mov %rsp, %rbp
> 
> use FRAME_BEGIN/FRAME_END macros to ommit this code if 
> CONFIG_FRAME_POINTER is disabled.

No, the previous stack pointer is used below, so the code might as well 
use %rbp for that; but it must do so unconditionally.

Paolo

>> +
>> +    UNWIND_HINT_SAVE
>> +
>> +    /*
>> +     * Don't check the FRED stack level, the call stack leading to this
>> +     * helper is effectively constant and shallow (relatively speaking).
>> +     *
>> +     * Emulate the FRED-defined redzone and stack alignment.
>> +     */
>> +    sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
>> +    and $FRED_STACK_FRAME_RSP_MASK, %rsp
>> +
>> +    /*
>> +     * Start to push a FRED stack frame, which is always 64 bytes:
>> +     *
>> +     * +--------+-----------------+
>> +     * | Bytes  | Usage           |
>> +     * +--------+-----------------+
>> +     * | 63:56  | Reserved        |
>> +     * | 55:48  | Event Data      |
>> +     * | 47:40  | SS + Event Info |
>> +     * | 39:32  | RSP             |
>> +     * | 31:24  | RFLAGS          |
>> +     * | 23:16  | CS + Aux Info   |
>> +     * |  15:8  | RIP             |
>> +     * |   7:0  | Error Code      |
>> +     * +--------+-----------------+
>> +     */
>> +    push $0                /* Reserved, must be 0 */
>> +    push $0                /* Event data, 0 for IRQ/NMI */
>> +    push %rdi            /* fred_ss handed in by the caller */
>> +    push %rbp

^^ here

Paolo

>> +    pushf
>> +    mov $__KERNEL_CS, %rax
>> +    push %rax
  

Patch

diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
index d1c2fc4af8ae..f1088d6f2054 100644
--- a/arch/x86/entry/entry_64_fred.S
+++ b/arch/x86/entry/entry_64_fred.S
@@ -4,7 +4,9 @@ 
  */
 
 #include <asm/asm.h>
+#include <asm/export.h>
 #include <asm/fred.h>
+#include <asm/segment.h>
 
 #include "calling.h"
 
@@ -54,3 +56,74 @@  SYM_CODE_START_NOALIGN(asm_fred_entrypoint_kernel)
 	FRED_EXIT
 	ERETS
 SYM_CODE_END(asm_fred_entrypoint_kernel)
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+SYM_FUNC_START(asm_fred_entry_from_kvm)
+	push %rbp
+	mov %rsp, %rbp
+
+	UNWIND_HINT_SAVE
+
+	/*
+	 * No stack level check: reentrancy protection isn't needed, IRQs use
+	 * stack level 0 and NMIs use no IST even in the non-FRED case.
+	 *
+	 * Emulate the FRED-defined redzone and stack alignment.
+	 */
+	sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
+	and $FRED_STACK_FRAME_RSP_MASK, %rsp
+
+	/*
+	 * Start to push a FRED stack frame, which is always 64 bytes:
+	 *
+	 * +--------+-----------------+
+	 * | Bytes  | Usage           |
+	 * +--------+-----------------+
+	 * | 63:56  | Reserved        |
+	 * | 55:48  | Event Data      |
+	 * | 47:40  | SS + Event Info |
+	 * | 39:32  | RSP             |
+	 * | 31:24  | RFLAGS          |
+	 * | 23:16  | CS + Aux Info   |
+	 * |  15:8  | RIP             |
+	 * |   7:0  | Error Code      |
+	 * +--------+-----------------+
+	 */
+	push $0				/* Reserved, must be 0 */
+	push $0				/* Event data, 0 for IRQ/NMI */
+	push %rdi			/* fred_ss handed in by the caller */
+	push %rbp			/* RSP: %rbp holds %rsp from the prologue */
+	pushf				/* RFLAGS */
+	mov $__KERNEL_CS, %rax
+	push %rax			/* CS */
+
+	/*
+	 * Unlike the IDT event delivery, FRED _always_ pushes an error code
+	 * after pushing the return RIP, thus the CALL instruction CANNOT be
+	 * used here to push the return RIP, otherwise there is no chance to
+	 * push an error code before invoking the IRQ/NMI handler.
+	 *
+	 * Use LEA to get the return RIP and push it, then push an error code.
+	 */
+	lea 1f(%rip), %rax
+	push %rax				/* Return RIP */
+	push $0					/* Error code, 0 for IRQ/NMI */
+
+	PUSH_AND_CLEAR_REGS clear_bp=0 unwind_hint=0
+	movq %rsp, %rdi				/* %rdi -> pt_regs */
+	call __fred_entry_from_kvm		/* Call the C entry point */
+	POP_REGS
+	ERETS
+1:
+	/*
+	 * Objtool doesn't understand what ERETS does, this hint tells it that
+	 * yes, we'll reach here and with what stack state. A save/restore pair
+	 * isn't strictly needed, but it's the simplest form.
+	 */
+	UNWIND_HINT_RESTORE
+	pop %rbp
+	RET
+
+SYM_FUNC_END(asm_fred_entry_from_kvm)
+EXPORT_SYMBOL_GPL(asm_fred_entry_from_kvm);
+#endif
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index 2fd3e421e066..f8774611af80 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -242,3 +242,17 @@  __visible noinstr void fred_entry_from_kernel(struct pt_regs *regs)
 		return fred_bad_type(regs, error_code);
 	}
 }
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+/* Dispatch the event that asm_fred_entry_from_kvm() framed for KVM VMX. */
+__visible noinstr void __fred_entry_from_kvm(struct pt_regs *regs)
+{
+	unsigned int type = regs->fred_ss.type;
+
+	if (type == EVENT_TYPE_EXTINT)
+		return fred_extint(regs);
+	if (type == EVENT_TYPE_NMI)
+		return fred_exc_nmi(regs);
+	WARN_ON_ONCE(1);	/* neither an IRQ nor an NMI: warn once */
+}
+#endif
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index 16a64ffecbf8..2fa9f34e5c95 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -9,6 +9,7 @@ 
 #include <linux/const.h>
 
 #include <asm/asm.h>
+#include <asm/trapnr.h>
 
 /*
  * FRED event return instruction opcodes for ERET{S,U}; supported in
@@ -62,12 +63,29 @@  static __always_inline unsigned long fred_event_data(struct pt_regs *regs)
 
 void asm_fred_entrypoint_user(void);
 void asm_fred_entrypoint_kernel(void);
+void asm_fred_entry_from_kvm(struct fred_ss);
 
 __visible void fred_entry_from_user(struct pt_regs *regs);
 __visible void fred_entry_from_kernel(struct pt_regs *regs);
+__visible void __fred_entry_from_kvm(struct pt_regs *regs);
+
+/* Entry for KVM VMX IRQ/NMI handling; callable from noinstr code, thus __always_inline */
+static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector)
+{
+	struct fred_ss ss = {
+		.ss     = __KERNEL_DS,
+		.type   = type,
+		.vector = vector,
+		.nmi    = type == EVENT_TYPE_NMI,	/* NMI flag, set only for NMIs */
+		.lm     = 1,				/* long mode flag */
+	};
+
+	asm_fred_entry_from_kvm(ss);
+}
 
 #else /* CONFIG_X86_FRED */
 static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
+static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { } /* no-op stub: CONFIG_X86_FRED is off */
 #endif /* CONFIG_X86_FRED */
 #endif /* !__ASSEMBLY__ */