[RFC,V3,12/16] x86/sev: Add a #HV exception handler
Commit Message
From: Tianyu Lan <tiala@microsoft.com>
Add a #HV exception handler that uses IST stack.
Signed-off-by: Tianyu Lan <tiala@microsoft.com>
---
Change since RFC V2:
* Remove unnecessary line in the change log.
---
arch/x86/entry/entry_64.S | 58 +++++++++++++++++++++++++++
arch/x86/include/asm/cpu_entry_area.h | 6 +++
arch/x86/include/asm/idtentry.h | 39 +++++++++++++++++-
arch/x86/include/asm/page_64_types.h | 1 +
arch/x86/include/asm/trapnr.h | 1 +
arch/x86/include/asm/traps.h | 1 +
arch/x86/kernel/cpu/common.c | 1 +
arch/x86/kernel/dumpstack_64.c | 9 ++++-
arch/x86/kernel/idt.c | 1 +
arch/x86/kernel/sev.c | 53 ++++++++++++++++++++++++
arch/x86/kernel/traps.c | 40 ++++++++++++++++++
arch/x86/mm/cpu_entry_area.c | 2 +
12 files changed, 209 insertions(+), 3 deletions(-)
Comments
Hi Tianyu,
Just trying to skim over what all changed in this version.
> From: Tianyu Lan <tiala@microsoft.com>
>
> Add a #HV exception handler that uses IST stack.
>
> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
> ---
> Change since RFC V2:
> * Remove unnecessary line in the change log.
> ---
> arch/x86/entry/entry_64.S | 58 +++++++++++++++++++++++++++
> arch/x86/include/asm/cpu_entry_area.h | 6 +++
> arch/x86/include/asm/idtentry.h | 39 +++++++++++++++++-
> arch/x86/include/asm/page_64_types.h | 1 +
> arch/x86/include/asm/trapnr.h | 1 +
> arch/x86/include/asm/traps.h | 1 +
> arch/x86/kernel/cpu/common.c | 1 +
> arch/x86/kernel/dumpstack_64.c | 9 ++++-
> arch/x86/kernel/idt.c | 1 +
> arch/x86/kernel/sev.c | 53 ++++++++++++++++++++++++
> arch/x86/kernel/traps.c | 40 ++++++++++++++++++
> arch/x86/mm/cpu_entry_area.c | 2 +
> 12 files changed, 209 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 15739a2c0983..6baec7653f19 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -563,6 +563,64 @@ SYM_CODE_START(\asmsym)
> .Lfrom_usermode_switch_stack_\@:
> idtentry_body user_\cfunc, has_error_code=1
>
> +_ASM_NOKPROBE(\asmsym)
> +SYM_CODE_END(\asmsym)
> +.endm
> +/*
> + * idtentry_hv - Macro to generate entry stub for #HV
> + * @vector: Vector number
> + * @asmsym: ASM symbol for the entry point
> + * @cfunc: C function to be called
> + *
> + * The macro emits code to set up the kernel context for #HV. The #HV handler
> + * runs on an IST stack and needs to be able to support nested #HV exceptions.
> + *
> + * To make this work the #HV entry code tries its best to pretend it doesn't use
> + * an IST stack by switching to the task stack if coming from user-space (which
> + * includes early SYSCALL entry path) or back to the stack in the IRET frame if
> + * entered from kernel-mode.
> + *
> + * If entered from kernel-mode the return stack is validated first, and if it is
> + * not safe to use (e.g. because it points to the entry stack) the #HV handler
> + * will switch to a fall-back stack (HV2) and call a special handler function.
> + *
> + * The macro is only used for one vector, but it is planned to be extended in
> + * the future for the #HV exception.
seems you did not remove this line in the comment.
> + */
> +.macro idtentry_hv vector asmsym cfunc
> +SYM_CODE_START(\asmsym)
> + UNWIND_HINT_IRET_REGS
> + ASM_CLAC
Did you get a chance to review the new instructions
added at the start similar to idtentry_vc and comments
added assuggested here?
https://lore.kernel.org/lkml/16e50239-39b2-4fb4-5110-18f13ba197fe@amd.com/
> + pushq $-1 /* ORIG_RAX: no syscall to restart */
> +
> + testb $3, CS-ORIG_RAX(%rsp)
> + jnz .Lfrom_usermode_switch_stack_\@
> +
> + call paranoid_entry
> +
> + UNWIND_HINT_REGS
> +
> + /*
> + * Switch off the IST stack to make it free for nested exceptions.
> + */
> + movq %rsp, %rdi /* pt_regs pointer */
> + call hv_switch_off_ist
> + movq %rax, %rsp /* Switch to new stack */
> +
> + UNWIND_HINT_REGS
> +
> + /* Update pt_regs */
> + movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/
> + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
> +
> + movq %rsp, %rdi /* pt_regs pointer */
> + call kernel_\cfunc
> +
> + jmp paranoid_exit
> +
> +.Lfrom_usermode_switch_stack_\@:
> + idtentry_body user_\cfunc, has_error_code=1
> +
> _ASM_NOKPROBE(\asmsym)
> SYM_CODE_END(\asmsym)
> .endm
> diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
> index 462fc34f1317..2186ed601b4a 100644
> --- a/arch/x86/include/asm/cpu_entry_area.h
> +++ b/arch/x86/include/asm/cpu_entry_area.h
> @@ -30,6 +30,10 @@
> char VC_stack[optional_stack_size]; \
> char VC2_stack_guard[guardsize]; \
> char VC2_stack[optional_stack_size]; \
> + char HV_stack_guard[guardsize]; \
> + char HV_stack[optional_stack_size]; \
> + char HV2_stack_guard[guardsize]; \
> + char HV2_stack[optional_stack_size]; \
> char IST_top_guard[guardsize]; \
>
> /* The exception stacks' physical storage. No guard pages required */
> @@ -52,6 +56,8 @@ enum exception_stack_ordering {
> ESTACK_MCE,
> ESTACK_VC,
> ESTACK_VC2,
> + ESTACK_HV,
> + ESTACK_HV2,
> N_EXCEPTION_STACKS
> };
>
> diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
> index 72184b0b2219..652fea10d377 100644
> --- a/arch/x86/include/asm/idtentry.h
> +++ b/arch/x86/include/asm/idtentry.h
> @@ -317,6 +317,19 @@ static __always_inline void __##func(struct pt_regs *regs)
> __visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \
> __visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code)
>
> +
> +/**
> + * DECLARE_IDTENTRY_HV - Declare functions for the HV entry point
> + * @vector: Vector number (ignored for C)
> + * @func: Function name of the entry point
> + *
> + * Maps to DECLARE_IDTENTRY_RAW, but declares also the user C handler.
> + */
> +#define DECLARE_IDTENTRY_HV(vector, func) \
> + DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \
> + __visible noinstr void kernel_##func(struct pt_regs *regs); \
> + __visible noinstr void user_##func(struct pt_regs *regs)
> +
> /**
> * DEFINE_IDTENTRY_IST - Emit code for IST entry points
> * @func: Function name of the entry point
> @@ -376,6 +389,26 @@ static __always_inline void __##func(struct pt_regs *regs)
> #define DEFINE_IDTENTRY_VC_USER(func) \
> DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func)
>
> +/**
> + * DEFINE_IDTENTRY_HV_KERNEL - Emit code for HV injection handler
> + * when raised from kernel mode
> + * @func: Function name of the entry point
> + *
> + * Maps to DEFINE_IDTENTRY_RAW
> + */
> +#define DEFINE_IDTENTRY_HV_KERNEL(func) \
> + DEFINE_IDTENTRY_RAW(kernel_##func)
> +
> +/**
> + * DEFINE_IDTENTRY_HV_USER - Emit code for HV injection handler
> + * when raised from user mode
> + * @func: Function name of the entry point
> + *
> + * Maps to DEFINE_IDTENTRY_RAW
> + */
> +#define DEFINE_IDTENTRY_HV_USER(func) \
> + DEFINE_IDTENTRY_RAW(user_##func)
> +
> #else /* CONFIG_X86_64 */
>
> /**
> @@ -465,6 +498,9 @@ __visible noinstr void func(struct pt_regs *regs, \
> # define DECLARE_IDTENTRY_VC(vector, func) \
> idtentry_vc vector asm_##func func
>
> +# define DECLARE_IDTENTRY_HV(vector, func) \
> + idtentry_hv vector asm_##func func
> +
> #else
> # define DECLARE_IDTENTRY_MCE(vector, func) \
> DECLARE_IDTENTRY(vector, func)
> @@ -622,9 +658,10 @@ DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault);
> DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection);
> #endif
>
> -/* #VC */
> +/* #VC & #HV */
> #ifdef CONFIG_AMD_MEM_ENCRYPT
> DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication);
> +DECLARE_IDTENTRY_HV(X86_TRAP_HV, exc_hv_injection);
> #endif
>
> #ifdef CONFIG_XEN_PV
> diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
> index e9e2c3ba5923..0bd7dab676c5 100644
> --- a/arch/x86/include/asm/page_64_types.h
> +++ b/arch/x86/include/asm/page_64_types.h
> @@ -29,6 +29,7 @@
> #define IST_INDEX_DB 2
> #define IST_INDEX_MCE 3
> #define IST_INDEX_VC 4
> +#define IST_INDEX_HV 5
>
> /*
> * Set __PAGE_OFFSET to the most negative possible address +
> diff --git a/arch/x86/include/asm/trapnr.h b/arch/x86/include/asm/trapnr.h
> index f5d2325aa0b7..c6583631cecb 100644
> --- a/arch/x86/include/asm/trapnr.h
> +++ b/arch/x86/include/asm/trapnr.h
> @@ -26,6 +26,7 @@
> #define X86_TRAP_XF 19 /* SIMD Floating-Point Exception */
> #define X86_TRAP_VE 20 /* Virtualization Exception */
> #define X86_TRAP_CP 21 /* Control Protection Exception */
> +#define X86_TRAP_HV 28 /* HV injected exception in SNP restricted mode */
> #define X86_TRAP_VC 29 /* VMM Communication Exception */
> #define X86_TRAP_IRET 32 /* IRET Exception */
>
> diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
> index 47ecfff2c83d..6795d3e517d6 100644
> --- a/arch/x86/include/asm/traps.h
> +++ b/arch/x86/include/asm/traps.h
> @@ -16,6 +16,7 @@ asmlinkage __visible notrace
> struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
> void __init trap_init(void);
> asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
> +asmlinkage __visible noinstr struct pt_regs *hv_switch_off_ist(struct pt_regs *eregs);
> #endif
>
> extern bool ibt_selftest(void);
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index 9cfca3d7d0e2..e48a489777ec 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -2162,6 +2162,7 @@ static inline void tss_setup_ist(struct tss_struct *tss)
> tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
> /* Only mapped when SEV-ES is active */
> tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC);
> + tss->x86_tss.ist[IST_INDEX_HV] = __this_cpu_ist_top_va(HV);
> }
>
> #else /* CONFIG_X86_64 */
> diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
> index f05339fee778..6d8f8864810c 100644
> --- a/arch/x86/kernel/dumpstack_64.c
> +++ b/arch/x86/kernel/dumpstack_64.c
> @@ -26,11 +26,14 @@ static const char * const exception_stack_names[] = {
> [ ESTACK_MCE ] = "#MC",
> [ ESTACK_VC ] = "#VC",
> [ ESTACK_VC2 ] = "#VC2",
> + [ ESTACK_HV ] = "#HV",
> + [ ESTACK_HV2 ] = "#HV2",
> +
> };
>
> const char *stack_type_name(enum stack_type type)
> {
> - BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
> + BUILD_BUG_ON(N_EXCEPTION_STACKS != 8);
>
> if (type == STACK_TYPE_TASK)
> return "TASK";
> @@ -89,6 +92,8 @@ struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = {
> EPAGERANGE(MCE),
> EPAGERANGE(VC),
> EPAGERANGE(VC2),
> + EPAGERANGE(HV),
> + EPAGERANGE(HV2),
> };
>
> static __always_inline bool in_exception_stack(unsigned long *stack, struct stack_info *info)
> @@ -98,7 +103,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
> struct pt_regs *regs;
> unsigned int k;
>
> - BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
> + BUILD_BUG_ON(N_EXCEPTION_STACKS != 8);
>
> begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
> /*
> diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
> index a58c6bc1cd68..48c0a7e1dbcb 100644
> --- a/arch/x86/kernel/idt.c
> +++ b/arch/x86/kernel/idt.c
> @@ -113,6 +113,7 @@ static const __initconst struct idt_data def_idts[] = {
>
> #ifdef CONFIG_AMD_MEM_ENCRYPT
> ISTG(X86_TRAP_VC, asm_exc_vmm_communication, IST_INDEX_VC),
> + ISTG(X86_TRAP_HV, asm_exc_hv_injection, IST_INDEX_HV),
> #endif
>
> SYSG(X86_TRAP_OF, asm_exc_overflow),
> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index 679026a640ef..a8862a2eff67 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -2004,6 +2004,59 @@ DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
> irqentry_exit_to_user_mode(regs);
> }
>
> +static bool hv_raw_handle_exception(struct pt_regs *regs)
> +{
> + return false;
> +}
> +
> +static __always_inline bool on_hv_fallback_stack(struct pt_regs *regs)
> +{
> + unsigned long sp = (unsigned long)regs;
> +
> + return (sp >= __this_cpu_ist_bottom_va(HV2) && sp < __this_cpu_ist_top_va(HV2));
> +}
> +
> +DEFINE_IDTENTRY_HV_USER(exc_hv_injection)
> +{
> + irqentry_enter_from_user_mode(regs);
> + instrumentation_begin();
> +
> + if (!hv_raw_handle_exception(regs)) {
> + /*
> + * Do not kill the machine if user-space triggered the
> + * exception. Send SIGBUS instead and let user-space deal
> + * with it.
> + */
> + force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
> + }
> +
> + instrumentation_end();
> + irqentry_exit_to_user_mode(regs);
> +}
> +
> +DEFINE_IDTENTRY_HV_KERNEL(exc_hv_injection)
> +{
> + irqentry_state_t irq_state;
> +
> + irq_state = irqentry_nmi_enter(regs);
> + instrumentation_begin();
> +
> + if (!hv_raw_handle_exception(regs)) {
> + pr_emerg("PANIC: Unhandled #HV exception in kernel space\n");
> +
> + /* Show some debug info */
> + show_regs(regs);
> +
> + /* Ask hypervisor to sev_es_terminate */
> + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
> +
> + panic("Returned from Terminate-Request to Hypervisor\n");
> + }
> +
> + instrumentation_end();
> + irqentry_nmi_exit(regs, irq_state);
> +}
> +
> bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
> {
> unsigned long exit_code = regs->orig_ax;
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index d317dc3d06a3..d29debec8134 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -905,6 +905,46 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
>
> return regs_ret;
> }
> +
> +asmlinkage __visible noinstr struct pt_regs *hv_switch_off_ist(struct pt_regs *regs)
> +{
> + unsigned long sp, *stack;
> + struct stack_info info;
> + struct pt_regs *regs_ret;
> +
> + /*
> + * In the SYSCALL entry path the RSP value comes from user-space - don't
> + * trust it and switch to the current kernel stack
> + */
> + if (ip_within_syscall_gap(regs)) {
> + sp = this_cpu_read(pcpu_hot.top_of_stack);
> + goto sync;
> + }
> +
> + /*
> + * From here on the RSP value is trusted. Now check whether entry
> + * happened from a safe stack. Not safe are the entry or unknown stacks,
> + * use the fall-back stack instead in this case.
> + */
> + sp = regs->sp;
> + stack = (unsigned long *)sp;
> +
> + if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY ||
> + info.type > STACK_TYPE_EXCEPTION_LAST)
> + sp = __this_cpu_ist_top_va(HV2);
> +sync:
> + /*
> + * Found a safe stack - switch to it as if the entry didn't happen via
> + * IST stack. The code below only copies pt_regs, the real switch happens
> + * in assembly code.
> + */
> + sp = ALIGN_DOWN(sp, 8) - sizeof(*regs_ret);
> +
> + regs_ret = (struct pt_regs *)sp;
> + *regs_ret = *regs;
> +
> + return regs_ret;
> +}
> #endif
>
> asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs)
> diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
> index 7316a8224259..3ec844cef652 100644
> --- a/arch/x86/mm/cpu_entry_area.c
> +++ b/arch/x86/mm/cpu_entry_area.c
> @@ -153,6 +153,8 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu)
> if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
> cea_map_stack(VC);
> cea_map_stack(VC2);
> + cea_map_stack(HV);
> + cea_map_stack(HV2);
> }
> }
> }
On 1/23/2023 3:33 PM, Gupta, Pankaj wrote:
>
>> + */
>> +.macro idtentry_hv vector asmsym cfunc
>> +SYM_CODE_START(\asmsym)
>> + UNWIND_HINT_IRET_REGS
>> + ASM_CLAC
>
> Did you get a chance to review the new instructions
> added at the start similar to idtentry_vc and comments
> added assuggested here?
>
> https://lore.kernel.org/lkml/16e50239-39b2-4fb4-5110-18f13ba197fe@amd.com/
Hi Pankaj:
Thanks for your reminder. Yes, CLD should be add after ASM_CLAC. Will
fix it.
On 2/3/2023 8:27 AM, Tianyu Lan wrote:
> On 1/23/2023 3:33 PM, Gupta, Pankaj wrote:
>>
>>> + */
>>> +.macro idtentry_hv vector asmsym cfunc
>>> +SYM_CODE_START(\asmsym)
>>> + UNWIND_HINT_IRET_REGS
>>> + ASM_CLAC
>>
>> Did you get a chance to review the new instructions
>> added at the start similar to idtentry_vc and comments
>> added assuggested here?
>>
>> https://lore.kernel.org/lkml/16e50239-39b2-4fb4-5110-18f13ba197fe@amd.com/
>
> Hi Pankaj:
> Thanks for your reminder. Yes, CLD should be add after ASM_CLAC.
> Will fix it.
Also it looks ENDBR also needs to be added before ASM_CLAC? as I also
get this:
vmlinux.o: warning: objtool: asm_exc_hv_injection+0x0:
UNWIND_HINT_IRET_REGS without ENDBR
vmlinux.o: warning: objtool: ibt_selftest+0x11: sibling call from
callable instruction with modified stack frame
vmlinux.o: warning: objtool: ibt_selftest+0x1e: return with modified
stack frame
vmlinux.o: warning: objtool: def_idts+0x1d8: data relocation to !ENDBR:
asm_exc_hv_injection+0x0
Thanks,
Pankaj
On 1/22/2023 3:46 AM, Tianyu Lan wrote:
> From: Tianyu Lan <tiala@microsoft.com>
>
> Add a #HV exception handler that uses IST stack.
>
> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
> ---
> Change since RFC V2:
> * Remove unnecessary line in the change log.
> ---
> arch/x86/entry/entry_64.S | 58 +++++++++++++++++++++++++++
> arch/x86/include/asm/cpu_entry_area.h | 6 +++
> arch/x86/include/asm/idtentry.h | 39 +++++++++++++++++-
> arch/x86/include/asm/page_64_types.h | 1 +
> arch/x86/include/asm/trapnr.h | 1 +
> arch/x86/include/asm/traps.h | 1 +
> arch/x86/kernel/cpu/common.c | 1 +
> arch/x86/kernel/dumpstack_64.c | 9 ++++-
> arch/x86/kernel/idt.c | 1 +
> arch/x86/kernel/sev.c | 53 ++++++++++++++++++++++++
> arch/x86/kernel/traps.c | 40 ++++++++++++++++++
> arch/x86/mm/cpu_entry_area.c | 2 +
> 12 files changed, 209 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 15739a2c0983..6baec7653f19 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -563,6 +563,64 @@ SYM_CODE_START(\asmsym)
> .Lfrom_usermode_switch_stack_\@:
> idtentry_body user_\cfunc, has_error_code=1
>
> +_ASM_NOKPROBE(\asmsym)
> +SYM_CODE_END(\asmsym)
> +.endm
> +/*
> + * idtentry_hv - Macro to generate entry stub for #HV
> + * @vector: Vector number
> + * @asmsym: ASM symbol for the entry point
> + * @cfunc: C function to be called
> + *
> + * The macro emits code to set up the kernel context for #HV. The #HV handler
> + * runs on an IST stack and needs to be able to support nested #HV exceptions.
> + *
> + * To make this work the #HV entry code tries its best to pretend it doesn't use
> + * an IST stack by switching to the task stack if coming from user-space (which
> + * includes early SYSCALL entry path) or back to the stack in the IRET frame if
> + * entered from kernel-mode.
> + *
> + * If entered from kernel-mode the return stack is validated first, and if it is
> + * not safe to use (e.g. because it points to the entry stack) the #HV handler
> + * will switch to a fall-back stack (HV2) and call a special handler function.
> + *
> + * The macro is only used for one vector, but it is planned to be extended in
> + * the future for the #HV exception.
> + */
> +.macro idtentry_hv vector asmsym cfunc
> +SYM_CODE_START(\asmsym)
> + UNWIND_HINT_IRET_REGS
> + ASM_CLAC
> + pushq $-1 /* ORIG_RAX: no syscall to restart */
> +
> + testb $3, CS-ORIG_RAX(%rsp)
> + jnz .Lfrom_usermode_switch_stack_\@
> +
> + call paranoid_entry
> +
> + UNWIND_HINT_REGS
> +
> + /*
> + * Switch off the IST stack to make it free for nested exceptions.
> + */
> + movq %rsp, %rdi /* pt_regs pointer */
> + call hv_switch_off_ist
> + movq %rax, %rsp /* Switch to new stack */
> +
We need "ENCODE_FRAME_POINTER" similar to "vc_switch_off_ist" here as we
are switching stack?
> + UNWIND_HINT_REGS
> +
> + /* Update pt_regs */
> + movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/
> + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
> +
> + movq %rsp, %rdi /* pt_regs pointer */
> + call kernel_\cfunc
> +
> + jmp paranoid_exit
> +
> +.Lfrom_usermode_switch_stack_\@:
> + idtentry_body user_\cfunc, has_error_code=1
> +
> _ASM_NOKPROBE(\asmsym)
> SYM_CODE_END(\asmsym)
> .endm
> diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
> index 462fc34f1317..2186ed601b4a 100644
> --- a/arch/x86/include/asm/cpu_entry_area.h
> +++ b/arch/x86/include/asm/cpu_entry_area.h
> @@ -30,6 +30,10 @@
> char VC_stack[optional_stack_size]; \
> char VC2_stack_guard[guardsize]; \
> char VC2_stack[optional_stack_size]; \
> + char HV_stack_guard[guardsize]; \
> + char HV_stack[optional_stack_size]; \
> + char HV2_stack_guard[guardsize]; \
> + char HV2_stack[optional_stack_size]; \
> char IST_top_guard[guardsize]; \
>
> /* The exception stacks' physical storage. No guard pages required */
> @@ -52,6 +56,8 @@ enum exception_stack_ordering {
> ESTACK_MCE,
> ESTACK_VC,
> ESTACK_VC2,
> + ESTACK_HV,
> + ESTACK_HV2,
> N_EXCEPTION_STACKS
> };
>
> diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
> index 72184b0b2219..652fea10d377 100644
> --- a/arch/x86/include/asm/idtentry.h
> +++ b/arch/x86/include/asm/idtentry.h
> @@ -317,6 +317,19 @@ static __always_inline void __##func(struct pt_regs *regs)
> __visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \
> __visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code)
>
> +
> +/**
> + * DECLARE_IDTENTRY_HV - Declare functions for the HV entry point
> + * @vector: Vector number (ignored for C)
> + * @func: Function name of the entry point
> + *
> + * Maps to DECLARE_IDTENTRY_RAW, but declares also the user C handler.
> + */
> +#define DECLARE_IDTENTRY_HV(vector, func) \
> + DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \
> + __visible noinstr void kernel_##func(struct pt_regs *regs); \
> + __visible noinstr void user_##func(struct pt_regs *regs)
> +
> /**
> * DEFINE_IDTENTRY_IST - Emit code for IST entry points
> * @func: Function name of the entry point
> @@ -376,6 +389,26 @@ static __always_inline void __##func(struct pt_regs *regs)
> #define DEFINE_IDTENTRY_VC_USER(func) \
> DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func)
>
> +/**
> + * DEFINE_IDTENTRY_HV_KERNEL - Emit code for HV injection handler
> + * when raised from kernel mode
> + * @func: Function name of the entry point
> + *
> + * Maps to DEFINE_IDTENTRY_RAW
> + */
> +#define DEFINE_IDTENTRY_HV_KERNEL(func) \
> + DEFINE_IDTENTRY_RAW(kernel_##func)
> +
> +/**
> + * DEFINE_IDTENTRY_HV_USER - Emit code for HV injection handler
> + * when raised from user mode
> + * @func: Function name of the entry point
> + *
> + * Maps to DEFINE_IDTENTRY_RAW
> + */
> +#define DEFINE_IDTENTRY_HV_USER(func) \
> + DEFINE_IDTENTRY_RAW(user_##func)
> +
> #else /* CONFIG_X86_64 */
>
> /**
> @@ -465,6 +498,9 @@ __visible noinstr void func(struct pt_regs *regs, \
> # define DECLARE_IDTENTRY_VC(vector, func) \
> idtentry_vc vector asm_##func func
>
> +# define DECLARE_IDTENTRY_HV(vector, func) \
> + idtentry_hv vector asm_##func func
> +
> #else
> # define DECLARE_IDTENTRY_MCE(vector, func) \
> DECLARE_IDTENTRY(vector, func)
> @@ -622,9 +658,10 @@ DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault);
> DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection);
> #endif
>
> -/* #VC */
> +/* #VC & #HV */
> #ifdef CONFIG_AMD_MEM_ENCRYPT
> DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication);
> +DECLARE_IDTENTRY_HV(X86_TRAP_HV, exc_hv_injection);
> #endif
>
> #ifdef CONFIG_XEN_PV
> diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
> index e9e2c3ba5923..0bd7dab676c5 100644
> --- a/arch/x86/include/asm/page_64_types.h
> +++ b/arch/x86/include/asm/page_64_types.h
> @@ -29,6 +29,7 @@
> #define IST_INDEX_DB 2
> #define IST_INDEX_MCE 3
> #define IST_INDEX_VC 4
> +#define IST_INDEX_HV 5
>
> /*
> * Set __PAGE_OFFSET to the most negative possible address +
> diff --git a/arch/x86/include/asm/trapnr.h b/arch/x86/include/asm/trapnr.h
> index f5d2325aa0b7..c6583631cecb 100644
> --- a/arch/x86/include/asm/trapnr.h
> +++ b/arch/x86/include/asm/trapnr.h
> @@ -26,6 +26,7 @@
> #define X86_TRAP_XF 19 /* SIMD Floating-Point Exception */
> #define X86_TRAP_VE 20 /* Virtualization Exception */
> #define X86_TRAP_CP 21 /* Control Protection Exception */
> +#define X86_TRAP_HV 28 /* HV injected exception in SNP restricted mode */
> #define X86_TRAP_VC 29 /* VMM Communication Exception */
> #define X86_TRAP_IRET 32 /* IRET Exception */
>
> diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
> index 47ecfff2c83d..6795d3e517d6 100644
> --- a/arch/x86/include/asm/traps.h
> +++ b/arch/x86/include/asm/traps.h
> @@ -16,6 +16,7 @@ asmlinkage __visible notrace
> struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
> void __init trap_init(void);
> asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
> +asmlinkage __visible noinstr struct pt_regs *hv_switch_off_ist(struct pt_regs *eregs);
> #endif
>
> extern bool ibt_selftest(void);
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index 9cfca3d7d0e2..e48a489777ec 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -2162,6 +2162,7 @@ static inline void tss_setup_ist(struct tss_struct *tss)
> tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
> /* Only mapped when SEV-ES is active */
> tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC);
> + tss->x86_tss.ist[IST_INDEX_HV] = __this_cpu_ist_top_va(HV);
> }
>
> #else /* CONFIG_X86_64 */
> diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
> index f05339fee778..6d8f8864810c 100644
> --- a/arch/x86/kernel/dumpstack_64.c
> +++ b/arch/x86/kernel/dumpstack_64.c
> @@ -26,11 +26,14 @@ static const char * const exception_stack_names[] = {
> [ ESTACK_MCE ] = "#MC",
> [ ESTACK_VC ] = "#VC",
> [ ESTACK_VC2 ] = "#VC2",
> + [ ESTACK_HV ] = "#HV",
> + [ ESTACK_HV2 ] = "#HV2",
> +
> };
>
> const char *stack_type_name(enum stack_type type)
> {
> - BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
> + BUILD_BUG_ON(N_EXCEPTION_STACKS != 8);
>
> if (type == STACK_TYPE_TASK)
> return "TASK";
> @@ -89,6 +92,8 @@ struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = {
> EPAGERANGE(MCE),
> EPAGERANGE(VC),
> EPAGERANGE(VC2),
> + EPAGERANGE(HV),
> + EPAGERANGE(HV2),
> };
>
> static __always_inline bool in_exception_stack(unsigned long *stack, struct stack_info *info)
> @@ -98,7 +103,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
> struct pt_regs *regs;
> unsigned int k;
>
> - BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
> + BUILD_BUG_ON(N_EXCEPTION_STACKS != 8);
>
> begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
> /*
> diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
> index a58c6bc1cd68..48c0a7e1dbcb 100644
> --- a/arch/x86/kernel/idt.c
> +++ b/arch/x86/kernel/idt.c
> @@ -113,6 +113,7 @@ static const __initconst struct idt_data def_idts[] = {
>
> #ifdef CONFIG_AMD_MEM_ENCRYPT
> ISTG(X86_TRAP_VC, asm_exc_vmm_communication, IST_INDEX_VC),
> + ISTG(X86_TRAP_HV, asm_exc_hv_injection, IST_INDEX_HV),
> #endif
>
> SYSG(X86_TRAP_OF, asm_exc_overflow),
> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index 679026a640ef..a8862a2eff67 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -2004,6 +2004,59 @@ DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
> irqentry_exit_to_user_mode(regs);
> }
>
> +static bool hv_raw_handle_exception(struct pt_regs *regs)
> +{
> + return false;
> +}
> +
> +static __always_inline bool on_hv_fallback_stack(struct pt_regs *regs)
> +{
> + unsigned long sp = (unsigned long)regs;
> +
> + return (sp >= __this_cpu_ist_bottom_va(HV2) && sp < __this_cpu_ist_top_va(HV2));
> +}
> +
> +DEFINE_IDTENTRY_HV_USER(exc_hv_injection)
> +{
> + irqentry_enter_from_user_mode(regs);
> + instrumentation_begin();
> +
> + if (!hv_raw_handle_exception(regs)) {
> + /*
> + * Do not kill the machine if user-space triggered the
> + * exception. Send SIGBUS instead and let user-space deal
> + * with it.
> + */
> + force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
> + }
> +
> + instrumentation_end();
> + irqentry_exit_to_user_mode(regs);
> +}
> +
> +DEFINE_IDTENTRY_HV_KERNEL(exc_hv_injection)
> +{
> + irqentry_state_t irq_state;
> +
> + irq_state = irqentry_nmi_enter(regs);
> + instrumentation_begin();
> +
> + if (!hv_raw_handle_exception(regs)) {
> + pr_emerg("PANIC: Unhandled #HV exception in kernel space\n");
> +
> + /* Show some debug info */
> + show_regs(regs);
> +
> + /* Ask hypervisor to sev_es_terminate */
> + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
> +
> + panic("Returned from Terminate-Request to Hypervisor\n");
> + }
> +
> + instrumentation_end();
> + irqentry_nmi_exit(regs, irq_state);
> +}
> +
> bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
> {
> unsigned long exit_code = regs->orig_ax;
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index d317dc3d06a3..d29debec8134 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -905,6 +905,46 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
>
> return regs_ret;
> }
> +
> +asmlinkage __visible noinstr struct pt_regs *hv_switch_off_ist(struct pt_regs *regs)
> +{
> + unsigned long sp, *stack;
> + struct stack_info info;
> + struct pt_regs *regs_ret;
> +
> + /*
> + * In the SYSCALL entry path the RSP value comes from user-space - don't
> + * trust it and switch to the current kernel stack
> + */
> + if (ip_within_syscall_gap(regs)) {
> + sp = this_cpu_read(pcpu_hot.top_of_stack);
> + goto sync;
> + }
> +
> + /*
> + * From here on the RSP value is trusted. Now check whether entry
> + * happened from a safe stack. Not safe are the entry or unknown stacks,
> + * use the fall-back stack instead in this case.
> + */
> + sp = regs->sp;
> + stack = (unsigned long *)sp;
> +
> + if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY ||
> + info.type > STACK_TYPE_EXCEPTION_LAST)
> + sp = __this_cpu_ist_top_va(HV2);
> +sync:
> + /*
> + * Found a safe stack - switch to it as if the entry didn't happen via
> + * IST stack. The code below only copies pt_regs, the real switch happens
> + * in assembly code.
> + */
> + sp = ALIGN_DOWN(sp, 8) - sizeof(*regs_ret);
> +
> + regs_ret = (struct pt_regs *)sp;
> + *regs_ret = *regs;
> +
> + return regs_ret;
> +}
> #endif
>
> asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs)
> diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
> index 7316a8224259..3ec844cef652 100644
> --- a/arch/x86/mm/cpu_entry_area.c
> +++ b/arch/x86/mm/cpu_entry_area.c
> @@ -153,6 +153,8 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu)
> if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
> cea_map_stack(VC);
> cea_map_stack(VC2);
> + cea_map_stack(HV);
> + cea_map_stack(HV2);
> }
> }
> }
On 3/9/2023 7:48 PM, Gupta, Pankaj wrote:
> On 1/22/2023 3:46 AM, Tianyu Lan wrote:
>> From: Tianyu Lan <tiala@microsoft.com>
>> + UNWIND_HINT_IRET_REGS
>> + ASM_CLAC
>> + pushq $-1 /* ORIG_RAX: no syscall to restart */
>> +
>> + testb $3, CS-ORIG_RAX(%rsp)
>> + jnz .Lfrom_usermode_switch_stack_\@
>> +
>> + call paranoid_entry
>> +
>> + UNWIND_HINT_REGS
>> +
>> + /*
>> + * Switch off the IST stack to make it free for nested exceptions.
>> + */
>> + movq %rsp, %rdi /* pt_regs pointer */
>> + call hv_switch_off_ist
>> + movq %rax, %rsp /* Switch to new stack */
>> +
>
> We need "ENCODE_FRAME_POINTER" similar to "vc_switch_off_ist" here as we
> are switching stack?
>
Agree. Will add it into the next version. Thanks.
On Sat, Jan 21, 2023 at 09:46:02PM -0500, Tianyu Lan wrote:
> From: Tianyu Lan <tiala@microsoft.com>
>
> Add a #HV exception handler that uses IST stack.
>
> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
> ---
> Change since RFC V2:
> * Remove unnecessary line in the change log.
> ---
> arch/x86/entry/entry_64.S | 58 +++++++++++++++++++++++++++
> arch/x86/include/asm/cpu_entry_area.h | 6 +++
> arch/x86/include/asm/idtentry.h | 39 +++++++++++++++++-
> arch/x86/include/asm/page_64_types.h | 1 +
> arch/x86/include/asm/trapnr.h | 1 +
> arch/x86/include/asm/traps.h | 1 +
> arch/x86/kernel/cpu/common.c | 1 +
> arch/x86/kernel/dumpstack_64.c | 9 ++++-
> arch/x86/kernel/idt.c | 1 +
> arch/x86/kernel/sev.c | 53 ++++++++++++++++++++++++
> arch/x86/kernel/traps.c | 40 ++++++++++++++++++
> arch/x86/mm/cpu_entry_area.c | 2 +
> 12 files changed, 209 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 15739a2c0983..6baec7653f19 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -563,6 +563,64 @@ SYM_CODE_START(\asmsym)
> .Lfrom_usermode_switch_stack_\@:
> idtentry_body user_\cfunc, has_error_code=1
>
> +_ASM_NOKPROBE(\asmsym)
> +SYM_CODE_END(\asmsym)
> +.endm
> +/*
> + * idtentry_hv - Macro to generate entry stub for #HV
> + * @vector: Vector number
> + * @asmsym: ASM symbol for the entry point
> + * @cfunc: C function to be called
> + *
> + * The macro emits code to set up the kernel context for #HV. The #HV handler
> + * runs on an IST stack and needs to be able to support nested #HV exceptions.
> + *
> + * To make this work the #HV entry code tries its best to pretend it doesn't use
> + * an IST stack by switching to the task stack if coming from user-space (which
> + * includes early SYSCALL entry path) or back to the stack in the IRET frame if
> + * entered from kernel-mode.
> + *
> + * If entered from kernel-mode the return stack is validated first, and if it is
> + * not safe to use (e.g. because it points to the entry stack) the #HV handler
> + * will switch to a fall-back stack (HV2) and call a special handler function.
> + *
> + * The macro is only used for one vector, but it is planned to be extended in
> + * the future for the #HV exception.
> + */
> +.macro idtentry_hv vector asmsym cfunc
> +SYM_CODE_START(\asmsym)
...
why is this so much duplicated code instead of sharing it with
idtentry_vc and all the facilities it does?
> + UNWIND_HINT_IRET_REGS
> + ASM_CLAC
> + pushq $-1 /* ORIG_RAX: no syscall to restart */
> +
> + testb $3, CS-ORIG_RAX(%rsp)
> + jnz .Lfrom_usermode_switch_stack_\@
> +
> + call paranoid_entry
> +
> + UNWIND_HINT_REGS
> +
> + /*
> + * Switch off the IST stack to make it free for nested exceptions.
> + */
> + movq %rsp, %rdi /* pt_regs pointer */
> + call hv_switch_off_ist
> + movq %rax, %rsp /* Switch to new stack */
> +
> + UNWIND_HINT_REGS
> +
> + /* Update pt_regs */
> + movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/
> + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
> +
> + movq %rsp, %rdi /* pt_regs pointer */
> + call kernel_\cfunc
> +
> + jmp paranoid_exit
> +
> +.Lfrom_usermode_switch_stack_\@:
> + idtentry_body user_\cfunc, has_error_code=1
> +
> _ASM_NOKPROBE(\asmsym)
> SYM_CODE_END(\asmsym)
> .endm
> diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
> index 462fc34f1317..2186ed601b4a 100644
> --- a/arch/x86/include/asm/cpu_entry_area.h
> +++ b/arch/x86/include/asm/cpu_entry_area.h
> @@ -30,6 +30,10 @@
> char VC_stack[optional_stack_size]; \
> char VC2_stack_guard[guardsize]; \
> char VC2_stack[optional_stack_size]; \
> + char HV_stack_guard[guardsize]; \
> + char HV_stack[optional_stack_size]; \
> + char HV2_stack_guard[guardsize]; \
> + char HV2_stack[optional_stack_size]; \
> char IST_top_guard[guardsize]; \
>
> /* The exception stacks' physical storage. No guard pages required */
> @@ -52,6 +56,8 @@ enum exception_stack_ordering {
> ESTACK_MCE,
> ESTACK_VC,
> ESTACK_VC2,
> + ESTACK_HV,
> + ESTACK_HV2,
> N_EXCEPTION_STACKS
Ditto.
And so on...
Please share code - not duplicate.
On 3/31/2023 11:57 PM, Borislav Petkov wrote:
>> + *
>> + * If entered from kernel-mode the return stack is validated first, and if it is
>> + * not safe to use (e.g. because it points to the entry stack) the #HV handler
>> + * will switch to a fall-back stack (HV2) and call a special handler function.
>> + *
>> + * The macro is only used for one vector, but it is planned to be extended in
>> + * the future for the #HV exception.
>> + */
>> +.macro idtentry_hv vector asmsym cfunc
>> +SYM_CODE_START(\asmsym)
> ...
>
> why is this so much duplicated code instead of sharing it with
> idtentry_vc and all the facilities it does?
>
Hi Boris:
#VC and #HV use different stack. I try reusing vc code path for #HV
doesn't work. I will continue to work on this direction and report back
later. In the RFC v4, I still keep the old version and other patches may
be reviewed in the parellel.
Thanks.
@@ -563,6 +563,64 @@ SYM_CODE_START(\asmsym)
.Lfrom_usermode_switch_stack_\@:
idtentry_body user_\cfunc, has_error_code=1
+_ASM_NOKPROBE(\asmsym)
+SYM_CODE_END(\asmsym)
+.endm
+/*
+ * idtentry_hv - Macro to generate entry stub for #HV
+ * @vector: Vector number
+ * @asmsym: ASM symbol for the entry point
+ * @cfunc: C function to be called
+ *
+ * The macro emits code to set up the kernel context for #HV. The #HV handler
+ * runs on an IST stack and needs to be able to support nested #HV exceptions.
+ *
+ * To make this work the #HV entry code tries its best to pretend it doesn't use
+ * an IST stack by switching to the task stack if coming from user-space (which
+ * includes early SYSCALL entry path) or back to the stack in the IRET frame if
+ * entered from kernel-mode.
+ *
+ * If entered from kernel-mode the return stack is validated first, and if it is
+ * not safe to use (e.g. because it points to the entry stack) the #HV handler
+ * will switch to a fall-back stack (HV2) and call a special handler function.
+ *
+ * The macro is only used for one vector, but it is planned to be extended in
+ * the future for the #HV exception.
+ */
+.macro idtentry_hv vector asmsym cfunc
+SYM_CODE_START(\asmsym)
+ UNWIND_HINT_IRET_REGS
+ ASM_CLAC
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
+
+ testb $3, CS-ORIG_RAX(%rsp)
+ jnz .Lfrom_usermode_switch_stack_\@
+
+ call paranoid_entry
+
+ UNWIND_HINT_REGS
+
+ /*
+ * Switch off the IST stack to make it free for nested exceptions.
+ */
+ movq %rsp, %rdi /* pt_regs pointer */
+ call hv_switch_off_ist
+ movq %rax, %rsp /* Switch to new stack */
+
+ UNWIND_HINT_REGS
+
+ /* Update pt_regs */
+ movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/
+ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
+
+ movq %rsp, %rdi /* pt_regs pointer */
+ call kernel_\cfunc
+
+ jmp paranoid_exit
+
+.Lfrom_usermode_switch_stack_\@:
+ idtentry_body user_\cfunc, has_error_code=1
+
_ASM_NOKPROBE(\asmsym)
SYM_CODE_END(\asmsym)
.endm
@@ -30,6 +30,10 @@
char VC_stack[optional_stack_size]; \
char VC2_stack_guard[guardsize]; \
char VC2_stack[optional_stack_size]; \
+ char HV_stack_guard[guardsize]; \
+ char HV_stack[optional_stack_size]; \
+ char HV2_stack_guard[guardsize]; \
+ char HV2_stack[optional_stack_size]; \
char IST_top_guard[guardsize]; \
/* The exception stacks' physical storage. No guard pages required */
@@ -52,6 +56,8 @@ enum exception_stack_ordering {
ESTACK_MCE,
ESTACK_VC,
ESTACK_VC2,
+ ESTACK_HV,
+ ESTACK_HV2,
N_EXCEPTION_STACKS
};
@@ -317,6 +317,19 @@ static __always_inline void __##func(struct pt_regs *regs)
__visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \
__visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code)
+
+/**
+ * DECLARE_IDTENTRY_HV - Declare functions for the HV entry point
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Maps to DECLARE_IDTENTRY_RAW, but declares also the user C handler.
+ */
+#define DECLARE_IDTENTRY_HV(vector, func) \
+ DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \
+ __visible noinstr void kernel_##func(struct pt_regs *regs); \
+ __visible noinstr void user_##func(struct pt_regs *regs)
+
/**
* DEFINE_IDTENTRY_IST - Emit code for IST entry points
* @func: Function name of the entry point
@@ -376,6 +389,26 @@ static __always_inline void __##func(struct pt_regs *regs)
#define DEFINE_IDTENTRY_VC_USER(func) \
DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func)
+/**
+ * DEFINE_IDTENTRY_HV_KERNEL - Emit code for HV injection handler
+ * when raised from kernel mode
+ * @func: Function name of the entry point
+ *
+ * Maps to DEFINE_IDTENTRY_RAW
+ */
+#define DEFINE_IDTENTRY_HV_KERNEL(func) \
+ DEFINE_IDTENTRY_RAW(kernel_##func)
+
+/**
+ * DEFINE_IDTENTRY_HV_USER - Emit code for HV injection handler
+ * when raised from user mode
+ * @func: Function name of the entry point
+ *
+ * Maps to DEFINE_IDTENTRY_RAW
+ */
+#define DEFINE_IDTENTRY_HV_USER(func) \
+ DEFINE_IDTENTRY_RAW(user_##func)
+
#else /* CONFIG_X86_64 */
/**
@@ -465,6 +498,9 @@ __visible noinstr void func(struct pt_regs *regs, \
# define DECLARE_IDTENTRY_VC(vector, func) \
idtentry_vc vector asm_##func func
+# define DECLARE_IDTENTRY_HV(vector, func) \
+ idtentry_hv vector asm_##func func
+
#else
# define DECLARE_IDTENTRY_MCE(vector, func) \
DECLARE_IDTENTRY(vector, func)
@@ -622,9 +658,10 @@ DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault);
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection);
#endif
-/* #VC */
+/* #VC & #HV */
#ifdef CONFIG_AMD_MEM_ENCRYPT
DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication);
+DECLARE_IDTENTRY_HV(X86_TRAP_HV, exc_hv_injection);
#endif
#ifdef CONFIG_XEN_PV
@@ -29,6 +29,7 @@
#define IST_INDEX_DB 2
#define IST_INDEX_MCE 3
#define IST_INDEX_VC 4
+#define IST_INDEX_HV 5
/*
* Set __PAGE_OFFSET to the most negative possible address +
@@ -26,6 +26,7 @@
#define X86_TRAP_XF 19 /* SIMD Floating-Point Exception */
#define X86_TRAP_VE 20 /* Virtualization Exception */
#define X86_TRAP_CP 21 /* Control Protection Exception */
+#define X86_TRAP_HV 28 /* HV injected exception in SNP restricted mode */
#define X86_TRAP_VC 29 /* VMM Communication Exception */
#define X86_TRAP_IRET 32 /* IRET Exception */
@@ -16,6 +16,7 @@ asmlinkage __visible notrace
struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
void __init trap_init(void);
asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
+asmlinkage __visible noinstr struct pt_regs *hv_switch_off_ist(struct pt_regs *eregs);
#endif
extern bool ibt_selftest(void);
@@ -2162,6 +2162,7 @@ static inline void tss_setup_ist(struct tss_struct *tss)
tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
/* Only mapped when SEV-ES is active */
tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC);
+ tss->x86_tss.ist[IST_INDEX_HV] = __this_cpu_ist_top_va(HV);
}
#else /* CONFIG_X86_64 */
@@ -26,11 +26,14 @@ static const char * const exception_stack_names[] = {
[ ESTACK_MCE ] = "#MC",
[ ESTACK_VC ] = "#VC",
[ ESTACK_VC2 ] = "#VC2",
+ [ ESTACK_HV ] = "#HV",
+ [ ESTACK_HV2 ] = "#HV2",
+
};
const char *stack_type_name(enum stack_type type)
{
- BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
+ BUILD_BUG_ON(N_EXCEPTION_STACKS != 8);
if (type == STACK_TYPE_TASK)
return "TASK";
@@ -89,6 +92,8 @@ struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = {
EPAGERANGE(MCE),
EPAGERANGE(VC),
EPAGERANGE(VC2),
+ EPAGERANGE(HV),
+ EPAGERANGE(HV2),
};
static __always_inline bool in_exception_stack(unsigned long *stack, struct stack_info *info)
@@ -98,7 +103,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
struct pt_regs *regs;
unsigned int k;
- BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
+ BUILD_BUG_ON(N_EXCEPTION_STACKS != 8);
begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
/*
@@ -113,6 +113,7 @@ static const __initconst struct idt_data def_idts[] = {
#ifdef CONFIG_AMD_MEM_ENCRYPT
ISTG(X86_TRAP_VC, asm_exc_vmm_communication, IST_INDEX_VC),
+ ISTG(X86_TRAP_HV, asm_exc_hv_injection, IST_INDEX_HV),
#endif
SYSG(X86_TRAP_OF, asm_exc_overflow),
@@ -2004,6 +2004,59 @@ DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
irqentry_exit_to_user_mode(regs);
}
+static bool hv_raw_handle_exception(struct pt_regs *regs)
+{
+ return false;
+}
+
+static __always_inline bool on_hv_fallback_stack(struct pt_regs *regs)
+{
+ unsigned long sp = (unsigned long)regs;
+
+ return (sp >= __this_cpu_ist_bottom_va(HV2) && sp < __this_cpu_ist_top_va(HV2));
+}
+
+DEFINE_IDTENTRY_HV_USER(exc_hv_injection)
+{
+ irqentry_enter_from_user_mode(regs);
+ instrumentation_begin();
+
+ if (!hv_raw_handle_exception(regs)) {
+ /*
+ * Do not kill the machine if user-space triggered the
+ * exception. Send SIGBUS instead and let user-space deal
+ * with it.
+ */
+ force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
+ }
+
+ instrumentation_end();
+ irqentry_exit_to_user_mode(regs);
+}
+
+DEFINE_IDTENTRY_HV_KERNEL(exc_hv_injection)
+{
+ irqentry_state_t irq_state;
+
+ irq_state = irqentry_nmi_enter(regs);
+ instrumentation_begin();
+
+ if (!hv_raw_handle_exception(regs)) {
+ pr_emerg("PANIC: Unhandled #HV exception in kernel space\n");
+
+ /* Show some debug info */
+ show_regs(regs);
+
+ /* Ask hypervisor to sev_es_terminate */
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
+
+ panic("Returned from Terminate-Request to Hypervisor\n");
+ }
+
+ instrumentation_end();
+ irqentry_nmi_exit(regs, irq_state);
+}
+
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
unsigned long exit_code = regs->orig_ax;
@@ -905,6 +905,46 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
return regs_ret;
}
+
+asmlinkage __visible noinstr struct pt_regs *hv_switch_off_ist(struct pt_regs *regs)
+{
+ unsigned long sp, *stack;
+ struct stack_info info;
+ struct pt_regs *regs_ret;
+
+ /*
+ * In the SYSCALL entry path the RSP value comes from user-space - don't
+ * trust it and switch to the current kernel stack
+ */
+ if (ip_within_syscall_gap(regs)) {
+ sp = this_cpu_read(pcpu_hot.top_of_stack);
+ goto sync;
+ }
+
+ /*
+ * From here on the RSP value is trusted. Now check whether entry
+ * happened from a safe stack. Not safe are the entry or unknown stacks,
+ * use the fall-back stack instead in this case.
+ */
+ sp = regs->sp;
+ stack = (unsigned long *)sp;
+
+ if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY ||
+ info.type > STACK_TYPE_EXCEPTION_LAST)
+ sp = __this_cpu_ist_top_va(HV2);
+sync:
+ /*
+ * Found a safe stack - switch to it as if the entry didn't happen via
+ * IST stack. The code below only copies pt_regs, the real switch happens
+ * in assembly code.
+ */
+ sp = ALIGN_DOWN(sp, 8) - sizeof(*regs_ret);
+
+ regs_ret = (struct pt_regs *)sp;
+ *regs_ret = *regs;
+
+ return regs_ret;
+}
#endif
asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs)
@@ -153,6 +153,8 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu)
if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
cea_map_stack(VC);
cea_map_stack(VC2);
+ cea_map_stack(HV);
+ cea_map_stack(HV2);
}
}
}