[v2,2/6] x86/entry/64: Convert SYSRET validation tests to C

Message ID 20230721161018.50214-3-brgerst@gmail.com
State New
Headers
Series x86: Clean up fast syscall return validation |

Commit Message

Brian Gerst July 21, 2023, 4:10 p.m. UTC
  Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/entry/common.c        | 40 ++++++++++++++++++++++++-
 arch/x86/entry/entry_64.S      | 55 ++--------------------------------
 arch/x86/include/asm/syscall.h |  2 +-
 3 files changed, 42 insertions(+), 55 deletions(-)
  

Comments

Li, Xin3 July 23, 2023, 9:53 a.m. UTC | #1
> @@ -84,6 +85,43 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
> 
>  	instrumentation_end();
>  	syscall_exit_to_user_mode(regs);

Would it be better to make the following code a new function?

And then the similar changes in patch 6 could be merged into the new
function with #ifdef CONFIG_X86_64.

> +
> +	/*
> +	 * Check that the register state is valid for using SYSRET to exit
> +	 * to userspace.  Otherwise use the slower but fully capable IRET
> +	 * exit path.
> +	 */
> +
> +	/* XEN PV guests always use IRET path */
> +	if (cpu_feature_enabled(X86_FEATURE_XENPV))
> +		return false;
> +
> +	/* SYSRET requires RCX == RIP and R11 == EFLAGS */
> +	if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
> +		return false;
> +
> +	/* CS and SS must match the values set in MSR_STAR */
> +	if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
> +		return false;
> +
> +	/*
> +	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> +	 * in kernel space.  This essentially lets the user take over
> +	 * the kernel, since userspace controls RSP.
> +	 */
> +	if (unlikely(!__is_canonical_address(regs->ip, __VIRTUAL_MASK_SHIFT + 1)))
> +		return false;
> +
> +	/*
> +	 * SYSRET cannot restore RF.  It can restore TF, but unlike IRET,
> +	 * restoring TF results in a trap from userspace immediately after
> +	 * SYSRET.
> +	 */
> +	if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
> +		return false;
> +
> +	/* Use SYSRET to exit to userspace */
> +	return true;
>  }
>  #endif
>
  
Brian Gerst July 23, 2023, 11:17 a.m. UTC | #2
On Sun, Jul 23, 2023 at 5:53 AM Li, Xin3 <xin3.li@intel.com> wrote:
>
>
> > @@ -84,6 +85,43 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
> >
> >       instrumentation_end();
> >       syscall_exit_to_user_mode(regs);
>
> Would it be better to make the following code a new function?
>
> And then the similar changes in patch 6 could be merged into the new
> function with #ifdef CONFIG_X86_64.
>
> > +
> > +     /*
> > +      * Check that the register state is valid for using SYSRET to exit
> > +      * to userspace.  Otherwise use the slower but fully capable IRET
> > +      * exit path.
> > +      */
> > +
> > +     /* XEN PV guests always use IRET path */
> > +     if (cpu_feature_enabled(X86_FEATURE_XENPV))
> > +             return false;
> > +
> > +     /* SYSRET requires RCX == RIP and R11 == EFLAGS */
> > +     if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
> > +             return false;
> > +
> > +     /* CS and SS must match the values set in MSR_STAR */
> > +     if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
> > +             return false;
> > +
> > +     /*
> > +      * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> > +      * in kernel space.  This essentially lets the user take over
> > +      * the kernel, since userspace controls RSP.
> > +      */
> > +     if (unlikely(!__is_canonical_address(regs->ip, __VIRTUAL_MASK_SHIFT + 1)))
> > +             return false;
> > +
> > +     /*
> > +      * SYSRET cannot restore RF.  It can restore TF, but unlike IRET,
> > +      * restoring TF results in a trap from userspace immediately after
> > +      * SYSRET.
> > +      */
> > +     if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
> > +             return false;
> > +
> > +     /* Use SYSRET to exit to userspace */
> > +     return true;
> >  }
> >  #endif
> >

The tests are similar, but not similar enough to combine them.  If
IA32_EMULATION is enabled, both versions are needed in the same kernel,
so a single copy of the function with #ifdefs won't work.

Brian Gerst
  

Patch

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6c2826417b33..cccdd18c8304 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -70,7 +70,8 @@  static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
 	return false;
 }
 
-__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
+/* Returns true to return using SYSRET, or false to use IRET */
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
 {
 	add_random_kstack_offset();
 	nr = syscall_enter_from_user_mode(regs, nr);
@@ -84,6 +85,43 @@  __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
 
 	instrumentation_end();
 	syscall_exit_to_user_mode(regs);
+
+	/*
+	 * Check that the register state is valid for using SYSRET to exit
+	 * to userspace.  Otherwise use the slower but fully capable IRET
+	 * exit path.
+	 */
+
+	/* XEN PV guests always use IRET path */
+	if (cpu_feature_enabled(X86_FEATURE_XENPV))
+		return false;
+
+	/* SYSRET requires RCX == RIP and R11 == EFLAGS */
+	if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
+		return false;
+
+	/* CS and SS must match the values set in MSR_STAR */
+	if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
+		return false;
+
+	/*
+	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+	 * in kernel space.  This essentially lets the user take over
+	 * the kernel, since userspace controls RSP.
+	 */
+	if (unlikely(!__is_canonical_address(regs->ip, __VIRTUAL_MASK_SHIFT + 1)))
+		return false;
+
+	/*
+	 * SYSRET cannot restore RF.  It can restore TF, but unlike IRET,
+	 * restoring TF results in a trap from userspace immediately after
+	 * SYSRET.
+	 */
+	if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
+		return false;
+
+	/* Use SYSRET to exit to userspace */
+	return true;
 }
 #endif
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c01776a51545..b1288e22cae8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -123,60 +123,9 @@  SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
 	 * Try to use SYSRET instead of IRET if we're returning to
 	 * a completely clean 64-bit userspace context.  If we're not,
 	 * go to the slow exit path.
-	 * In the Xen PV case we must use iret anyway.
 	 */
-
-	ALTERNATIVE "", "jmp	swapgs_restore_regs_and_return_to_usermode", \
-		X86_FEATURE_XENPV
-
-	movq	RCX(%rsp), %rcx
-	movq	RIP(%rsp), %r11
-
-	cmpq	%rcx, %r11	/* SYSRET requires RCX == RIP */
-	jne	swapgs_restore_regs_and_return_to_usermode
-
-	/*
-	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
-	 * in kernel space.  This essentially lets the user take over
-	 * the kernel, since userspace controls RSP.
-	 *
-	 * If width of "canonical tail" ever becomes variable, this will need
-	 * to be updated to remain correct on both old and new CPUs.
-	 *
-	 * Change top bits to match most significant bit (47th or 56th bit
-	 * depending on paging mode) in the address.
-	 */
-#ifdef CONFIG_X86_5LEVEL
-	ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
-		"shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
-#else
-	shl	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-	sar	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-#endif
-
-	/* If this changed %rcx, it was not canonical */
-	cmpq	%rcx, %r11
-	jne	swapgs_restore_regs_and_return_to_usermode
-
-	cmpq	$__USER_CS, CS(%rsp)		/* CS must match SYSRET */
-	jne	swapgs_restore_regs_and_return_to_usermode
-
-	movq	R11(%rsp), %r11
-	cmpq	%r11, EFLAGS(%rsp)		/* R11 == RFLAGS */
-	jne	swapgs_restore_regs_and_return_to_usermode
-
-	/*
-	 * SYSRET cannot restore RF.  It can restore TF, but unlike IRET,
-	 * restoring TF results in a trap from userspace immediately after
-	 * SYSRET.
-	 */
-	testq	$(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
-	jnz	swapgs_restore_regs_and_return_to_usermode
-
-	/* nothing to check for RSP */
-
-	cmpq	$__USER_DS, SS(%rsp)		/* SS must match SYSRET */
-	jne	swapgs_restore_regs_and_return_to_usermode
+	testb	%al, %al
+	jz	swapgs_restore_regs_and_return_to_usermode
 
 	/*
 	 * We win! This label is here just for ease of understanding
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4fb36fba4b5a..be6c5515e0b9 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -126,7 +126,7 @@  static inline int syscall_get_arch(struct task_struct *task)
 		? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 }
 
-void do_syscall_64(struct pt_regs *regs, int nr);
+bool do_syscall_64(struct pt_regs *regs, int nr);
 
 #endif	/* CONFIG_X86_32 */