[2/6] x86/entry/64: Convert SYSRET validation tests to C

Message ID 20230718134446.168654-3-brgerst@gmail.com
State New
Series x86: Clean up fast syscall return validation

Commit Message

Brian Gerst July 18, 2023, 1:44 p.m. UTC
  Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/entry/common.c        | 50 ++++++++++++++++++++++++++++++-
 arch/x86/entry/entry_64.S      | 55 ++--------------------------------
 arch/x86/include/asm/syscall.h |  2 +-
 3 files changed, 52 insertions(+), 55 deletions(-)
  

Comments

Mika Penttilä July 18, 2023, 2:16 p.m. UTC | #1
Hi,


On 18.7.2023 16.44, Brian Gerst wrote:
> Signed-off-by: Brian Gerst <brgerst@gmail.com>
> ---
>   arch/x86/entry/common.c        | 50 ++++++++++++++++++++++++++++++-
>   arch/x86/entry/entry_64.S      | 55 ++--------------------------------
>   arch/x86/include/asm/syscall.h |  2 +-
>   3 files changed, 52 insertions(+), 55 deletions(-)
> 
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index 6c2826417b33..afe79c3f1c5b 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -70,8 +70,12 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
>   	return false;
>   }
>   
> -__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
> +/* Returns true to return using SYSRET, or false to use IRET */
> +__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
>   {
> +	long rip;
> +	unsigned int shift_rip;
> +
>   	add_random_kstack_offset();
>   	nr = syscall_enter_from_user_mode(regs, nr);
>   
> @@ -84,6 +88,50 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
>   
>   	instrumentation_end();
>   	syscall_exit_to_user_mode(regs);
> +
> +	/*
> +	 * Check that the register state is valid for using SYSRET to exit
> +	 * to userspace.  Otherwise use the slower but fully capable IRET
> +	 * exit path.
> +	 */
> +
> +	/* XEN PV guests always use IRET path */
> +	if (cpu_feature_enabled(X86_FEATURE_XENPV))
> +		return false;
> +
> +	/* SYSRET requires RCX == RIP and R11 == EFLAGS */
> +	if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
> +		return false;
> +
> +	/* CS and SS must match the values set in MSR_STAR */
> +	if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
> +		return false;
> +
> +	/*
> +	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> +	 * in kernel space.  This essentially lets the user take over
> +	 * the kernel, since userspace controls RSP.
> +	 *
> +	 * Change top bits to match most significant bit (47th or 56th bit
> +	 * depending on paging mode) in the address.
> +	 */
> +	shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);

Should this be:

	shift_rip = (64 - __VIRTUAL_MASK_SHIFT - 1);
?

> +	rip = (long) regs->ip;
> +	rip <<= shift_rip;
> +	rip >>= shift_rip;
> +	if (unlikely((unsigned long) rip != regs->ip))
> +		return false;
> +
> +	/*
> +	 * SYSRET cannot restore RF.  It can restore TF, but unlike IRET,
> +	 * restoring TF results in a trap from userspace immediately after
> +	 * SYSRET.
> +	 */
> +	if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
> +		return false;
> +
> +	/* Use SYSRET to exit to userspace */
> +	return true;
>   }
>   #endif


--Mika
  
Brian Gerst July 18, 2023, 2:25 p.m. UTC | #2
On Tue, Jul 18, 2023 at 10:17 AM Mika Penttilä <mpenttil@redhat.com> wrote:
>
> Hi,
>
> On 18.7.2023 16.44, Brian Gerst wrote:
> > +      * Change top bits to match most significant bit (47th or 56th bit
> > +      * depending on paging mode) in the address.
> > +      */
> > +     shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);
>
> Should this be:
>
>         shift_rip = (64 - __VIRTUAL_MASK_SHIFT - 1);
> ?

I removed a set of parentheses, which switched the sign from -1 to +1.
I could put it back if that's less confusing.

Brian Gerst
  
Mika Penttilä July 18, 2023, 2:49 p.m. UTC | #3
On 18.7.2023 17.25, Brian Gerst wrote:
> On Tue, Jul 18, 2023 at 10:17 AM Mika Penttilä <mpenttil@redhat.com> wrote:
> >> On 18.7.2023 16.44, Brian Gerst wrote:
> >>> +      * Change top bits to match most significant bit (47th or 56th bit
> >>> +      * depending on paging mode) in the address.
> >>> +      */
>>> +     shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);
>>
>> Should this be:
>>
>>          shift_rip = (64 - __VIRTUAL_MASK_SHIFT - 1);
>> ?
> 
> I removed a set of parentheses, which switched the sign from -1 to +1.
> I could put it back if that's less confusing.
> 

I mean isn't it supposed to be:
shift_rip = (64 - 48) for 4 level, now it's
shift_rip = (64 - 46)

__VIRTUAL_MASK_SHIFT == 47


> Brian Gerst
>
  
Brian Gerst July 18, 2023, 3:21 p.m. UTC | #4
On Tue, Jul 18, 2023 at 10:49 AM Mika Penttilä <mpenttil@redhat.com> wrote:
>
> On 18.7.2023 17.25, Brian Gerst wrote:
> > On Tue, Jul 18, 2023 at 10:17 AM Mika Penttilä <mpenttil@redhat.com> wrote:
> > >> On 18.7.2023 16.44, Brian Gerst wrote:
> >>> +     shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);
> >>
> >> Should this be:
> >>
> >>          shift_rip = (64 - __VIRTUAL_MASK_SHIFT - 1);
> >> ?
> >
> > I removed a set of parentheses, which switched the sign from -1 to +1.
> > I could put it back if that's less confusing.
> >
>
> I mean isn't it supposed to be:
> shift_rip = (64 - 48) for 4 level, now it's
> shift_rip = (64 - 46)
>
> __VIRTUAL_MASK_SHIFT == 47

Original:
(64 - (47 + 1)) = (64 - 48) = 16

  c5:   48 c1 e1 10             shl    $0x10,%rcx
  c9:   48 c1 f9 10             sar    $0x10,%rcx

New:
(64 - 47 - 1) = (17 - 1) = 16

 18b:   b9 10 00 00 00          mov    $0x10,%ecx
 193:   48 d3 e2                shl    %cl,%rdx
 196:   48 d3 fa                sar    %cl,%rdx

Anyways, I'll switch it back to the original formula.  I'm not going
to argue any more about basic math.

Brian Gerst
  
Brian Gerst July 18, 2023, 3:46 p.m. UTC | #5
On Tue, Jul 18, 2023 at 11:21 AM Brian Gerst <brgerst@gmail.com> wrote:
>
> On Tue, Jul 18, 2023 at 10:49 AM Mika Penttilä <mpenttil@redhat.com> wrote:
> > On 18.7.2023 17.25, Brian Gerst wrote:
> > > On Tue, Jul 18, 2023 at 10:17 AM Mika Penttilä <mpenttil@redhat.com> wrote:
> > >> On 18.7.2023 16.44, Brian Gerst wrote:
> > >>> +     shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);
> > >>
> > >> Should this be:
> > >>
> > >>          shift_rip = (64 - __VIRTUAL_MASK_SHIFT - 1);
> > >> ?
> > >
> > > I removed a set of parentheses, which switched the sign from -1 to +1.
> > > I could put it back if that's less confusing.
> > >
> >
> > I mean isn't it supposed to be:
> > shift_rip = (64 - 48) for 4 level, now it's
> > shift_rip = (64 - 46)
> >
> > __VIRTUAL_MASK_SHIFT == 47

My apologies, you were right.  I've been sitting on this series for a
while and finally got around to posting it and didn't catch that
error.

>
> Original:
> (64 - (47 + 1)) = (64 - 48) = 16
>
>   c5:   48 c1 e1 10             shl    $0x10,%rcx
>   c9:   48 c1 f9 10             sar    $0x10,%rcx

This was wrong.  I hastily compiled this after I had reverted to the
original formula.

> New:
> (64 - 47 - 1) = (17 - 1) = 16
>
>  18b:   b9 10 00 00 00          mov    $0x10,%ecx
>  193:   48 d3 e2                shl    %cl,%rdx
>  196:   48 d3 fa                sar    %cl,%rdx
>
> Anyways, I'll switch it back to the original formula.  I'm not going
> to argue any more about basic math.

I'll send a v2 later after any more feedback.  Thanks.

 Brian Gerst
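
To put numbers on the off-by-two: with 4-level paging __VIRTUAL_MASK_SHIFT is 47, so the intended shift count is 64 - (47 + 1) = 16, keeping the low 48 bits and sign-extending from bit 47, while the posted formula yields 64 - 47 + 1 = 18, which sign-extends from bit 45 instead. A minimal standalone userspace sketch (not kernel code; the helper name and sample address are only illustrative) shows the consequence:

#include <stdint.h>
#include <stdio.h>

#define VIRTUAL_MASK_SHIFT	47	/* 4-level paging; 56 with LA57 */

/*
 * The patch's canonical-address trick: shift the address up so the top
 * canonical bit lands in the sign bit, then arithmetic-shift it back,
 * replicating that bit through the upper bits.
 */
static uint64_t sign_extend(uint64_t addr, unsigned int shift)
{
	/* Left-shift unsigned to avoid signed-overflow UB; the right
	 * shift is arithmetic, as in the patch's rip <<= / >>= pair. */
	return (uint64_t)((int64_t)(addr << shift) >> shift);
}

int main(void)
{
	unsigned int shift_ok  = 64 - (VIRTUAL_MASK_SHIFT + 1);	/* 16 */
	unsigned int shift_bug = 64 - VIRTUAL_MASK_SHIFT + 1;	/* 18 */
	uint64_t rip = 0x00007f0000001000ULL;	/* canonical user RIP */

	printf("shift %u: %s\n", shift_ok,
	       sign_extend(rip, shift_ok) == rip ? "SYSRET" : "IRET");
	printf("shift %u: %s\n", shift_bug,
	       sign_extend(rip, shift_bug) == rip ? "SYSRET" : "IRET");
	return 0;
}

This prints SYSRET for shift 16 but IRET for shift 18: the oversized shift sign-extends from bit 45, so any canonical address whose bits 45 through 47 are not all the same fails the comparison and takes the slow path. The inverse cannot happen, since anything that survives the bit-45 check has bits 45 through 63 all identical and is therefore canonical, so the slip costs performance rather than safety.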
  

Patch

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6c2826417b33..afe79c3f1c5b 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -70,8 +70,12 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
 	return false;
 }
 
-__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
+/* Returns true to return using SYSRET, or false to use IRET */
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
 {
+	long rip;
+	unsigned int shift_rip;
+
 	add_random_kstack_offset();
 	nr = syscall_enter_from_user_mode(regs, nr);
 
@@ -84,6 +88,50 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
 
 	instrumentation_end();
 	syscall_exit_to_user_mode(regs);
+
+	/*
+	 * Check that the register state is valid for using SYSRET to exit
+	 * to userspace.  Otherwise use the slower but fully capable IRET
+	 * exit path.
+	 */
+
+	/* XEN PV guests always use IRET path */
+	if (cpu_feature_enabled(X86_FEATURE_XENPV))
+		return false;
+
+	/* SYSRET requires RCX == RIP and R11 == EFLAGS */
+	if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
+		return false;
+
+	/* CS and SS must match the values set in MSR_STAR */
+	if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
+		return false;
+
+	/*
+	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+	 * in kernel space.  This essentially lets the user take over
+	 * the kernel, since userspace controls RSP.
+	 *
+	 * Change top bits to match most significant bit (47th or 56th bit
+	 * depending on paging mode) in the address.
+	 */
+	shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);
+	rip = (long) regs->ip;
+	rip <<= shift_rip;
+	rip >>= shift_rip;
+	if (unlikely((unsigned long) rip != regs->ip))
+		return false;
+
+	/*
+	 * SYSRET cannot restore RF.  It can restore TF, but unlike IRET,
+	 * restoring TF results in a trap from userspace immediately after
+	 * SYSRET.
+	 */
+	if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
+		return false;
+
+	/* Use SYSRET to exit to userspace */
+	return true;
 }
 #endif
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c01776a51545..b1288e22cae8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -123,60 +123,9 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
 	 * Try to use SYSRET instead of IRET if we're returning to
 	 * a completely clean 64-bit userspace context.  If we're not,
 	 * go to the slow exit path.
-	 * In the Xen PV case we must use iret anyway.
 	 */
-
-	ALTERNATIVE "", "jmp	swapgs_restore_regs_and_return_to_usermode", \
-		X86_FEATURE_XENPV
-
-	movq	RCX(%rsp), %rcx
-	movq	RIP(%rsp), %r11
-
-	cmpq	%rcx, %r11	/* SYSRET requires RCX == RIP */
-	jne	swapgs_restore_regs_and_return_to_usermode
-
-	/*
-	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
-	 * in kernel space.  This essentially lets the user take over
-	 * the kernel, since userspace controls RSP.
-	 *
-	 * If width of "canonical tail" ever becomes variable, this will need
-	 * to be updated to remain correct on both old and new CPUs.
-	 *
-	 * Change top bits to match most significant bit (47th or 56th bit
-	 * depending on paging mode) in the address.
-	 */
-#ifdef CONFIG_X86_5LEVEL
-	ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
-		"shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
-#else
-	shl	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-	sar	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-#endif
-
-	/* If this changed %rcx, it was not canonical */
-	cmpq	%rcx, %r11
-	jne	swapgs_restore_regs_and_return_to_usermode
-
-	cmpq	$__USER_CS, CS(%rsp)		/* CS must match SYSRET */
-	jne	swapgs_restore_regs_and_return_to_usermode
-
-	movq	R11(%rsp), %r11
-	cmpq	%r11, EFLAGS(%rsp)		/* R11 == RFLAGS */
-	jne	swapgs_restore_regs_and_return_to_usermode
-
-	/*
-	 * SYSRET cannot restore RF.  It can restore TF, but unlike IRET,
-	 * restoring TF results in a trap from userspace immediately after
-	 * SYSRET.
-	 */
-	testq	$(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
-	jnz	swapgs_restore_regs_and_return_to_usermode
-
-	/* nothing to check for RSP */
-
-	cmpq	$__USER_DS, SS(%rsp)		/* SS must match SYSRET */
-	jne	swapgs_restore_regs_and_return_to_usermode
+	testb	%al, %al
+	jz	swapgs_restore_regs_and_return_to_usermode
 
 	/*
 	 * We win! This label is here just for ease of understanding
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4fb36fba4b5a..be6c5515e0b9 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -126,7 +126,7 @@ static inline int syscall_get_arch(struct task_struct *task)
 		? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 }
 
-void do_syscall_64(struct pt_regs *regs, int nr);
+bool do_syscall_64(struct pt_regs *regs, int nr);
 
 #endif	/* CONFIG_X86_32 */
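
A closing note on how the asm half consumes the C half's verdict: do_syscall_64() now returns bool, which the System V x86-64 ABI hands back as 0 or 1 in %al, and that is what the new testb examines. A sketch of the resulting control flow in the entry stub (comments added here; only the testb/jz pair is verbatim from the patch):

	call	do_syscall_64	/* bool verdict returned in %al per the ABI */

	testb	%al, %al	/* ZF=1 iff do_syscall_64() returned false */
	jz	swapgs_restore_regs_and_return_to_usermode	/* IRET path */

	/* fall through: every SYSRET precondition was validated in C */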