[v7,31/41] x86/shstk: Introduce routines modifying shstk

Message ID 20230227222957.24501-32-rick.p.edgecombe@intel.com
State New
Headers
Series Shadow stacks for userspace |

Commit Message

Edgecombe, Rick P Feb. 27, 2023, 10:29 p.m. UTC
  From: Yu-cheng Yu <yu-cheng.yu@intel.com>

Shadow stacks are normally written to via CALL/RET or specific CET
instructions like RSTORSSP/SAVEPREVSSP. However during some Linux
operations the kernel will need to write to directly using the ring-0 only
WRUSS instruction.

A shadow stack restore token marks a restore point of the shadow stack, and
the address in a token must point directly above the token, which is within
the same shadow stack. This is distinctively different from other pointers
on the shadow stack, since those pointers point to executable code area.

Introduce token setup and verify routines. Also introduce WRUSS, which is
a kernel-mode instruction but writes directly to user shadow stack.

In future patches that enable shadow stack to work with signals, the kernel
will need something to denote the point in the stack where sigreturn may be
called. This will prevent attackers calling sigreturn at arbitrary places
in the stack, in order to help prevent SROP attacks.

To do this, something that can only be written by the kernel needs to be
placed on the shadow stack. This can be accomplished by setting bit 63 in
the frame written to the shadow stack. Userspace return addresses can't
have this bit set as it is in the kernel range. It is also can't be a
valid restore token.

Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: John Allen <john.allen@amd.com>
Tested-by: Kees Cook <keescook@chromium.org>
Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Cc: Kees Cook <keescook@chromium.org>

---
v5:
 - Fix typo in commit log

v3:
 - Drop shstk_check_rstor_token()
 - Fail put_shstk_data() if bit 63 is set in the data (Kees)
 - Add comment in create_rstor_token() (Kees)
 - Pull in create_rstor_token() changes from future patch (Kees)

v2:
 - Add data helpers for writing to shadow stack.

v1:
 - Use xsave helpers.
---
 arch/x86/include/asm/special_insns.h | 13 +++++
 arch/x86/kernel/shstk.c              | 73 ++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
  

Comments

Borislav Petkov March 9, 2023, 4:48 p.m. UTC | #1
On Mon, Feb 27, 2023 at 02:29:47PM -0800, Rick Edgecombe wrote:
> From: Yu-cheng Yu <yu-cheng.yu@intel.com>
> 
> Shadow stacks are normally written to via CALL/RET or specific CET
				       ^
				       indirectly.

> instructions like RSTORSSP/SAVEPREVSSP. However during some Linux
> operations the kernel will need to write to directly using the ring-0 only

"However, sometimes the kernel will need to..."

> WRUSS instruction.
> 
> A shadow stack restore token marks a restore point of the shadow stack, and
> the address in a token must point directly above the token, which is within
> the same shadow stack. This is distinctively different from other pointers
> on the shadow stack, since those pointers point to executable code area.
> 
> Introduce token setup and verify routines. Also introduce WRUSS, which is
> a kernel-mode instruction but writes directly to user shadow stack.
> 
> In future patches that enable shadow stack to work with signals, the kernel
> will need something to denote the point in the stack where sigreturn may be
> called. This will prevent attackers calling sigreturn at arbitrary places
> in the stack, in order to help prevent SROP attacks.
> 
> To do this, something that can only be written by the kernel needs to be
> placed on the shadow stack. This can be accomplished by setting bit 63 in
> the frame written to the shadow stack. Userspace return addresses can't
> have this bit set as it is in the kernel range. It is also can't be a

s/is //

> valid restore token.

...

> diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
> index de48d1389936..d6cd9344f6c7 100644
> --- a/arch/x86/include/asm/special_insns.h
> +++ b/arch/x86/include/asm/special_insns.h
> @@ -202,6 +202,19 @@ static inline void clwb(volatile void *__p)
>  		: [pax] "a" (p));
>  }
>  
> +#ifdef CONFIG_X86_USER_SHADOW_STACK
> +static inline int write_user_shstk_64(u64 __user *addr, u64 val)
> +{
> +	asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
> +			  _ASM_EXTABLE(1b, %l[fail])
> +			  :: [addr] "r" (addr), [val] "r" (val)
> +			  :: fail);
> +	return 0;
> +fail:
> +	return -EFAULT;

Nice!

> +}
> +#endif /* CONFIG_X86_USER_SHADOW_STACK */
> +
>  #define nop() asm volatile ("nop")
>  
>  static inline void serialize(void)

...

> +static int put_shstk_data(u64 __user *addr, u64 data)
> +{
> +	if (WARN_ON_ONCE(data & BIT(63)))

Dunno, maybe something like:

/*
 * A comment explaining what that is...
 */
#define SHSTK_SIGRETURN_TOKEN	BIT_ULL(63)

or so?

And use that instead of that magical bit 63.
  
Edgecombe, Rick P March 9, 2023, 5:03 p.m. UTC | #2
On Thu, 2023-03-09 at 17:48 +0100, Borislav Petkov wrote:
> On Mon, Feb 27, 2023 at 02:29:47PM -0800, Rick Edgecombe wrote:
> > From: Yu-cheng Yu <yu-cheng.yu@intel.com>
> > 
> > Shadow stacks are normally written to via CALL/RET or specific CET
> 
>                                        ^
>                                        indirectly.

Dunno here, RSTORSSP/SAVEPREVSSP are kind of direct.

> 
> > instructions like RSTORSSP/SAVEPREVSSP. However during some Linux
> > operations the kernel will need to write to directly using the
> > ring-0 only
> 
> "However, sometimes the kernel will need to..."

Ok.

> 
> > WRUSS instruction.
> > 
> > A shadow stack restore token marks a restore point of the shadow
> > stack, and
> > the address in a token must point directly above the token, which
> > is within
> > the same shadow stack. This is distinctively different from other
> > pointers
> > on the shadow stack, since those pointers point to executable code
> > area.
> > 
> > Introduce token setup and verify routines. Also introduce WRUSS,
> > which is
> > a kernel-mode instruction but writes directly to user shadow stack.
> > 
> > In future patches that enable shadow stack to work with signals,
> > the kernel
> > will need something to denote the point in the stack where
> > sigreturn may be
> > called. This will prevent attackers calling sigreturn at arbitrary
> > places
> > in the stack, in order to help prevent SROP attacks.
> > 
> > To do this, something that can only be written by the kernel needs
> > to be
> > placed on the shadow stack. This can be accomplished by setting bit
> > 63 in
> > the frame written to the shadow stack. Userspace return addresses
> > can't
> > have this bit set as it is in the kernel range. It is also can't be
> > a
> 
> s/is //

Yep, thanks.

> 
> > valid restore token.
> 
> ...
> 
> > diff --git a/arch/x86/include/asm/special_insns.h
> > b/arch/x86/include/asm/special_insns.h
> > index de48d1389936..d6cd9344f6c7 100644
> > --- a/arch/x86/include/asm/special_insns.h
> > +++ b/arch/x86/include/asm/special_insns.h
> > @@ -202,6 +202,19 @@ static inline void clwb(volatile void *__p)
> >                : [pax] "a" (p));
> >   }
> >   
> > +#ifdef CONFIG_X86_USER_SHADOW_STACK
> > +static inline int write_user_shstk_64(u64 __user *addr, u64 val)
> > +{
> > +     asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
> > +                       _ASM_EXTABLE(1b, %l[fail])
> > +                       :: [addr] "r" (addr), [val] "r" (val)
> > +                       :: fail);
> > +     return 0;
> > +fail:
> > +     return -EFAULT;
> 
> Nice!
> 
> > +}
> > +#endif /* CONFIG_X86_USER_SHADOW_STACK */
> > +
> >   #define nop() asm volatile ("nop")
> >   
> >   static inline void serialize(void)
> 
> ...
> 
> > +static int put_shstk_data(u64 __user *addr, u64 data)
> > +{
> > +     if (WARN_ON_ONCE(data & BIT(63)))
> 
> Dunno, maybe something like:
> 
> /*
>  * A comment explaining what that is...
>  */
> #define SHSTK_SIGRETURN_TOKEN   BIT_ULL(63)
> 
> or so?
> 
> And use that instead of that magical bit 63.

Seems very reasonable. Since we are calling this the "data format", I
might go with SHSTK_DATA_BIT.
  
Borislav Petkov March 9, 2023, 5:22 p.m. UTC | #3
On Thu, Mar 09, 2023 at 05:03:26PM +0000, Edgecombe, Rick P wrote:
> On Thu, 2023-03-09 at 17:48 +0100, Borislav Petkov wrote:
> > On Mon, Feb 27, 2023 at 02:29:47PM -0800, Rick Edgecombe wrote:
> > > From: Yu-cheng Yu <yu-cheng.yu@intel.com>
> > > 
> > > Shadow stacks are normally written to via CALL/RET or specific CET
> > 
> >                                        ^
> >                                        indirectly.
> 
> Dunno here, RSTORSSP/SAVEPREVSSP are kind of direct.
> 
> > 
> > > instructions like RSTORSSP/SAVEPREVSSP. However during some Linux
> > > operations the kernel will need to write to directly using the
						  ^^^^^^^^^

Yes, I was trying to make the contrast more obvious because you say
"directly" here.

But not too important.
  

Patch

diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index de48d1389936..d6cd9344f6c7 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -202,6 +202,19 @@  static inline void clwb(volatile void *__p)
 		: [pax] "a" (p));
 }
 
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+static inline int write_user_shstk_64(u64 __user *addr, u64 val)
+{
+	asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
+			  _ASM_EXTABLE(1b, %l[fail])
+			  :: [addr] "r" (addr), [val] "r" (val)
+			  :: fail);
+	return 0;
+fail:
+	return -EFAULT;
+}
+#endif /* CONFIG_X86_USER_SHADOW_STACK */
+
 #define nop() asm volatile ("nop")
 
 static inline void serialize(void)
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
index 1d30295e0066..13c02747386f 100644
--- a/arch/x86/kernel/shstk.c
+++ b/arch/x86/kernel/shstk.c
@@ -25,6 +25,8 @@ 
 #include <asm/fpu/api.h>
 #include <asm/prctl.h>
 
+#define SS_FRAME_SIZE 8
+
 static bool features_enabled(unsigned long features)
 {
 	return current->thread.features & features;
@@ -40,6 +42,35 @@  static void features_clr(unsigned long features)
 	current->thread.features &= ~features;
 }
 
+/*
+ * Create a restore token on the shadow stack.  A token is always 8-byte
+ * and aligned to 8.
+ */
+static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
+{
+	unsigned long addr;
+
+	/* Token must be aligned */
+	if (!IS_ALIGNED(ssp, 8))
+		return -EINVAL;
+
+	addr = ssp - SS_FRAME_SIZE;
+
+	/*
+	 * SSP is aligned, so reserved bits and mode bit are a zero, just mark
+	 * the token 64-bit.
+	 */
+	ssp |= BIT(0);
+
+	if (write_user_shstk_64((u64 __user *)addr, (u64)ssp))
+		return -EFAULT;
+
+	if (token_addr)
+		*token_addr = addr;
+
+	return 0;
+}
+
 static unsigned long alloc_shstk(unsigned long size)
 {
 	int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G;
@@ -159,6 +190,48 @@  int shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
 	return 0;
 }
 
+static unsigned long get_user_shstk_addr(void)
+{
+	unsigned long long ssp;
+
+	fpregs_lock_and_load();
+
+	rdmsrl(MSR_IA32_PL3_SSP, ssp);
+
+	fpregs_unlock();
+
+	return ssp;
+}
+
+static int put_shstk_data(u64 __user *addr, u64 data)
+{
+	if (WARN_ON_ONCE(data & BIT(63)))
+		return -EINVAL;
+
+	/*
+	 * Mark the high bit so that the sigframe can't be processed as a
+	 * return address.
+	 */
+	if (write_user_shstk_64(addr, data | BIT(63)))
+		return -EFAULT;
+	return 0;
+}
+
+static int get_shstk_data(unsigned long *data, unsigned long __user *addr)
+{
+	unsigned long ldata;
+
+	if (unlikely(get_user(ldata, addr)))
+		return -EFAULT;
+
+	if (!(ldata & BIT(63)))
+		return -EINVAL;
+
+	*data = ldata & ~BIT(63);
+
+	return 0;
+}
+
 void shstk_free(struct task_struct *tsk)
 {
 	struct thread_shstk *shstk = &tsk->thread.shstk;