[v3,27/37] x86/shstk: Introduce routines modifying shstk

Message ID 20221104223604.29615-28-rick.p.edgecombe@intel.com
State New
Headers
Series Shadow stacks for userspace |

Commit Message

Edgecombe, Rick P Nov. 4, 2022, 10:35 p.m. UTC
  From: Yu-cheng Yu <yu-cheng.yu@intel.com>

Shadow stack's are normally written to via CALL/RET or specific CET
instuctions like RSTORSSP/SAVEPREVSSP. However during some Linux
operations the kernel will need to write to directly using the ring-0 only
WRUSS instruction.

A shadow stack restore token marks a restore point of the shadow stack, and
the address in a token must point directly above the token, which is within
the same shadow stack. This is distinctively different from other pointers
on the shadow stack, since those pointers point to executable code area.

Introduce token setup and verify routines. Also introduce WRUSS, which is
a kernel-mode instruction but writes directly to user shadow stack.

In future patches that enable shadow stack to work with signals, the kernel
will need something to denote the point in the stack where sigreturn may be
called. This will prevent attackers calling sigreturn at arbitrary places
in the stack, in order to help prevent SROP attacks.

To do this, something that can only be written by the kernel needs to be
placed on the shadow stack. This can be accomplished by setting bit 63 in
the frame written to the shadow stack. Userspace return addresses can't
have this bit set as it is in the kernel range. It is also can't be a
valid restore token.

Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: John Allen <john.allen@amd.com>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Cc: Kees Cook <keescook@chromium.org>

---

v3:
 - Drop shstk_check_rstor_token()
 - Fail put_shstk_data() if bit 63 is set in the data (Kees)
 - Add comment in create_rstor_token() (Kees)
 - Pull in create_rstor_token() changes from future patch (Kees)

v2:
 - Add data helpers for writing to shadow stack.

v1:
 - Use xsave helpers.

Yu-cheng v30:
 - Update commit log, remove description about signals.
 - Update various comments.
 - Remove variable 'ssp' init and adjust return value accordingly.
 - Check get_user_shstk_addr() return value.
 - Replace 'ia32' with 'proc32'.

 arch/x86/include/asm/special_insns.h | 13 +++++
 arch/x86/kernel/shstk.c              | 73 ++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
  

Comments

Peter Zijlstra Nov. 15, 2022, 2:18 p.m. UTC | #1
On Fri, Nov 04, 2022 at 03:35:54PM -0700, Rick Edgecombe wrote:

> +#ifdef CONFIG_X86_USER_SHADOW_STACK
> +static inline int write_user_shstk_64(u64 __user *addr, u64 val)
> +{
> +	asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
> +			  _ASM_EXTABLE(1b, %l[fail])
> +			  :: [addr] "r" (addr), [val] "r" (val)
> +			  :: fail);
> +	return 0;
> +fail:
> +	return -EFAULT;
> +}
> +#endif /* CONFIG_X86_USER_SHADOW_STACK */

Why isn't this modelled after put_user() ?

Should you write a 64bit value even if the task receiving a signal is
32bit ?
  
Peter Zijlstra Nov. 15, 2022, 2:22 p.m. UTC | #2
On Fri, Nov 04, 2022 at 03:35:54PM -0700, Rick Edgecombe wrote:
> +static unsigned long get_user_shstk_addr(void)
> +{
> +	unsigned long long ssp;
> +
> +	fpregs_lock_and_load();
> +
> +	rdmsrl(MSR_IA32_PL3_SSP, ssp);
> +
> +	fpregs_unlock();
> +
> +	return ssp;
> +}

This doesn't return the shstk addr, unlike what the name suggests,
right?
  
Edgecombe, Rick P Nov. 15, 2022, 8:56 p.m. UTC | #3
On Tue, 2022-11-15 at 15:22 +0100, Peter Zijlstra wrote:
> On Fri, Nov 04, 2022 at 03:35:54PM -0700, Rick Edgecombe wrote:
> > +static unsigned long get_user_shstk_addr(void)
> > +{
> > +     unsigned long long ssp;
> > +
> > +     fpregs_lock_and_load();
> > +
> > +     rdmsrl(MSR_IA32_PL3_SSP, ssp);
> > +
> > +     fpregs_unlock();
> > +
> > +     return ssp;
> > +}
> 
> This doesn't return the shstk addr, unlike what the name suggests,
> right?

That's a good point. get_user_ssp() would be a better name.
  
Edgecombe, Rick P Nov. 15, 2022, 11:42 p.m. UTC | #4
On Tue, 2022-11-15 at 15:18 +0100, Peter Zijlstra wrote:
> On Fri, Nov 04, 2022 at 03:35:54PM -0700, Rick Edgecombe wrote:
> 
> > +#ifdef CONFIG_X86_USER_SHADOW_STACK
> > +static inline int write_user_shstk_64(u64 __user *addr, u64 val)
> > +{
> > +     asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
> > +                       _ASM_EXTABLE(1b, %l[fail])
> > +                       :: [addr] "r" (addr), [val] "r" (val)
> > +                       :: fail);
> > +     return 0;
> > +fail:
> > +     return -EFAULT;
> > +}
> > +#endif /* CONFIG_X86_USER_SHADOW_STACK */
> 
> Why isn't this modelled after put_user() ?

You mean as far as supporting multiple sizes? It just isn't really
needed yet. We are only writing single frames. I suppose it might make
more sense with the alt shadow stack support, but that is dropped for
now.

The other difference here is that WRUSS is a weird instruction that is
treated as a user access even if it comes from the kernel mode. So it's
doesn't need to stac/clac.

> 
> Should you write a 64bit value even if the task receiving a signal is
> 32bit ?

32 bit support was also dropped.
  
Peter Zijlstra Nov. 16, 2022, 10:18 a.m. UTC | #5
On Tue, Nov 15, 2022 at 11:42:46PM +0000, Edgecombe, Rick P wrote:
> On Tue, 2022-11-15 at 15:18 +0100, Peter Zijlstra wrote:
> > On Fri, Nov 04, 2022 at 03:35:54PM -0700, Rick Edgecombe wrote:
> > 
> > > +#ifdef CONFIG_X86_USER_SHADOW_STACK
> > > +static inline int write_user_shstk_64(u64 __user *addr, u64 val)
> > > +{
> > > +     asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
> > > +                       _ASM_EXTABLE(1b, %l[fail])
> > > +                       :: [addr] "r" (addr), [val] "r" (val)
> > > +                       :: fail);
> > > +     return 0;
> > > +fail:
> > > +     return -EFAULT;
> > > +}
> > > +#endif /* CONFIG_X86_USER_SHADOW_STACK */
> > 
> > Why isn't this modelled after put_user() ?
> 
> You mean as far as supporting multiple sizes? It just isn't really
> needed yet. We are only writing single frames. I suppose it might make
> more sense with the alt shadow stack support, but that is dropped for
> now.
> 
> The other difference here is that WRUSS is a weird instruction that is
> treated as a user access even if it comes from the kernel mode. So it's
> doesn't need to stac/clac.
> 
> > 
> > Should you write a 64bit value even if the task receiving a signal is
> > 32bit ?
> 
> 32 bit support was also dropped.

How? Task could start life as 64bit, frob LDT to set up 32bit code
segment and jump into it and start doing 32bit syscalls, then what?

AFAICT those 32bit syscalls will end up doing SA_IA32_ABI sigframes.
  
Edgecombe, Rick P Nov. 16, 2022, 10:38 p.m. UTC | #6
On Wed, 2022-11-16 at 11:18 +0100, Peter Zijlstra wrote:
> > > 
> > > Should you write a 64bit value even if the task receiving a
> > > signal is
> > > 32bit ?
> > 
> > 32 bit support was also dropped.
> 
> How? Task could start life as 64bit, frob LDT to set up 32bit code
> segment and jump into it and start doing 32bit syscalls, then what?
> 
> AFAICT those 32bit syscalls will end up doing SA_IA32_ABI sigframes.

Hmm, good point. This series used to support normal 32 bit apps via
ia32 emulation which would have handled this. But I removed it (blocked
in the enabling logic) because it didn't seem like it would get enough
use to justify the extra code. That doesn't block this scenario here
though.

Pardon the possibly naive question, but is this 32/64 bit mixing
something any normal, shstk-desiring, applications would actually do? O
r more that they could do?

Thanks,

Rick
  
Peter Zijlstra Nov. 17, 2022, 2:17 p.m. UTC | #7
On Wed, Nov 16, 2022 at 10:38:19PM +0000, Edgecombe, Rick P wrote:
> On Wed, 2022-11-16 at 11:18 +0100, Peter Zijlstra wrote:
> > > > 
> > > > Should you write a 64bit value even if the task receiving a
> > > > signal is
> > > > 32bit ?
> > > 
> > > 32 bit support was also dropped.
> > 
> > How? Task could start life as 64bit, frob LDT to set up 32bit code
> > segment and jump into it and start doing 32bit syscalls, then what?
> > 
> > AFAICT those 32bit syscalls will end up doing SA_IA32_ABI sigframes.
> 
> Hmm, good point. This series used to support normal 32 bit apps via
> ia32 emulation which would have handled this. But I removed it (blocked
> in the enabling logic) because it didn't seem like it would get enough
> use to justify the extra code. That doesn't block this scenario here
> though.
> 
> Pardon the possibly naive question, but is this 32/64 bit mixing
> something any normal, shstk-desiring, applications would actually do? O
> r more that they could do?

It is not something common, but it is something that things like Wine
do IIRC, and it would be a real shame if Wine could not use shadow
stacks or something, right ;-)

But more to the point; since the kernel cannot forbit this scenario
(aside from taking away the LDT entirely) it is something that needs
handling.
  
Edgecombe, Rick P Nov. 18, 2022, 5:05 p.m. UTC | #8
On Thu, 2022-11-17 at 15:17 +0100, Peter Zijlstra wrote:
> On Wed, Nov 16, 2022 at 10:38:19PM +0000, Edgecombe, Rick P wrote:
> > On Wed, 2022-11-16 at 11:18 +0100, Peter Zijlstra wrote:
> > > > > 
> > > > > Should you write a 64bit value even if the task receiving a
> > > > > signal is
> > > > > 32bit ?
> > > > 
> > > > 32 bit support was also dropped.
> > > 
> > > How? Task could start life as 64bit, frob LDT to set up 32bit
> > > code
> > > segment and jump into it and start doing 32bit syscalls, then
> > > what?
> > > 
> > > AFAICT those 32bit syscalls will end up doing SA_IA32_ABI
> > > sigframes.
> > 
> > Hmm, good point. This series used to support normal 32 bit apps via
> > ia32 emulation which would have handled this. But I removed it
> > (blocked
> > in the enabling logic) because it didn't seem like it would get
> > enough
> > use to justify the extra code. That doesn't block this scenario
> > here
> > though.
> > 
> > Pardon the possibly naive question, but is this 32/64 bit mixing
> > something any normal, shstk-desiring, applications would actually
> > do? O
> > r more that they could do?
> 
> It is not something common, but it is something that things like Wine
> do IIRC, and it would be a real shame if Wine could not use shadow
> stacks or something, right ;-)

Since windows has shadow stack support, I guess. But it looks like it
doesn't support shadow stacks on 32 bit either. So for the time being,
it seems Wine wouldn't use this either... I think...

> 
> But more to the point; since the kernel cannot forbit this scenario
> (aside from taking away the LDT entirely) it is something that needs
> handling.

I'm having to go educate myself a bit on this kind of code mixing and
existing ABI expectations. It seems you could also just make 32 bit
syscalls from 64 bit code to trigger the same behavior.

On one hand if we think that no one will use this, it would be a shame
to have to maintain 32 bit shadow stack support. But on the other hand,
we have all these apps being automatically marked as supporting shadow
stack. If this was not the case, I would think just declaring this
unsupported would be the best.

For bringing back 32 bit support, the tricky part might be a 32 bit
implementation of the new shadow stack sigframe design that supports
alt shadow stacks. Setting the high bit to guarantee the frame will not
point to user space won't work for 32 bit. But if we are mostly worried
about making sure it is still functional we could maybe just have a
slightly less protective format for the shadow stack sigframe for 32
bit. It would not have the same SROP protections. Have to think if this
is a security hole for 64 bit though.

Anyway, I'm still digging on this one, and just wanted to let you know
where I was at.
  

Patch

diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 35f709f619fb..6d51a87aea7f 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -223,6 +223,19 @@  static inline void clwb(volatile void *__p)
 		: [pax] "a" (p));
 }
 
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+static inline int write_user_shstk_64(u64 __user *addr, u64 val)
+{
+	asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
+			  _ASM_EXTABLE(1b, %l[fail])
+			  :: [addr] "r" (addr), [val] "r" (val)
+			  :: fail);
+	return 0;
+fail:
+	return -EFAULT;
+}
+#endif /* CONFIG_X86_USER_SHADOW_STACK */
+
 #define nop() asm volatile ("nop")
 
 static inline void serialize(void)
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
index a7a982924b9a..755b4af40413 100644
--- a/arch/x86/kernel/shstk.c
+++ b/arch/x86/kernel/shstk.c
@@ -25,6 +25,8 @@ 
 #include <asm/fpu/api.h>
 #include <asm/prctl.h>
 
+#define SS_FRAME_SIZE 8
+
 static bool features_enabled(unsigned long features)
 {
 	return current->thread.features & features;
@@ -40,6 +42,35 @@  static void features_clr(unsigned long features)
 	current->thread.features &= ~features;
 }
 
+/*
+ * Create a restore token on the shadow stack.  A token is always 8-byte
+ * and aligned to 8.
+ */
+static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
+{
+	unsigned long addr;
+
+	/* Token must be aligned */
+	if (!IS_ALIGNED(ssp, 8))
+		return -EINVAL;
+
+	addr = ssp - SS_FRAME_SIZE;
+
+	/*
+	 * SSP is aligned, so reserved bits and mode bit are a zero, just mark
+	 * the token 64-bit.
+	 */
+	ssp |= BIT(0);
+
+	if (write_user_shstk_64((u64 __user *)addr, (u64)ssp))
+		return -EFAULT;
+
+	if (token_addr)
+		*token_addr = addr;
+
+	return 0;
+}
+
 static unsigned long alloc_shstk(unsigned long size)
 {
 	int flags = MAP_ANONYMOUS | MAP_PRIVATE;
@@ -160,6 +191,48 @@  int shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
 	return 0;
 }
 
+static unsigned long get_user_shstk_addr(void)
+{
+	unsigned long long ssp;
+
+	fpregs_lock_and_load();
+
+	rdmsrl(MSR_IA32_PL3_SSP, ssp);
+
+	fpregs_unlock();
+
+	return ssp;
+}
+
+static int put_shstk_data(u64 __user *addr, u64 data)
+{
+	if (WARN_ON_ONCE(data & BIT(63)))
+		return -EINVAL;
+
+	/*
+	 * Mark the high bit so that the sigframe can't be processed as a
+	 * return address.
+	 */
+	if (write_user_shstk_64(addr, data | BIT(63)))
+		return -EFAULT;
+	return 0;
+}
+
+static int get_shstk_data(unsigned long *data, unsigned long __user *addr)
+{
+	unsigned long ldata;
+
+	if (unlikely(get_user(ldata, addr)))
+		return -EFAULT;
+
+	if (!(ldata & BIT(63)))
+		return -EINVAL;
+
+	*data = ldata & ~BIT(63);
+
+	return 0;
+}
+
 void shstk_free(struct task_struct *tsk)
 {
 	struct thread_shstk *shstk = &tsk->thread.shstk;