[v3,25/37] x86/shstk: Add user-mode shadow stack support
Commit Message
From: Yu-cheng Yu <yu-cheng.yu@intel.com>
Introduce basic shadow stack enabling/disabling/allocation routines.
A task's shadow stack is allocated from memory with VM_SHADOW_STACK flag
and has a fixed size of min(RLIMIT_STACK, 4GB).
Keep the task's shadow stack address and size in thread_struct. This will
be copied when cloning new threads, but needs to be cleared during exec,
so add a function to do this.
Do not support IA32 emulation or x32.
Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: John Allen <john.allen@amd.com>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Cc: Kees Cook <keescook@chromium.org>
---
v3:
- Use define for set_clr_bits_msrl() (Kees)
- Make some functions static (Kees)
- Change feature_foo() to features_foo() (Kees)
- Centralize shadow stack size rlimit checks (Kees)
- Disable x32 support
v2:
- Get rid of unnecessary shstk->base checks
- Don't support IA32 emulation
v1:
- Switch to xsave helpers.
- Expand commit log.
Yu-cheng v30:
- Remove superfluous comments for struct thread_shstk.
- Replace 'populate' with 'unused'.
arch/x86/include/asm/cet.h | 7 ++
arch/x86/include/asm/msr.h | 11 +++
arch/x86/include/asm/processor.h | 3 +
arch/x86/include/uapi/asm/prctl.h | 3 +
arch/x86/kernel/shstk.c | 146 ++++++++++++++++++++++++++++++
5 files changed, 170 insertions(+)
Comments
On Fri, Nov 04, 2022 at 03:35:52PM -0700, Rick Edgecombe wrote:
> +static int shstk_setup(void)
> +{
> + struct thread_shstk *shstk = &current->thread.shstk;
> + unsigned long addr, size;
> +
> + /* Already enabled */
> + if (features_enabled(CET_SHSTK))
> + return 0;
> +
> + /* Also not supported for 32 bit and x32 */
> + if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_32bit_syscall())
> + return -EOPNOTSUPP;
> +
> + size = adjust_shstk_size(0);
> + addr = alloc_shstk(size);
> + if (IS_ERR_VALUE(addr))
> + return PTR_ERR((void *)addr);
> +
> + fpregs_lock_and_load();
> + wrmsrl(MSR_IA32_PL3_SSP, addr + size);
> + wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
This..
> + fpregs_unlock();
> +
> + shstk->base = addr;
> + shstk->size = size;
> + features_set(CET_SHSTK);
> +
> + return 0;
> +}
> +static int shstk_disable(void)
> +{
> + if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
> + return -EOPNOTSUPP;
> +
> + /* Already disabled? */
> + if (!features_enabled(CET_SHSTK))
> + return 0;
> +
> + fpregs_lock_and_load();
> + /* Disable WRSS too when disabling shadow stack */
> + set_clr_bits_msrl(MSR_IA32_U_CET, 0, CET_SHSTK_EN);
And this... aren't very consistent in approach. Given there is no U_IBT
yet, why complicate matters like this?
> + wrmsrl(MSR_IA32_PL3_SSP, 0);
> + fpregs_unlock();
> +
> + shstk_free(current);
> + features_clr(CET_SHSTK);
> +
> + return 0;
> +}
On Tue, 2022-11-15 at 13:32 +0100, Peter Zijlstra wrote:
> > + struct thread_shstk *shstk = &current->thread.shstk;
> > + unsigned long addr, size;
> > +
> > + /* Already enabled */
> > + if (features_enabled(CET_SHSTK))
> > + return 0;
> > +
> > + /* Also not supported for 32 bit and x32 */
> > + if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
> > in_32bit_syscall())
> > + return -EOPNOTSUPP;
> > +
> > + size = adjust_shstk_size(0);
> > + addr = alloc_shstk(size);
> > + if (IS_ERR_VALUE(addr))
> > + return PTR_ERR((void *)addr);
> > +
> > + fpregs_lock_and_load();
> > + wrmsrl(MSR_IA32_PL3_SSP, addr + size);
> > + wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
>
> This..
>
> > + fpregs_unlock();
> > +
> > + shstk->base = addr;
> > + shstk->size = size;
> > + features_set(CET_SHSTK);
> > +
> > + return 0;
> > +}
> > +static int shstk_disable(void)
> > +{
> > + if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
> > + return -EOPNOTSUPP;
> > +
> > + /* Already disabled? */
> > + if (!features_enabled(CET_SHSTK))
> > + return 0;
> > +
> > + fpregs_lock_and_load();
> > + /* Disable WRSS too when disabling shadow stack */
Oops, this comment is in wrong patch.
> > + set_clr_bits_msrl(MSR_IA32_U_CET, 0, CET_SHSTK_EN);
>
> And this... aren't very consistent in approach. Given there is no
> U_IBT
> yet, why complicate matters like this?
Sure, I can change it.
@@ -8,12 +8,19 @@
struct task_struct;
#ifdef CONFIG_X86_USER_SHADOW_STACK
+/* Per-task shadow stack placement: base address and size in bytes */
+struct thread_shstk {
+ u64 base;
+ u64 size;
+};
+
long cet_prctl(struct task_struct *task, int option, unsigned long features);
void reset_thread_features(void);
+void shstk_free(struct task_struct *p);
#else
static inline long cet_prctl(struct task_struct *task, int option,
unsigned long features) { return -EINVAL; }
static inline void reset_thread_features(void) {}
+static inline void shstk_free(struct task_struct *p) {}
#endif /* CONFIG_X86_USER_SHADOW_STACK */
#endif /* __ASSEMBLY__ */
@@ -310,6 +310,17 @@ void msrs_free(struct msr *msrs);
int msr_set_bit(u32 msr, u8 bit);
int msr_clear_bit(u32 msr, u8 bit);
+/*
+ * Read-modify-write an MSR: clear the 'clear' bits, set the 'set' bits,
+ * and only issue the WRMSR when the value actually changes.  Kept as a
+ * macro so it can never get accidentally un-inlined.
+ * NOTE: 'msr' is evaluated twice (rdmsrl and wrmsrl); pass a plain MSR
+ * index, not an expression with side effects.
+ */
+#define set_clr_bits_msrl(msr, set, clear) do { \
+ u64 __val, __new_val; \
+ \
+ rdmsrl(msr, __val); \
+ __new_val = (__val & ~(clear)) | (set); \
+ \
+ if (__new_val != __val) \
+ wrmsrl(msr, __new_val); \
+} while (0)
+
#ifdef CONFIG_SMP
int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
@@ -27,6 +27,7 @@ struct vm86;
#include <asm/unwind_hints.h>
#include <asm/vmxfeatures.h>
#include <asm/vdso/processor.h>
+#include <asm/cet.h>
#include <linux/personality.h>
#include <linux/cache.h>
@@ -533,6 +534,8 @@ struct thread_struct {
#ifdef CONFIG_X86_USER_SHADOW_STACK
unsigned long features;
unsigned long features_locked;
+
+ struct thread_shstk shstk;
#endif
/* Floating point and extended processor state */
@@ -26,4 +26,7 @@
#define ARCH_CET_DISABLE 0x5002
#define ARCH_CET_LOCK 0x5003
+/* ARCH_CET_ features bits */
+#define CET_SHSTK (1ULL << 0)
+
#endif /* _ASM_X86_PRCTL_H */
@@ -8,14 +8,160 @@
#include <linux/sched.h>
#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/sched/signal.h>
+#include <linux/compat.h>
+#include <linux/sizes.h>
+#include <linux/user.h>
+#include <asm/msr.h>
+#include <asm/fpu/xstate.h>
+#include <asm/fpu/types.h>
+#include <asm/cet.h>
+#include <asm/special_insns.h>
+#include <asm/fpu/api.h>
#include <asm/prctl.h>
+/* Return true if any of the given CET feature bits are enabled for current */
+static bool features_enabled(unsigned long features)
+{
+ return current->thread.features & features;
+}
+
+/* Mark the given CET feature bits enabled for current */
+static void features_set(unsigned long features)
+{
+ current->thread.features |= features;
+}
+
+/* Clear the given CET feature bits for current */
+static void features_clr(unsigned long features)
+{
+ current->thread.features &= ~features;
+}
+
+/*
+ * Map a shadow stack of @size bytes for the current task.  The VMA is
+ * created with VM_SHADOW_STACK | VM_WRITE but PROT_READ, so it is only
+ * writable via shadow stack operations, not regular stores.
+ *
+ * Returns the mapping's base address, or an errno-encoded value
+ * (test with IS_ERR_VALUE()) on failure.
+ */
+static unsigned long alloc_shstk(unsigned long size)
+{
+ int flags = MAP_ANONYMOUS | MAP_PRIVATE;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr, unused;
+
+ mmap_write_lock(mm);
+ /*
+ * Address hint is 0 (let the kernel pick); 'addr' was previously
+ * passed here uninitialized, which is undefined behavior.
+ */
+ addr = do_mmap(NULL, 0, size, PROT_READ, flags,
+        VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
+
+ mmap_write_unlock(mm);
+
+ return addr;
+}
+
+/*
+ * Compute the shadow stack size to allocate.  A non-zero request is
+ * page-aligned as-is; a zero request selects the default of
+ * min(RLIMIT_STACK, 4GB), page-aligned.
+ */
+static unsigned long adjust_shstk_size(unsigned long size)
+{
+ if (size)
+ return PAGE_ALIGN(size);
+
+ return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
+}
+
+/*
+ * Unmap a shadow stack at [base, base + size), retrying if the mmap
+ * lock is contended.  Failures other than -EINTR are warned about but
+ * otherwise ignored.
+ */
+static void unmap_shadow_stack(u64 base, u64 size)
+{
+ while (1) {
+ int r;
+
+ r = vm_munmap(base, size);
+
+ /*
+ * vm_munmap() returns -EINTR when mmap_lock is held by
+ * something else, and that lock should not be held for a
+ * long time. Retry it for the case.
+ */
+ if (r == -EINTR) {
+ cond_resched();
+ continue;
+ }
+
+ /*
+ * For all other types of vm_munmap() failure, either the
+ * system is out of memory or there is a bug.
+ */
+ WARN_ON_ONCE(r);
+ break;
+ }
+}
+
+/*
+ * Enable shadow stack for the current task: allocate a shadow stack,
+ * point MSR_IA32_PL3_SSP at its top, and set CET_SHSTK_EN.
+ * Returns 0 on success (or if already enabled), -EOPNOTSUPP when the
+ * CPU or the syscall ABI (32-bit/x32) does not support it, or the
+ * allocation error.
+ */
+static int shstk_setup(void)
+{
+ struct thread_shstk *shstk = &current->thread.shstk;
+ unsigned long addr, size;
+
+ /* Already enabled */
+ if (features_enabled(CET_SHSTK))
+ return 0;
+
+ /* Also not supported for 32 bit and x32 */
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_32bit_syscall())
+ return -EOPNOTSUPP;
+
+ size = adjust_shstk_size(0);
+ addr = alloc_shstk(size);
+ if (IS_ERR_VALUE(addr))
+ return PTR_ERR((void *)addr);
+
+ fpregs_lock_and_load();
+ /* SSP starts at the top; shadow stacks grow down like normal stacks */
+ wrmsrl(MSR_IA32_PL3_SSP, addr + size);
+ wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
+ fpregs_unlock();
+
+ shstk->base = addr;
+ shstk->size = size;
+ features_set(CET_SHSTK);
+
+ return 0;
+}
+
void reset_thread_features(void)
{
+ /* Clear shadow stack state on exec; '&current' was mis-encoded before */
+ memset(&current->thread.shstk, 0, sizeof(struct thread_shstk));
current->thread.features = 0;
current->thread.features_locked = 0;
}
+/*
+ * Free @tsk's shadow stack mapping, if any.
+ *
+ * NOTE(review): features_enabled() reads current->thread.features, not
+ * tsk->thread.features — confirm all callers pass a task whose feature
+ * state matches current's (e.g. exec/exit of current, or a clone error
+ * path before the child has run).
+ */
+void shstk_free(struct task_struct *tsk)
+{
+ struct thread_shstk *shstk = &tsk->thread.shstk;
+
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+ !features_enabled(CET_SHSTK))
+ return;
+
+ /* Kernel threads have no mm, hence no user mapping to unmap */
+ if (!tsk->mm)
+ return;
+
+ unmap_shadow_stack(shstk->base, shstk->size);
+}
+
+
+/*
+ * Disable shadow stack for the current task: clear the U_CET and SSP
+ * MSRs, free the shadow stack mapping, and clear the feature bit.
+ */
+static int shstk_disable(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+ return -EOPNOTSUPP;
+
+ /* Already disabled? */
+ if (!features_enabled(CET_SHSTK))
+ return 0;
+
+ fpregs_lock_and_load();
+ /*
+ * No other U_CET features (e.g. IBT) exist yet, so clear the whole
+ * MSR with a plain write instead of set_clr_bits_msrl(), matching
+ * the approach used in shstk_setup() (per review feedback).
+ */
+ wrmsrl(MSR_IA32_U_CET, 0);
+ wrmsrl(MSR_IA32_PL3_SSP, 0);
+ fpregs_unlock();
+
+ shstk_free(current);
+ features_clr(CET_SHSTK);
+
+ return 0;
+}
+
long cet_prctl(struct task_struct *task, int option, unsigned long features)
{
if (option == ARCH_CET_LOCK) {