[PATCHv11,05/16] x86/uaccess: Provide untagged_addr() and remove tags before address check

Message ID 20221025001722.17466-6-kirill.shutemov@linux.intel.com
State New
Headers
Series Linear Address Masking enabling |

Commit Message

Kirill A. Shutemov Oct. 25, 2022, 12:17 a.m. UTC
  untagged_addr() is a helper used by the core-mm to strip tag bits and
get the address to the canonical shape. In only handles userspace
addresses. The untagging mask is stored in mmu_context and will be set
on enabling LAM for the process.

The tags must not be included into check whether it's okay to access the
userspace address.

Strip tags in access_ok().

get_user() and put_user() don't use access_ok(), but check access
against TASK_SIZE directly in assembly. Strip tags, before calling into
the assembly helper.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alexander Potapenko <glider@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/mmu.h         |  3 +++
 arch/x86/include/asm/mmu_context.h | 11 ++++++++
 arch/x86/include/asm/uaccess.h     | 42 +++++++++++++++++++++++++++---
 arch/x86/kernel/process.c          |  3 +++
 4 files changed, 56 insertions(+), 3 deletions(-)
  

Comments

Andy Lutomirski Nov. 7, 2022, 2:50 p.m. UTC | #1
On 10/24/22 17:17, Kirill A. Shutemov wrote:
> untagged_addr() is a helper used by the core-mm to strip tag bits and
> get the address to the canonical shape. In only handles userspace
> addresses. The untagging mask is stored in mmu_context and will be set
> on enabling LAM for the process.
> 
> The tags must not be included into check whether it's okay to access the
> userspace address.
> 
> Strip tags in access_ok().
> 
> get_user() and put_user() don't use access_ok(), but check access
> against TASK_SIZE directly in assembly. Strip tags, before calling into
> the assembly helper.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Tested-by: Alexander Potapenko <glider@google.com>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>   arch/x86/include/asm/mmu.h         |  3 +++
>   arch/x86/include/asm/mmu_context.h | 11 ++++++++
>   arch/x86/include/asm/uaccess.h     | 42 +++++++++++++++++++++++++++---
>   arch/x86/kernel/process.c          |  3 +++
>   4 files changed, 56 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> index 002889ca8978..2fdb390040b5 100644
> --- a/arch/x86/include/asm/mmu.h
> +++ b/arch/x86/include/asm/mmu.h
> @@ -43,6 +43,9 @@ typedef struct {
>   
>   	/* Active LAM mode:  X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */
>   	unsigned long lam_cr3_mask;
> +
> +	/* Significant bits of the virtual address. Excludes tag bits. */
> +	u64 untag_mask;
>   #endif
>   
>   	struct mutex lock;
> diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> index 69c943b2ae90..5bd3d46685dc 100644
> --- a/arch/x86/include/asm/mmu_context.h
> +++ b/arch/x86/include/asm/mmu_context.h
> @@ -100,6 +100,12 @@ static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
>   static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
>   {
>   	mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
> +	mm->context.untag_mask = oldmm->context.untag_mask;
> +}
> +
> +static inline void mm_reset_untag_mask(struct mm_struct *mm)
> +{
> +	mm->context.untag_mask = -1UL;
>   }
>   
>   #else
> @@ -112,6 +118,10 @@ static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
>   static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
>   {
>   }
> +
> +static inline void mm_reset_untag_mask(struct mm_struct *mm)
> +{
> +}
>   #endif
>   
>   #define enter_lazy_tlb enter_lazy_tlb
> @@ -138,6 +148,7 @@ static inline int init_new_context(struct task_struct *tsk,
>   		mm->context.execute_only_pkey = -1;
>   	}
>   #endif
> +	mm_reset_untag_mask(mm);
>   	init_new_context_ldt(mm);
>   	return 0;
>   }
> diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
> index 8bc614cfe21b..c6062c07ccd2 100644
> --- a/arch/x86/include/asm/uaccess.h
> +++ b/arch/x86/include/asm/uaccess.h
> @@ -7,6 +7,7 @@
>   #include <linux/compiler.h>
>   #include <linux/instrumented.h>
>   #include <linux/kasan-checks.h>
> +#include <linux/mm_types.h>
>   #include <linux/string.h>
>   #include <asm/asm.h>
>   #include <asm/page.h>
> @@ -21,6 +22,30 @@ static inline bool pagefault_disabled(void);
>   # define WARN_ON_IN_IRQ()
>   #endif
>   
> +#ifdef CONFIG_X86_64
> +/*
> + * Mask out tag bits from the address.
> + *
> + * Magic with the 'sign' allows to untag userspace pointer without any branches
> + * while leaving kernel addresses intact.
> + */
> +#define untagged_addr(mm, addr)	({					\
> +	u64 __addr = (__force u64)(addr);				\
> +	s64 sign = (s64)__addr >> 63;					\
> +	__addr &= (mm)->context.untag_mask | sign;			\
> +	(__force __typeof__(addr))__addr;				\
> +})
> +

I think this implementation is correct, but I'm wondering if there are 
any callers of untagged_addr that actually need to preserve kernel 
addresses.  Are there?  (There certainly *were* back when we had set_fs().)

I'm also mildly uneasy about a potential edge case.  Naively, one would 
expect:

untagged_addr(current->mm, addr) + size ==
untagged_addr(current->mm, addr + size)

at least for an address that is valid enough to be potentially 
dereferenced.  This isn't true any more for size that overflows into the 
tag bit range.

I *think* we're okay though -- __access_ok requires that addr <= limit - 
size, so any range that overflows into tag bits will be rejected even if 
the entire range consists of valid (tagged) user addresses.

So:

Acked-by: Andy Lutomirski <luto@kernel.org>
  
Kirill A. Shutemov Nov. 7, 2022, 5:33 p.m. UTC | #2
On Mon, Nov 07, 2022 at 06:50:51AM -0800, Andy Lutomirski wrote:
> > @@ -21,6 +22,30 @@ static inline bool pagefault_disabled(void);
> >   # define WARN_ON_IN_IRQ()
> >   #endif
> > +#ifdef CONFIG_X86_64
> > +/*
> > + * Mask out tag bits from the address.
> > + *
> > + * Magic with the 'sign' allows to untag userspace pointer without any branches
> > + * while leaving kernel addresses intact.
> > + */
> > +#define untagged_addr(mm, addr)	({					\
> > +	u64 __addr = (__force u64)(addr);				\
> > +	s64 sign = (s64)__addr >> 63;					\
> > +	__addr &= (mm)->context.untag_mask | sign;			\
> > +	(__force __typeof__(addr))__addr;				\
> > +})
> > +
> 
> I think this implementation is correct, but I'm wondering if there are any
> callers of untagged_addr that actually need to preserve kernel addresses.
> Are there?  (There certainly *were* back when we had set_fs().)

I don't think there's any.

CONFIG_KASAN_SW_TAGS uses untagged_addr() on kernel addresses, but it is
only enabled on arm64. On x86, it will use CR4.LAM_SUP and the enabling
would require a new helper for untagging kernel addresses.

That said, I would rather stay on the safe side.

> I'm also mildly uneasy about a potential edge case.  Naively, one would
> expect:
> 
> untagged_addr(current->mm, addr) + size ==
> untagged_addr(current->mm, addr + size)
> 
> at least for an address that is valid enough to be potentially dereferenced.
> This isn't true any more for size that overflows into the tag bit range.

That's definitely a new edge case.

From quick grep, the only CONFIG_KASAN_SW_TAGS code obviously does
arithmetics on address before untagging.

> I *think* we're okay though -- __access_ok requires that addr <= limit -
> size, so any range that overflows into tag bits will be rejected even if the
> entire range consists of valid (tagged) user addresses.

True.

> So:
> 
> Acked-by: Andy Lutomirski <luto@kernel.org>

Thanks!
  

Patch

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 002889ca8978..2fdb390040b5 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -43,6 +43,9 @@  typedef struct {
 
 	/* Active LAM mode:  X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */
 	unsigned long lam_cr3_mask;
+
+	/* Significant bits of the virtual address. Excludes tag bits. */
+	u64 untag_mask;
 #endif
 
 	struct mutex lock;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 69c943b2ae90..5bd3d46685dc 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -100,6 +100,12 @@  static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
 static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
+	mm->context.untag_mask = oldmm->context.untag_mask;
+}
+
+static inline void mm_reset_untag_mask(struct mm_struct *mm)
+{
+	mm->context.untag_mask = -1UL;
 }
 
 #else
@@ -112,6 +118,10 @@  static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
 static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 }
+
+static inline void mm_reset_untag_mask(struct mm_struct *mm)
+{
+}
 #endif
 
 #define enter_lazy_tlb enter_lazy_tlb
@@ -138,6 +148,7 @@  static inline int init_new_context(struct task_struct *tsk,
 		mm->context.execute_only_pkey = -1;
 	}
 #endif
+	mm_reset_untag_mask(mm);
 	init_new_context_ldt(mm);
 	return 0;
 }
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 8bc614cfe21b..c6062c07ccd2 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -7,6 +7,7 @@ 
 #include <linux/compiler.h>
 #include <linux/instrumented.h>
 #include <linux/kasan-checks.h>
+#include <linux/mm_types.h>
 #include <linux/string.h>
 #include <asm/asm.h>
 #include <asm/page.h>
@@ -21,6 +22,30 @@  static inline bool pagefault_disabled(void);
 # define WARN_ON_IN_IRQ()
 #endif
 
+#ifdef CONFIG_X86_64
+/*
+ * Mask out tag bits from the address.
+ *
+ * Magic with the 'sign' allows to untag userspace pointer without any branches
+ * while leaving kernel addresses intact.
+ */
+#define untagged_addr(mm, addr)	({					\
+	u64 __addr = (__force u64)(addr);				\
+	s64 sign = (s64)__addr >> 63;					\
+	__addr &= (mm)->context.untag_mask | sign;			\
+	(__force __typeof__(addr))__addr;				\
+})
+
+#define untagged_ptr(mm, ptr)	({					\
+	u64 __ptrval = (__force u64)(ptr);				\
+	__ptrval = untagged_addr(mm, __ptrval);				\
+	(__force __typeof__(*(ptr)) *)__ptrval;				\
+})
+#else
+#define untagged_addr(mm, addr)	(addr)
+#define untagged_ptr(mm, ptr)	(ptr)
+#endif
+
 /**
  * access_ok - Checks if a user space pointer is valid
  * @addr: User space pointer to start of block to check
@@ -41,7 +66,7 @@  static inline bool pagefault_disabled(void);
 #define access_ok(addr, size)					\
 ({									\
 	WARN_ON_IN_IRQ();						\
-	likely(__access_ok(addr, size));				\
+	likely(__access_ok(untagged_addr(current->mm, addr), size));	\
 })
 
 #include <asm-generic/access_ok.h>
@@ -127,7 +152,13 @@  extern int __get_user_bad(void);
  * Return: zero on success, or -EFAULT on error.
  * On error, the variable @x is set to zero.
  */
-#define get_user(x,ptr) ({ might_fault(); do_get_user_call(get_user,x,ptr); })
+#define get_user(x,ptr)							\
+({									\
+	__typeof__(*(ptr)) __user *__ptr_clean;				\
+	__ptr_clean = untagged_ptr(current->mm, ptr);			\
+	might_fault();							\
+	do_get_user_call(get_user,x,__ptr_clean);			\
+})
 
 /**
  * __get_user - Get a simple variable from user space, with less checking.
@@ -227,7 +258,12 @@  extern void __put_user_nocheck_8(void);
  *
  * Return: zero on success, or -EFAULT on error.
  */
-#define put_user(x, ptr) ({ might_fault(); do_put_user_call(put_user,x,ptr); })
+#define put_user(x, ptr) ({						\
+	__typeof__(*(ptr)) __user *__ptr_clean;				\
+	__ptr_clean = untagged_ptr(current->mm, ptr);			\
+	might_fault();							\
+	do_put_user_call(put_user,x,__ptr_clean);			\
+})
 
 /**
  * __put_user - Write a simple value into user space, with less checking.
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c21b7347a26d..d1e83ba21130 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -47,6 +47,7 @@ 
 #include <asm/frame.h>
 #include <asm/unwind.h>
 #include <asm/tdx.h>
+#include <asm/mmu_context.h>
 
 #include "process.h"
 
@@ -367,6 +368,8 @@  void arch_setup_new_exec(void)
 		task_clear_spec_ssb_noexec(current);
 		speculation_ctrl_update(read_thread_flags());
 	}
+
+	mm_reset_untag_mask(current->mm);
 }
 
 #ifdef CONFIG_X86_IOPL_IOPERM