[v8,1/4] RISC-V: mm: Restrict address space for sv39,sv48,sv57

Message ID 20230727212647.4182407-2-charlie@rivosinc.com
State New
Headers
Series RISC-V: mm: Make SV48 the default address space |

Commit Message

Charlie Jenkins July 27, 2023, 9:26 p.m. UTC
  Make sv48 the default address space for mmap as some applications
currently depend on this assumption. A hint address passed to mmap will
cause the largest address space that fits entirely into the hint to be
used. If the hint is less than or equal to 1<<38, an sv39 address will
be used. An exception is that if the hint address is 0, then an sv48
address will be used. After an address space is completely full, the next
smallest address space will be used.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
---
 arch/riscv/include/asm/elf.h       |  2 +-
 arch/riscv/include/asm/pgtable.h   | 20 +++++++++++-
 arch/riscv/include/asm/processor.h | 52 ++++++++++++++++++++++++++----
 3 files changed, 66 insertions(+), 8 deletions(-)
  

Comments

Alexandre Ghiti Aug. 6, 2023, 9:53 a.m. UTC | #1
On 27/07/2023 23:26, Charlie Jenkins wrote:
> Make sv48 the default address space for mmap as some applications
> currently depend on this assumption. A hint address passed to mmap will
> cause the largest address space that fits entirely into the hint to be
> used. If the hint is less than or equal to 1<<38, an sv39 address will
> be used. An exception is that if the hint address is 0, then an sv48
> address will be used. After an address space is completely full, the next
> smallest address space will be used.
>
> Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> ---
>   arch/riscv/include/asm/elf.h       |  2 +-
>   arch/riscv/include/asm/pgtable.h   | 20 +++++++++++-
>   arch/riscv/include/asm/processor.h | 52 ++++++++++++++++++++++++++----
>   3 files changed, 66 insertions(+), 8 deletions(-)
>
> diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
> index c24280774caf..5d3368d5585c 100644
> --- a/arch/riscv/include/asm/elf.h
> +++ b/arch/riscv/include/asm/elf.h
> @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
>    * the loader.  We need to make sure that it is out of the way of the program
>    * that it will "exec", and that there is sufficient room for the brk.
>    */
> -#define ELF_ET_DYN_BASE		((TASK_SIZE / 3) * 2)
> +#define ELF_ET_DYN_BASE		((DEFAULT_MAP_WINDOW / 3) * 2)
>   
>   #ifdef CONFIG_64BIT
>   #ifdef CONFIG_COMPAT
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 75970ee2bda2..c76a1ef094a4 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -63,8 +63,26 @@
>    * position vmemmap directly below the VMALLOC region.
>    */
>   #ifdef CONFIG_64BIT
> +#define VA_BITS_SV39 39
> +#define VA_BITS_SV48 48
> +#define VA_BITS_SV57 57
> +
> +#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1))
> +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1))
> +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
> +
>   #define VA_BITS		(pgtable_l5_enabled ? \
> -				57 : (pgtable_l4_enabled ? 48 : 39))
> +				VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
> +
> +#ifdef CONFIG_COMPAT
> +#define MMAP_VA_BITS_64 ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
> +#define MMAP_MIN_VA_BITS_64 ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS)


Here the condition is always true, right?


> +#define MMAP_VA_BITS (test_thread_flag(TIF_32BIT) ? 32 : MMAP_VA_BITS_64)
> +#define MMAP_MIN_VA_BITS (test_thread_flag(TIF_32BIT) ? 32 : MMAP_MIN_VA_BITS_64)


I think you should use is_compat_task() here instead of 
test_thread_flag(TIF_32BIT). And what about introducing VA_BITS_SV32 
instead of hardcoding 32?


> +#else
> +#define MMAP_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
> +#define MMAP_MIN_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS)


Ditto here.


> +#endif
>   #else
>   #define VA_BITS		32
>   #endif
> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> index c950a8d9edef..e810244ea951 100644
> --- a/arch/riscv/include/asm/processor.h
> +++ b/arch/riscv/include/asm/processor.h
> @@ -13,19 +13,59 @@
>   
>   #include <asm/ptrace.h>
>   
> +#ifdef CONFIG_64BIT
> +#define DEFAULT_MAP_WINDOW	(UL(1) << (MMAP_VA_BITS - 1))
> +#define STACK_TOP_MAX		TASK_SIZE_64
> +
> +#define arch_get_mmap_end(addr, len, flags)	\
> +({	\
> +	unsigned long mmap_end;	\
> +	typeof(addr) _addr = (addr); \
> +	if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_32BIT))) \
> +		mmap_end = DEFAULT_MAP_WINDOW;	\


Wouldn't that prevent an sv57 system from allocating sv57 addresses when
sv48 is full, unless explicitly asked?


> +	else if ((_addr) >= VA_USER_SV57)	\
> +		mmap_end = STACK_TOP_MAX;	\
> +	else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48))	\
> +		mmap_end = VA_USER_SV48;	\
> +	else	\
> +		mmap_end = VA_USER_SV39;	\
> +	mmap_end;	\
> +})
> +
> +#define arch_get_mmap_base(addr, base) \
> +({ \
> +	unsigned long mmap_base; \
> +	typeof(addr) _addr = (addr); \
> +	typeof(base) _base = (base); \
> +	unsigned long rnd_gap = (_base) - DEFAULT_MAP_WINDOW; \
> +	if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_32BIT))) \
> +		mmap_base = (_base); \
> +	else if (((_addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) \
> +		mmap_base = VA_USER_SV57 + rnd_gap; \


Shouldn't it be mmap_base = VA_USER_SV57 - rnd_gap?


> +	else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \
> +		mmap_base = VA_USER_SV48 + rnd_gap; \
> +	else \
> +		mmap_base = VA_USER_SV39 + rnd_gap; \
> +	mmap_base; \
> +})
> +
> +#else
> +#define DEFAULT_MAP_WINDOW	TASK_SIZE
> +#define STACK_TOP_MAX		TASK_SIZE
> +#endif
> +#define STACK_ALIGN		16
> +
> +#define STACK_TOP		DEFAULT_MAP_WINDOW
> +
>   /*
>    * This decides where the kernel will search for a free chunk of vm
>    * space during mmap's.
>    */
> -#define TASK_UNMAPPED_BASE	PAGE_ALIGN(TASK_SIZE / 3)
> -
> -#define STACK_TOP		TASK_SIZE
>   #ifdef CONFIG_64BIT
> -#define STACK_TOP_MAX		TASK_SIZE_64
> +#define TASK_UNMAPPED_BASE	PAGE_ALIGN((UL(1) << MMAP_MIN_VA_BITS) / 3)
>   #else
> -#define STACK_TOP_MAX		TASK_SIZE
> +#define TASK_UNMAPPED_BASE	PAGE_ALIGN(TASK_SIZE / 3)
>   #endif
> -#define STACK_ALIGN		16
>   
>   #ifndef __ASSEMBLY__
>
  
Charlie Jenkins Aug. 7, 2023, 9:12 p.m. UTC | #2
On Sun, Aug 06, 2023 at 11:53:51AM +0200, Alexandre Ghiti wrote:
> 
> On 27/07/2023 23:26, Charlie Jenkins wrote:
> > Make sv48 the default address space for mmap as some applications
> > currently depend on this assumption. A hint address passed to mmap will
> > cause the largest address space that fits entirely into the hint to be
> > used. If the hint is less than or equal to 1<<38, an sv39 address will
> > be used. An exception is that if the hint address is 0, then an sv48
> > address will be used. After an address space is completely full, the next
> > smallest address space will be used.
> > 
> > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > ---
> >   arch/riscv/include/asm/elf.h       |  2 +-
> >   arch/riscv/include/asm/pgtable.h   | 20 +++++++++++-
> >   arch/riscv/include/asm/processor.h | 52 ++++++++++++++++++++++++++----
> >   3 files changed, 66 insertions(+), 8 deletions(-)
> > 
> > diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
> > index c24280774caf..5d3368d5585c 100644
> > --- a/arch/riscv/include/asm/elf.h
> > +++ b/arch/riscv/include/asm/elf.h
> > @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
> >    * the loader.  We need to make sure that it is out of the way of the program
> >    * that it will "exec", and that there is sufficient room for the brk.
> >    */
> > -#define ELF_ET_DYN_BASE		((TASK_SIZE / 3) * 2)
> > +#define ELF_ET_DYN_BASE		((DEFAULT_MAP_WINDOW / 3) * 2)
> >   #ifdef CONFIG_64BIT
> >   #ifdef CONFIG_COMPAT
> > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> > index 75970ee2bda2..c76a1ef094a4 100644
> > --- a/arch/riscv/include/asm/pgtable.h
> > +++ b/arch/riscv/include/asm/pgtable.h
> > @@ -63,8 +63,26 @@
> >    * position vmemmap directly below the VMALLOC region.
> >    */
> >   #ifdef CONFIG_64BIT
> > +#define VA_BITS_SV39 39
> > +#define VA_BITS_SV48 48
> > +#define VA_BITS_SV57 57
> > +
> > +#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1))
> > +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1))
> > +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
> > +
> >   #define VA_BITS		(pgtable_l5_enabled ? \
> > -				57 : (pgtable_l4_enabled ? 48 : 39))
> > +				VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
> > +
> > +#ifdef CONFIG_COMPAT
> > +#define MMAP_VA_BITS_64 ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
> > +#define MMAP_MIN_VA_BITS_64 ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS)
> 
> 
> Here the condition is always true, right?
> 
Yes, that condition is always true; I can eliminate the conditional.
> 
> > +#define MMAP_VA_BITS (test_thread_flag(TIF_32BIT) ? 32 : MMAP_VA_BITS_64)
> > +#define MMAP_MIN_VA_BITS (test_thread_flag(TIF_32BIT) ? 32 : MMAP_MIN_VA_BITS_64)
> 
> 
> I think you should use is_compat_task() here instead of
> test_thread_flag(TIF_32BIT). And what about introducing VA_BITS_SV32 instead
> of hardcoding 32?
> 
Sounds good.
> 
> > +#else
> > +#define MMAP_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
> > +#define MMAP_MIN_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS)
> 
> 
> Ditto here.
> 
> 
> > +#endif
> >   #else
> >   #define VA_BITS		32
> >   #endif
> > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> > index c950a8d9edef..e810244ea951 100644
> > --- a/arch/riscv/include/asm/processor.h
> > +++ b/arch/riscv/include/asm/processor.h
> > @@ -13,19 +13,59 @@
> >   #include <asm/ptrace.h>
> > +#ifdef CONFIG_64BIT
> > +#define DEFAULT_MAP_WINDOW	(UL(1) << (MMAP_VA_BITS - 1))
> > +#define STACK_TOP_MAX		TASK_SIZE_64
> > +
> > +#define arch_get_mmap_end(addr, len, flags)	\
> > +({	\
> > +	unsigned long mmap_end;	\
> > +	typeof(addr) _addr = (addr); \
> > +	if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_32BIT))) \
> > +		mmap_end = DEFAULT_MAP_WINDOW;	\
> 
> 
> Wouldn't that prevent an sv57 system from allocating sv57 addresses when
> sv48 is full, unless explicitly asked?
> 
Yes, that is a good point; that should be STACK_TOP_MAX as well.
> 
> > +	else if ((_addr) >= VA_USER_SV57)	\
> > +		mmap_end = STACK_TOP_MAX;	\
> > +	else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48))	\
> > +		mmap_end = VA_USER_SV48;	\
> > +	else	\
> > +		mmap_end = VA_USER_SV39;	\
> > +	mmap_end;	\
> > +})
> > +
> > +#define arch_get_mmap_base(addr, base) \
> > +({ \
> > +	unsigned long mmap_base; \
> > +	typeof(addr) _addr = (addr); \
> > +	typeof(base) _base = (base); \
> > +	unsigned long rnd_gap = (_base) - DEFAULT_MAP_WINDOW; \
> > +	if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_32BIT))) \
> > +		mmap_base = (_base); \
> > +	else if (((_addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) \
> > +		mmap_base = VA_USER_SV57 + rnd_gap; \
> 
> 
> Shouldn't it be mmap_base = VA_USER_SV57 - rnd_gap?
> 
No, rnd_gap is a negative number here. 'base' is equal to
DEFAULT_MAP_WINDOW - gap - rnd. It would be clearer as a positive number,
so I will set rnd_gap to DEFAULT_MAP_WINDOW - (_base).
> 
> > +	else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \
> > +		mmap_base = VA_USER_SV48 + rnd_gap; \
> > +	else \
> > +		mmap_base = VA_USER_SV39 + rnd_gap; \
> > +	mmap_base; \
> > +})
> > +
> > +#else
> > +#define DEFAULT_MAP_WINDOW	TASK_SIZE
> > +#define STACK_TOP_MAX		TASK_SIZE
> > +#endif
> > +#define STACK_ALIGN		16
> > +
> > +#define STACK_TOP		DEFAULT_MAP_WINDOW
> > +
> >   /*
> >    * This decides where the kernel will search for a free chunk of vm
> >    * space during mmap's.
> >    */
> > -#define TASK_UNMAPPED_BASE	PAGE_ALIGN(TASK_SIZE / 3)
> > -
> > -#define STACK_TOP		TASK_SIZE
> >   #ifdef CONFIG_64BIT
> > -#define STACK_TOP_MAX		TASK_SIZE_64
> > +#define TASK_UNMAPPED_BASE	PAGE_ALIGN((UL(1) << MMAP_MIN_VA_BITS) / 3)
> >   #else
> > -#define STACK_TOP_MAX		TASK_SIZE
> > +#define TASK_UNMAPPED_BASE	PAGE_ALIGN(TASK_SIZE / 3)
> >   #endif
> > -#define STACK_ALIGN		16
> >   #ifndef __ASSEMBLY__
  

Patch

diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index c24280774caf..5d3368d5585c 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -49,7 +49,7 @@  extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
  * the loader.  We need to make sure that it is out of the way of the program
  * that it will "exec", and that there is sufficient room for the brk.
  */
-#define ELF_ET_DYN_BASE		((TASK_SIZE / 3) * 2)
+#define ELF_ET_DYN_BASE		((DEFAULT_MAP_WINDOW / 3) * 2)
 
 #ifdef CONFIG_64BIT
 #ifdef CONFIG_COMPAT
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 75970ee2bda2..c76a1ef094a4 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -63,8 +63,26 @@ 
  * position vmemmap directly below the VMALLOC region.
  */
 #ifdef CONFIG_64BIT
+#define VA_BITS_SV39 39
+#define VA_BITS_SV48 48
+#define VA_BITS_SV57 57
+
+#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1))
+#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1))
+#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
+
 #define VA_BITS		(pgtable_l5_enabled ? \
-				57 : (pgtable_l4_enabled ? 48 : 39))
+				VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
+
+#ifdef CONFIG_COMPAT
+#define MMAP_VA_BITS_64 ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
+#define MMAP_MIN_VA_BITS_64 ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS)
+#define MMAP_VA_BITS (test_thread_flag(TIF_32BIT) ? 32 : MMAP_VA_BITS_64)
+#define MMAP_MIN_VA_BITS (test_thread_flag(TIF_32BIT) ? 32 : MMAP_MIN_VA_BITS_64)
+#else
+#define MMAP_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
+#define MMAP_MIN_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS)
+#endif
 #else
 #define VA_BITS		32
 #endif
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index c950a8d9edef..e810244ea951 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -13,19 +13,59 @@ 
 
 #include <asm/ptrace.h>
 
+#ifdef CONFIG_64BIT
+#define DEFAULT_MAP_WINDOW	(UL(1) << (MMAP_VA_BITS - 1))
+#define STACK_TOP_MAX		TASK_SIZE_64
+
+#define arch_get_mmap_end(addr, len, flags)	\
+({	\
+	unsigned long mmap_end;	\
+	typeof(addr) _addr = (addr); \
+	if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_32BIT))) \
+		mmap_end = DEFAULT_MAP_WINDOW;	\
+	else if ((_addr) >= VA_USER_SV57)	\
+		mmap_end = STACK_TOP_MAX;	\
+	else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48))	\
+		mmap_end = VA_USER_SV48;	\
+	else	\
+		mmap_end = VA_USER_SV39;	\
+	mmap_end;	\
+})
+
+#define arch_get_mmap_base(addr, base) \
+({ \
+	unsigned long mmap_base; \
+	typeof(addr) _addr = (addr); \
+	typeof(base) _base = (base); \
+	unsigned long rnd_gap = (_base) - DEFAULT_MAP_WINDOW; \
+	if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_32BIT))) \
+		mmap_base = (_base); \
+	else if (((_addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) \
+		mmap_base = VA_USER_SV57 + rnd_gap; \
+	else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \
+		mmap_base = VA_USER_SV48 + rnd_gap; \
+	else \
+		mmap_base = VA_USER_SV39 + rnd_gap; \
+	mmap_base; \
+})
+
+#else
+#define DEFAULT_MAP_WINDOW	TASK_SIZE
+#define STACK_TOP_MAX		TASK_SIZE
+#endif
+#define STACK_ALIGN		16
+
+#define STACK_TOP		DEFAULT_MAP_WINDOW
+
 /*
  * This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define TASK_UNMAPPED_BASE	PAGE_ALIGN(TASK_SIZE / 3)
-
-#define STACK_TOP		TASK_SIZE
 #ifdef CONFIG_64BIT
-#define STACK_TOP_MAX		TASK_SIZE_64
+#define TASK_UNMAPPED_BASE	PAGE_ALIGN((UL(1) << MMAP_MIN_VA_BITS) / 3)
 #else
-#define STACK_TOP_MAX		TASK_SIZE
+#define TASK_UNMAPPED_BASE	PAGE_ALIGN(TASK_SIZE / 3)
 #endif
-#define STACK_ALIGN		16
 
 #ifndef __ASSEMBLY__