[v3] ELF: AT_PAGE_SHIFT_MASK -- supply userspace with available page shifts

Message ID 8582f7c9-b49d-4d21-8948-59d580e5317c@p183
State New
Headers
Series [v3] ELF: AT_PAGE_SHIFT_MASK -- supply userspace with available page shifts |

Commit Message

Alexey Dobriyan Dec. 7, 2023, 6:44 p.m. UTC
  Report available page shifts in arch independent manner, so that
userspace developers won't have to parse /proc/cpuinfo hunting
for arch specific strings.

Main users are supposed to be libhugetlbfs-like libraries which try
to abstract huge mappings across multiple architectures. Regular code
which queries hugepage support before using them benefits too because
it doesn't have to deal with descriptors and parsing sysfs hierarchies
while enjoying the simplicity and speed of getauxval(AT_PAGE_SHIFT_MASK).

Note!

This is strictly for userspace, if some page size is shutdown due
to kernel command line option or CPU bug workaround, than it must
not be reported in aux vector!

x86_64 machine with 1 GiB pages:

	00000030  06 00 00 00 00 00 00 00  00 10 00 00 00 00 00 00
	00000040  1d 00 00 00 00 00 00 00  00 10 20 40 00 00 00 00

x86_64 machine with 2 MiB pages only:

	00000030  06 00 00 00 00 00 00 00  00 10 00 00 00 00 00 00
	00000040  1d 00 00 00 00 00 00 00  00 10 20 00 00 00 00 00

AT_PAGESZ always reports one smallest page size which is not interesting.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

	v3: better comment and changelog
	v2: switch to page shifts, rename to ARCH_AT_PAGE_SHIFT_MASK

 arch/x86/include/asm/elf.h  |   12 ++++++++++++
 fs/binfmt_elf.c             |    3 +++
 include/uapi/linux/auxvec.h |   13 +++++++++++++
 3 files changed, 28 insertions(+)
  

Comments

Kees Cook Dec. 12, 2023, 9:09 p.m. UTC | #1
On Thu, Dec 07, 2023 at 09:44:33PM +0300, Alexey Dobriyan wrote:
> Report available page shifts in arch independent manner, so that
> userspace developers won't have to parse /proc/cpuinfo hunting
> for arch specific strings.
> 
> Main users are supposed to be libhugetlbfs-like libraries which try
> to abstract huge mappings across multiple architectures. Regular code
> which queries hugepage support before using them benefits too because
> it doesn't have to deal with descriptors and parsing sysfs hierarchies
> while enjoying the simplicity and speed of getauxval(AT_PAGE_SHIFT_MASK).
> 
> Note!
> 
> This is strictly for userspace, if some page size is shutdown due
> to kernel command line option or CPU bug workaround, than it must
> not be reported in aux vector!
> 
> x86_64 machine with 1 GiB pages:
> 
> 	00000030  06 00 00 00 00 00 00 00  00 10 00 00 00 00 00 00
> 	00000040  1d 00 00 00 00 00 00 00  00 10 20 40 00 00 00 00
> 
> x86_64 machine with 2 MiB pages only:
> 
> 	00000030  06 00 00 00 00 00 00 00  00 10 00 00 00 00 00 00
> 	00000040  1d 00 00 00 00 00 00 00  00 10 20 00 00 00 00 00
> 
> AT_PAGESZ always reports one smallest page size which is not interesting.
> 
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> ---
> 
> 	v3: better comment and changelog
> 	v2: switch to page shifts, rename to ARCH_AT_PAGE_SHIFT_MASK
> 
>  arch/x86/include/asm/elf.h  |   12 ++++++++++++
>  fs/binfmt_elf.c             |    3 +++
>  include/uapi/linux/auxvec.h |   13 +++++++++++++
>  3 files changed, 28 insertions(+)
> 
> --- a/arch/x86/include/asm/elf.h
> +++ b/arch/x86/include/asm/elf.h
> @@ -358,6 +358,18 @@ else if (IS_ENABLED(CONFIG_IA32_EMULATION))				\
>  
>  #define COMPAT_ELF_ET_DYN_BASE	(TASK_UNMAPPED_BASE + 0x1000000)
>  
> +#define ARCH_AT_PAGE_SHIFT_MASK					\
> +	do {							\
> +		u32 val = 1 << 12;				\
> +		if (boot_cpu_has(X86_FEATURE_PSE)) {		\
> +			val |= 1 << 21;				\
> +		}						\
> +		if (boot_cpu_has(X86_FEATURE_GBPAGES)) {	\
> +			val |= 1 << 30;				\
> +		}						\
> +		NEW_AUX_ENT(AT_PAGE_SHIFT_MASK, val);		\
> +	} while (0)
> +
>  #endif /* !CONFIG_X86_32 */
>  
>  #define VDSO_CURRENT_BASE	((unsigned long)current->mm->context.vdso)

If I can get an Ack from x86 maintainers for this, I can carry it in my
execve tree.

Thanks for the updates to the commit log and comments, it reads better
now.

-Kees

> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -240,6 +240,9 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
>  #endif
>  	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
>  	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
> +#ifdef ARCH_AT_PAGE_SHIFT_MASK
> +	ARCH_AT_PAGE_SHIFT_MASK;
> +#endif
>  	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
>  	NEW_AUX_ENT(AT_PHDR, phdr_addr);
>  	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
> --- a/include/uapi/linux/auxvec.h
> +++ b/include/uapi/linux/auxvec.h
> @@ -33,6 +33,19 @@
>  #define AT_RSEQ_FEATURE_SIZE	27	/* rseq supported feature size */
>  #define AT_RSEQ_ALIGN		28	/* rseq allocation alignment */
>  
> +/*
> + * All page sizes supported by CPU encoded as bitmask.
> + *
> + * Example: x86_64 system with pse, pdpe1gb /proc/cpuinfo flags
> + * reports 4 KiB, 2 MiB and 1 GiB page support.
> + *
> + *	$ LD_SHOW_AUXV=1 $(which true) | grep -e AT_PAGE_SHIFT_MASK
> + *	AT_PAGE_SHIFT_MASK: 0x40201000
> + *
> + * For 2^64 hugepage support please contact your Universe sales representative.
> + */
> +#define AT_PAGE_SHIFT_MASK	29
> +
>  #define AT_EXECFN  31	/* filename of program */
>  
>  #ifndef AT_MINSIGSTKSZ
  

Patch

--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -358,6 +358,18 @@  else if (IS_ENABLED(CONFIG_IA32_EMULATION))				\
 
 #define COMPAT_ELF_ET_DYN_BASE	(TASK_UNMAPPED_BASE + 0x1000000)
 
+#define ARCH_AT_PAGE_SHIFT_MASK					\
+	do {							\
+		u32 val = 1 << 12;				\
+		if (boot_cpu_has(X86_FEATURE_PSE)) {		\
+			val |= 1 << 21;				\
+		}						\
+		if (boot_cpu_has(X86_FEATURE_GBPAGES)) {	\
+			val |= 1 << 30;				\
+		}						\
+		NEW_AUX_ENT(AT_PAGE_SHIFT_MASK, val);		\
+	} while (0)
+
 #endif /* !CONFIG_X86_32 */
 
 #define VDSO_CURRENT_BASE	((unsigned long)current->mm->context.vdso)
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -240,6 +240,9 @@  create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
 #endif
 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
+#ifdef ARCH_AT_PAGE_SHIFT_MASK
+	ARCH_AT_PAGE_SHIFT_MASK;
+#endif
 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
 	NEW_AUX_ENT(AT_PHDR, phdr_addr);
 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
--- a/include/uapi/linux/auxvec.h
+++ b/include/uapi/linux/auxvec.h
@@ -33,6 +33,19 @@ 
 #define AT_RSEQ_FEATURE_SIZE	27	/* rseq supported feature size */
 #define AT_RSEQ_ALIGN		28	/* rseq allocation alignment */
 
+/*
+ * All page sizes supported by CPU encoded as bitmask.
+ *
+ * Example: x86_64 system with pse, pdpe1gb /proc/cpuinfo flags
+ * reports 4 KiB, 2 MiB and 1 GiB page support.
+ *
+ *	$ LD_SHOW_AUXV=1 $(which true) | grep -e AT_PAGE_SHIFT_MASK
+ *	AT_PAGE_SHIFT_MASK: 0x40201000
+ *
+ * For 2^64 hugepage support please contact your Universe sales representative.
+ */
+#define AT_PAGE_SHIFT_MASK	29
+
 #define AT_EXECFN  31	/* filename of program */
 
 #ifndef AT_MINSIGSTKSZ