ELF: supply userspace with available page shifts (AT_PAGE_SHIFT_LIST)
Commit Message
Report available page shifts in an arch-independent manner, so that poor
userspace developers won't have to parse /proc/cpuinfo hunting for
arch-specific flag strings:
unsigned long val = getauxval(AT_PAGE_SHIFT_LIST);
while (val && (val & 255) != 30) {
val >>= 8;
}
if (val) {
page_size_1gib = true;
} else {
page_size_1gib = false;
}
Note!
This is strictly for userspace: if some page size is shut down due
to a kernel command line option or a CPU bug workaround, then it must not
be reported in the aux vector!
x86_64 machine with 1 GiB pages:
$ hexdump -C /proc/self/auxv
00000030 06 00 00 00 00 00 00 00 00 10 00 00 00 00 00 00
00000040 1d 00 00 00 00 00 00 00 0c 15 1e 00 00 00 00 00
x86_64 machine with 2MiB pages only:
$ hexdump -C /proc/self/auxv
00000030 06 00 00 00 00 00 00 00 00 10 00 00 00 00 00 00
00000040 1d 00 00 00 00 00 00 00 0c 15 00 00 00 00 00 00
AT_PAGESZ is always 4096, which is not much information.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
arch/x86/include/asm/elf.h | 13 +++++++++++++
fs/binfmt_elf.c | 3 +++
include/uapi/linux/auxvec.h | 17 +++++++++++++++++
3 files changed, 33 insertions(+)
Comments
* Alexey Dobriyan:
> +/*
> + * Page sizes available for mmap(2) encoded as 1 page shift per byte in
> + * increasing order.
> + *
> + * Thus 32-bit systems get 4 shifts, 64-bit systems get 8 shifts tops.
Couldn't you use the bits in a long instead, to indicate which shifts
are present? That's always going to be enough.
Thanks,
Florian
On Tue, Dec 05, 2023 at 10:51:39AM +0100, Florian Weimer wrote:
> * Alexey Dobriyan:
>
> > +/*
> > + * Page sizes available for mmap(2) encoded as 1 page shift per byte in
> > + * increasing order.
> > + *
> > + * Thus 32-bit systems get 4 shifts, 64-bit systems get 8 shifts tops.
>
> Couldn't you use the bits in a long instead, to indicate which shifts
> are present? That's always going to be enough.
Yes!
I was so proud of myself for this line:
val |= 21 << (s += 8);
Now it is boring bitmask again :-)
@@ -358,6 +358,19 @@ else if (IS_ENABLED(CONFIG_IA32_EMULATION)) \
#define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
+#define ARCH_AT_PAGE_SHIFT_LIST /* emit AT_PAGE_SHIFT_LIST: one page shift per byte, increasing order */ \
+ do { \
+ u32 val = 12; /* lowest byte: shift 12 (4 KiB pages) */ \
+ int s = 0; /* bit offset of the most recently filled byte */ \
+ if (boot_cpu_has(X86_FEATURE_PSE)) { \
+ val |= 21 << (s += 8); /* next byte: shift 21 (2 MiB pages) */ \
+ } \
+ if (boot_cpu_has(X86_FEATURE_GBPAGES)) { \
+ val |= 30 << (s += 8); /* next byte: shift 30 (1 GiB pages) */ \
+ } \
+ NEW_AUX_ENT(AT_PAGE_SHIFT_LIST, val); \
+ } while (0)
+
#endif /* !CONFIG_X86_32 */
#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso)
@@ -240,6 +240,9 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
#endif
NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
+#ifdef ARCH_AT_PAGE_SHIFT_LIST
+ ARCH_AT_PAGE_SHIFT_LIST;
+#endif
NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
NEW_AUX_ENT(AT_PHDR, phdr_addr);
NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
@@ -33,6 +33,23 @@
#define AT_RSEQ_FEATURE_SIZE 27 /* rseq supported feature size */
#define AT_RSEQ_ALIGN 28 /* rseq allocation alignment */
+/*
+ * Page sizes available for mmap(2) encoded as 1 page shift per byte in
+ * increasing order.
+ *
+ * Thus 32-bit systems get 4 shifts, 64-bit systems get 8 shifts tops.
+ *
+ * Example:
+ * x86_64 system with "pdpe1gb" reports 4 KiB, 2 MiB and 1 GiB page support.
+ *
+ * $ hexdump -C /proc/self/auxv
+ * 00000030 06 00 00 00 00 00 00 00 00 10 00 00 00 00 00 00
+ * 00000040 1d 00 00 00 00 00 00 00 0c 15 1e 00 00 00 00 00
+ *
+ * For 2^256 hugepage support please contact your Universe sales representative.
+ */
+#define AT_PAGE_SHIFT_LIST 29
+
#define AT_EXECFN 31 /* filename of program */
#ifndef AT_MINSIGSTKSZ