[3/9] x86/stackprotector/64: Convert stack protector to normal percpu variable

Message ID 20231023211730.40566-4-brgerst@gmail.com
State New
Series x86-64: Stack protector and percpu improvements

Commit Message

Brian Gerst Oct. 23, 2023, 9:17 p.m. UTC
  Older versions of GCC fixed the location of the stack protector canary
at %gs:40.  This constraint forced the percpu section to be linked at
virtual address 0 so that the canary could be the first data object in
the percpu section.  Supporting the zero-based percpu section requires
additional code to handle relocations for RIP-relative references to
percpu data, extra complexity to kallsyms, and workarounds for linker
bugs due to the use of absolute symbols.

Since version 8.1, GCC has options to configure the location of the
canary value.  This allows the canary to be turned into a normal
percpu variable and removes the constraint that the percpu section
be zero-based.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/Kconfig                          |  5 ++-
 arch/x86/Makefile                         | 19 +++++++-----
 arch/x86/entry/entry_64.S                 |  2 +-
 arch/x86/include/asm/processor.h          | 15 +--------
 arch/x86/include/asm/stackprotector.h     | 37 +++++------------------
 arch/x86/kernel/asm-offsets_64.c          |  6 ----
 arch/x86/kernel/cpu/common.c              |  4 +--
 scripts/gcc-x86_64-has-stack-protector.sh |  4 ---
 8 files changed, 24 insertions(+), 68 deletions(-)
 delete mode 100755 scripts/gcc-x86_64-has-stack-protector.sh
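
As a rough illustration of the GCC 8.1+ options this series relies on (not part of the patch; exact assembly output varies by compiler version), compiling a trivial function with the guard-reg/guard-symbol flags shows the canary being referenced through a %gs-relative __stack_chk_guard symbol instead of the hardcoded %gs:40:

  # Illustration only, modelled on the old has-stack-protector script:
  echo 'int foo(void) { char X[200]; return 3; }' | \
      gcc -S -x c -m64 -O0 -fno-PIE -fstack-protector-all \
          -mstack-protector-guard-reg=gs \
          -mstack-protector-guard-symbol=__stack_chk_guard \
          - -o - | grep __stack_chk_guard

Any compiler that accepts these options emits the segment-prefixed access to the named symbol, which is what lets the canary become an ordinary percpu variable.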
  

Comments

Uros Bizjak Oct. 24, 2023, 12:39 p.m. UTC | #1
On Mon, Oct 23, 2023 at 11:17 PM Brian Gerst <brgerst@gmail.com> wrote:
>
> Older versions of GCC fixed the location of the stack protector canary
> at %gs:40.  This constraint forced the percpu section to be linked at
> virtual address 0 so that the canary could be the first data object in
> the percpu section.  Supporting the zero-based percpu section requires
> additional code to handle relocations for RIP-relative references to
> percpu data, extra complexity to kallsyms, and workarounds for linker
> bugs due to the use of absolute symbols.
>
> Since version 8.1, GCC has options to configure the location of the
> canary value.  This allows the canary to be turned into a normal
> percpu variable and removes the constraint that the percpu section
> be zero-based.
>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>
> ---
>  arch/x86/Kconfig                          |  5 ++-
>  arch/x86/Makefile                         | 19 +++++++-----
>  arch/x86/entry/entry_64.S                 |  2 +-
>  arch/x86/include/asm/processor.h          | 15 +--------
>  arch/x86/include/asm/stackprotector.h     | 37 +++++------------------
>  arch/x86/kernel/asm-offsets_64.c          |  6 ----
>  arch/x86/kernel/cpu/common.c              |  4 +--
>  scripts/gcc-x86_64-has-stack-protector.sh |  4 ---
>  8 files changed, 24 insertions(+), 68 deletions(-)
>  delete mode 100755 scripts/gcc-x86_64-has-stack-protector.sh
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 039872be1630..c95e0ce557da 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -410,12 +410,11 @@ config PGTABLE_LEVELS
>
>  config CC_HAS_SANE_STACKPROTECTOR
>         bool
> -       default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) $(CLANG_FLAGS)) if 64BIT
> +       default $(cc-option,-mstack-protector-guard-reg=gs -mstack-protector-guard-symbol=__stack_chk_guard) if 64BIT
>         default $(cc-option,-mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard)
>         help
>           We have to make sure stack protector is unconditionally disabled if
> -         the compiler produces broken code or if it does not let us control
> -         the segment on 32-bit kernels.
> +         the compiler does not allow control of the segment and symbol.

All compilers that support -mstack-protector-guard-reg=gs
-mstack-protector-guard-symbol=__stack_chk_guard correctly emit the
segment register prefix, so checking for broken code here is indeed
overkill. gcc-x86_64-has-stack-protector.sh can rightly be removed.
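
For reference, the new $(cc-option,...) test reduces to a plain acceptance check along these lines (a sketch of what kconfig's cc-option helper does, not the literal implementation):

  # Does the compiler accept the guard-reg/guard-symbol options?
  gcc -Werror -mstack-protector-guard-reg=gs \
      -mstack-protector-guard-symbol=__stack_chk_guard \
      -c -x c /dev/null -o /dev/null && echo y || echo n

If the options are accepted, CC_HAS_SANE_STACKPROTECTOR defaults to y; no inspection of the generated code is needed.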

>
>  menu "Processor type and features"
>
> diff --git a/arch/x86/Makefile b/arch/x86/Makefile
> index 22e41d9dbc23..169f086997ec 100644
> --- a/arch/x86/Makefile
> +++ b/arch/x86/Makefile
> @@ -111,13 +111,7 @@ ifeq ($(CONFIG_X86_32),y)
>          # temporary until string.h is fixed
>          KBUILD_CFLAGS += -ffreestanding
>
> -       ifeq ($(CONFIG_STACKPROTECTOR),y)
> -               ifeq ($(CONFIG_SMP),y)
> -                       KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
> -               else
> -                       KBUILD_CFLAGS += -mstack-protector-guard=global
> -               endif
> -       endif
> +       percpu_seg := fs
>  else
>          BITS := 64
>          UTS_MACHINE := x86_64
> @@ -167,6 +161,17 @@ else
>          KBUILD_CFLAGS += -mcmodel=kernel
>          KBUILD_RUSTFLAGS += -Cno-redzone=y
>          KBUILD_RUSTFLAGS += -Ccode-model=kernel
> +
> +       percpu_seg := gs
> +endif
> +
> +ifeq ($(CONFIG_STACKPROTECTOR),y)
> +       ifeq ($(CONFIG_SMP),y)
> +               KBUILD_CFLAGS += -mstack-protector-guard-reg=$(percpu_seg) \
> +                                -mstack-protector-guard-symbol=__stack_chk_guard

Maybe use here:

KBUILD_CFLAGS += -mstack-protector-guard-reg=$(percpu_seg)
KBUILD_CFLAGS += -mstack-protector-guard-symbol=__stack_chk_guard

Looks nicer...

Uros.

> +       else
> +               KBUILD_CFLAGS += -mstack-protector-guard=global
> +       endif
>  endif
>
>  #
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 1a88ad8a7b48..cddcc236aaae 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -190,7 +190,7 @@ SYM_FUNC_START(__switch_to_asm)
>
>  #ifdef CONFIG_STACKPROTECTOR
>         movq    TASK_stack_canary(%rsi), %rbx
> -       movq    %rbx, PER_CPU_VAR(fixed_percpu_data + FIXED_stack_canary)
> +       movq    %rbx, PER_CPU_VAR(__stack_chk_guard)
>  #endif
>
>         /*
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index ae81a7191c1c..04371f60e3c6 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -394,16 +394,7 @@ struct irq_stack {
>
>  #ifdef CONFIG_X86_64
>  struct fixed_percpu_data {
> -       /*
> -        * GCC hardcodes the stack canary as %gs:40.  Since the
> -        * irq_stack is the object at %gs:0, we reserve the bottom
> -        * 48 bytes of the irq stack for the canary.
> -        *
> -        * Once we are willing to require -mstack-protector-guard-symbol=
> -        * support for x86_64 stackprotector, we can get rid of this.
> -        */
>         char            gs_base[40];
> -       unsigned long   stack_canary;
>  };
>
>  DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
> @@ -418,11 +409,7 @@ extern asmlinkage void entry_SYSCALL32_ignore(void);
>
>  /* Save actual FS/GS selectors and bases to current->thread */
>  void current_save_fsgs(void);
> -#else  /* X86_64 */
> -#ifdef CONFIG_STACKPROTECTOR
> -DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
> -#endif
> -#endif /* !X86_64 */
> +#endif /* X86_64 */
>
>  struct perf_event;
>
> diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
> index 00473a650f51..33abbd29ea26 100644
> --- a/arch/x86/include/asm/stackprotector.h
> +++ b/arch/x86/include/asm/stackprotector.h
> @@ -2,26 +2,13 @@
>  /*
>   * GCC stack protector support.
>   *
> - * Stack protector works by putting predefined pattern at the start of
> + * Stack protector works by putting a predefined pattern at the start of
>   * the stack frame and verifying that it hasn't been overwritten when
> - * returning from the function.  The pattern is called stack canary
> - * and unfortunately gcc historically required it to be at a fixed offset
> - * from the percpu segment base.  On x86_64, the offset is 40 bytes.
> + * returning from the function.  The pattern is called the stack canary
> + * and is a unique value for each task.
>   *
> - * The same segment is shared by percpu area and stack canary.  On
> - * x86_64, percpu symbols are zero based and %gs (64-bit) points to the
> - * base of percpu area.  The first occupant of the percpu area is always
> - * fixed_percpu_data which contains stack_canary at the appropriate
> - * offset.  On x86_32, the stack canary is just a regular percpu
> - * variable.
> - *
> - * Putting percpu data in %fs on 32-bit is a minor optimization compared to
> - * using %gs.  Since 32-bit userspace normally has %fs == 0, we are likely
> - * to load 0 into %fs on exit to usermode, whereas with percpu data in
> - * %gs, we are likely to load a non-null %gs on return to user mode.
> - *
> - * Once we are willing to require GCC 8.1 or better for 64-bit stackprotector
> - * support, we can remove some of this complexity.
> + * GCC is configured to read the stack canary value from the __stack_chk_guard
> + * per-cpu variable, which is changed on task switch.
>   */
>
>  #ifndef _ASM_STACKPROTECTOR_H
> @@ -36,6 +23,8 @@
>
>  #include <linux/sched.h>
>
> +DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
> +
>  /*
>   * Initialize the stackprotector canary value.
>   *
> @@ -51,25 +40,13 @@ static __always_inline void boot_init_stack_canary(void)
>  {
>         unsigned long canary = get_random_canary();
>
> -#ifdef CONFIG_X86_64
> -       BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
> -#endif
> -
>         current->stack_canary = canary;
> -#ifdef CONFIG_X86_64
> -       this_cpu_write(fixed_percpu_data.stack_canary, canary);
> -#else
>         this_cpu_write(__stack_chk_guard, canary);
> -#endif
>  }
>
>  static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
>  {
> -#ifdef CONFIG_X86_64
> -       per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
> -#else
>         per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
> -#endif
>  }
>
>  #else  /* STACKPROTECTOR */
> diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
> index bb65371ea9df..590b6cd0eac0 100644
> --- a/arch/x86/kernel/asm-offsets_64.c
> +++ b/arch/x86/kernel/asm-offsets_64.c
> @@ -54,11 +54,5 @@ int main(void)
>         BLANK();
>  #undef ENTRY
>
> -       BLANK();
> -
> -#ifdef CONFIG_STACKPROTECTOR
> -       OFFSET(FIXED_stack_canary, fixed_percpu_data, stack_canary);
> -       BLANK();
> -#endif
>         return 0;
>  }
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index b14fc8c1c953..f9c8bd27b642 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -2103,15 +2103,13 @@ void syscall_init(void)
>                X86_EFLAGS_AC|X86_EFLAGS_ID);
>  }
>
> -#else  /* CONFIG_X86_64 */
> +#endif /* CONFIG_X86_64 */
>
>  #ifdef CONFIG_STACKPROTECTOR
>  DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
>  EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
>  #endif
>
> -#endif /* CONFIG_X86_64 */
> -
>  /*
>   * Clear all 6 debug registers:
>   */
> diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh
> deleted file mode 100755
> index 75e4e22b986a..000000000000
> --- a/scripts/gcc-x86_64-has-stack-protector.sh
> +++ /dev/null
> @@ -1,4 +0,0 @@
> -#!/bin/sh
> -# SPDX-License-Identifier: GPL-2.0
> -
> -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -m64 -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
> --
> 2.41.0
>
  

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 039872be1630..c95e0ce557da 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -410,12 +410,11 @@  config PGTABLE_LEVELS
 
 config CC_HAS_SANE_STACKPROTECTOR
 	bool
-	default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) $(CLANG_FLAGS)) if 64BIT
+	default $(cc-option,-mstack-protector-guard-reg=gs -mstack-protector-guard-symbol=__stack_chk_guard) if 64BIT
 	default $(cc-option,-mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard)
 	help
 	  We have to make sure stack protector is unconditionally disabled if
-	  the compiler produces broken code or if it does not let us control
-	  the segment on 32-bit kernels.
+	  the compiler does not allow control of the segment and symbol.
 
 menu "Processor type and features"
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 22e41d9dbc23..169f086997ec 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -111,13 +111,7 @@  ifeq ($(CONFIG_X86_32),y)
         # temporary until string.h is fixed
         KBUILD_CFLAGS += -ffreestanding
 
-	ifeq ($(CONFIG_STACKPROTECTOR),y)
-		ifeq ($(CONFIG_SMP),y)
-			KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
-		else
-			KBUILD_CFLAGS += -mstack-protector-guard=global
-		endif
-	endif
+	percpu_seg := fs
 else
         BITS := 64
         UTS_MACHINE := x86_64
@@ -167,6 +161,17 @@  else
         KBUILD_CFLAGS += -mcmodel=kernel
         KBUILD_RUSTFLAGS += -Cno-redzone=y
         KBUILD_RUSTFLAGS += -Ccode-model=kernel
+
+	percpu_seg := gs
+endif
+
+ifeq ($(CONFIG_STACKPROTECTOR),y)
+	ifeq ($(CONFIG_SMP),y)
+		KBUILD_CFLAGS += -mstack-protector-guard-reg=$(percpu_seg) \
+				 -mstack-protector-guard-symbol=__stack_chk_guard
+	else
+		KBUILD_CFLAGS += -mstack-protector-guard=global
+	endif
 endif
 
 #
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1a88ad8a7b48..cddcc236aaae 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -190,7 +190,7 @@  SYM_FUNC_START(__switch_to_asm)
 
 #ifdef CONFIG_STACKPROTECTOR
 	movq	TASK_stack_canary(%rsi), %rbx
-	movq	%rbx, PER_CPU_VAR(fixed_percpu_data + FIXED_stack_canary)
+	movq	%rbx, PER_CPU_VAR(__stack_chk_guard)
 #endif
 
 	/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ae81a7191c1c..04371f60e3c6 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -394,16 +394,7 @@  struct irq_stack {
 
 #ifdef CONFIG_X86_64
 struct fixed_percpu_data {
-	/*
-	 * GCC hardcodes the stack canary as %gs:40.  Since the
-	 * irq_stack is the object at %gs:0, we reserve the bottom
-	 * 48 bytes of the irq stack for the canary.
-	 *
-	 * Once we are willing to require -mstack-protector-guard-symbol=
-	 * support for x86_64 stackprotector, we can get rid of this.
-	 */
 	char		gs_base[40];
-	unsigned long	stack_canary;
 };
 
 DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
@@ -418,11 +409,7 @@  extern asmlinkage void entry_SYSCALL32_ignore(void);
 
 /* Save actual FS/GS selectors and bases to current->thread */
 void current_save_fsgs(void);
-#else	/* X86_64 */
-#ifdef CONFIG_STACKPROTECTOR
-DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
-#endif
-#endif	/* !X86_64 */
+#endif /* X86_64 */
 
 struct perf_event;
 
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 00473a650f51..33abbd29ea26 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -2,26 +2,13 @@ 
 /*
  * GCC stack protector support.
  *
- * Stack protector works by putting predefined pattern at the start of
+ * Stack protector works by putting a predefined pattern at the start of
  * the stack frame and verifying that it hasn't been overwritten when
- * returning from the function.  The pattern is called stack canary
- * and unfortunately gcc historically required it to be at a fixed offset
- * from the percpu segment base.  On x86_64, the offset is 40 bytes.
+ * returning from the function.  The pattern is called the stack canary
+ * and is a unique value for each task.
  *
- * The same segment is shared by percpu area and stack canary.  On
- * x86_64, percpu symbols are zero based and %gs (64-bit) points to the
- * base of percpu area.  The first occupant of the percpu area is always
- * fixed_percpu_data which contains stack_canary at the appropriate
- * offset.  On x86_32, the stack canary is just a regular percpu
- * variable.
- *
- * Putting percpu data in %fs on 32-bit is a minor optimization compared to
- * using %gs.  Since 32-bit userspace normally has %fs == 0, we are likely
- * to load 0 into %fs on exit to usermode, whereas with percpu data in
- * %gs, we are likely to load a non-null %gs on return to user mode.
- *
- * Once we are willing to require GCC 8.1 or better for 64-bit stackprotector
- * support, we can remove some of this complexity.
+ * GCC is configured to read the stack canary value from the __stack_chk_guard
+ * per-cpu variable, which is changed on task switch.
  */
 
 #ifndef _ASM_STACKPROTECTOR_H
@@ -36,6 +23,8 @@ 
 
 #include <linux/sched.h>
 
+DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
+
 /*
  * Initialize the stackprotector canary value.
  *
@@ -51,25 +40,13 @@  static __always_inline void boot_init_stack_canary(void)
 {
 	unsigned long canary = get_random_canary();
 
-#ifdef CONFIG_X86_64
-	BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
-#endif
-
 	current->stack_canary = canary;
-#ifdef CONFIG_X86_64
-	this_cpu_write(fixed_percpu_data.stack_canary, canary);
-#else
 	this_cpu_write(__stack_chk_guard, canary);
-#endif
 }
 
 static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
 {
-#ifdef CONFIG_X86_64
-	per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
-#else
 	per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
-#endif
 }
 
 #else	/* STACKPROTECTOR */
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index bb65371ea9df..590b6cd0eac0 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -54,11 +54,5 @@  int main(void)
 	BLANK();
 #undef ENTRY
 
-	BLANK();
-
-#ifdef CONFIG_STACKPROTECTOR
-	OFFSET(FIXED_stack_canary, fixed_percpu_data, stack_canary);
-	BLANK();
-#endif
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b14fc8c1c953..f9c8bd27b642 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2103,15 +2103,13 @@  void syscall_init(void)
 	       X86_EFLAGS_AC|X86_EFLAGS_ID);
 }
 
-#else	/* CONFIG_X86_64 */
+#endif	/* CONFIG_X86_64 */
 
 #ifdef CONFIG_STACKPROTECTOR
 DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
 EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
 #endif
 
-#endif	/* CONFIG_X86_64 */
-
 /*
  * Clear all 6 debug registers:
  */
diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh
deleted file mode 100755
index 75e4e22b986a..000000000000
--- a/scripts/gcc-x86_64-has-stack-protector.sh
+++ /dev/null
@@ -1,4 +0,0 @@ 
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -m64 -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs"