[v2] watchdog/hardlockup: Avoid large stack frames in watchdog_hardlockup_check()

Message ID 20230802122555.v2.1.I501ab68cb926ee33a7c87e063d207abf09b9943c@changeid
State New
Series [v2] watchdog/hardlockup: Avoid large stack frames in watchdog_hardlockup_check()

Commit Message

Doug Anderson Aug. 2, 2023, 7:26 p.m. UTC
After commit 77c12fc95980 ("watchdog/hardlockup: add a "cpu" param to
watchdog_hardlockup_check()") we started storing a `struct cpumask` on
the stack in watchdog_hardlockup_check(). On systems with
CONFIG_NR_CPUS set to 8192 this takes up 1K on the stack. That
triggers warnings with `CONFIG_FRAME_WARN` set to 1024.

Instead of putting this `struct cpumask` on the stack, we'll allocate
it on the heap whenever userspace tells us that they want to backtrace
all CPUs upon a hardlockup.

NOTE: the reason that this mask is even needed is to make sure that we
can print the hung CPU first, which makes the logs much easier to
understand.

Fixes: 77c12fc95980 ("watchdog/hardlockup: add a "cpu" param to watchdog_hardlockup_check()")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/r/202307310955.pLZDhpnl-lkp@intel.com
Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

Changes in v2:
- Allocate space when userspace requests all cpus be backtraced.

 kernel/watchdog.c | 44 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 10 deletions(-)
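
For context on the 1K figure: a `struct cpumask` is a fixed-size bitmap whose
length is set by CONFIG_NR_CPUS, so every on-stack instance pays for the
configured maximum rather than the number of CPUs actually present. Roughly,
per the definition in include/linux/cpumask.h:

    /* One bit per possible CPU, sized at build time. */
    typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;

    /*
     * With CONFIG_NR_CPUS=8192:
     *   sizeof(struct cpumask) == 8192 / 8 == 1024 bytes,
     * so this single local variable consumes an entire
     * CONFIG_FRAME_WARN budget of 1024 by itself.
     */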
  

Comments

Michal Hocko Aug. 3, 2023, 7:48 a.m. UTC | #1
On Wed 02-08-23 12:26:00, Douglas Anderson wrote:
> After commit 77c12fc95980 ("watchdog/hardlockup: add a "cpu" param to
> watchdog_hardlockup_check()") we started storing a `struct cpumask` on
> the stack in watchdog_hardlockup_check(). On systems with
> CONFIG_NR_CPUS set to 8192 this takes up 1K on the stack. That
> triggers warnings with `CONFIG_FRAME_WARN` set to 1024.
> 
> Instead of putting this `struct cpumask` on the stack, we'll allocate
> it on the heap whenever userspace tells us that they want to backtrace
> all CPUs upon a hardlockup.
> 
> NOTE: the reason that this mask is even needed is to make sure that we
> can print the hung CPU first, which makes the logs much easier to
> understand.
> 
> Fixes: 77c12fc95980 ("watchdog/hardlockup: add a "cpu" param to watchdog_hardlockup_check()")
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/r/202307310955.pLZDhpnl-lkp@intel.com
> Signed-off-by: Douglas Anderson <dianders@chromium.org>
> ---
> 
> Changes in v2:
> - Allocate space when userspace requests all cpus be backtraced.
> 
>  kernel/watchdog.c | 44 ++++++++++++++++++++++++++++++++++----------
>  1 file changed, 34 insertions(+), 10 deletions(-)
> 
> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
> index be38276a365f..25d5627a6580 100644
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -93,6 +93,8 @@ static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned);
>  static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched);
>  static unsigned long watchdog_hardlockup_all_cpu_dumped;
>  
> +static struct cpumask *hardlockup_backtrace_mask;
> +
>  notrace void arch_touch_nmi_watchdog(void)
>  {
>  	/*
> @@ -106,6 +108,29 @@ notrace void arch_touch_nmi_watchdog(void)
>  }
>  EXPORT_SYMBOL(arch_touch_nmi_watchdog);
>  
> +static int hardlockup_all_cpu_backtrace_proc_handler(struct ctl_table *table, int write,
> +		  void *buffer, size_t *lenp, loff_t *ppos)
> +{
> +	int ret;
> +
> +	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
> +
> +	/*
> +	 * Only allocate memory for the backtrace mask if userspace actually
> +	 * wants to trace all CPUs since this can take up 1K of space on a
> +	 * system with CONFIG_NR_CPUS=8192.
> +	 */
> +	if (sysctl_hardlockup_all_cpu_backtrace && !hardlockup_backtrace_mask) {
> +		hardlockup_backtrace_mask =
> +			   kzalloc(sizeof(*hardlockup_backtrace_mask), GFP_KERNEL);
> +	} else if (!sysctl_hardlockup_all_cpu_backtrace && hardlockup_backtrace_mask) {
> +		kfree(hardlockup_backtrace_mask);
> +		hardlockup_backtrace_mask = NULL;
> +	}

While unlikely, this can race with the consumer and cause either
use-after-free or NULL ptr deref.
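
To make the window concrete, here is one possible interleaving (a sketch
using the names from the patch; the proc handler runs in process context
while watchdog_hardlockup_check() runs in NMI context on another CPU):

    /*
     *   CPU0: write 0 to the sysctl         CPU1: NMI, hardlockup detected
     *   ----------------------------        ------------------------------
     *                                       if (hardlockup_backtrace_mask && ...)
     *                                               // sees a non-NULL pointer
     *   kfree(hardlockup_backtrace_mask);
     *   hardlockup_backtrace_mask = NULL;
     *                                       cpumask_copy(hardlockup_backtrace_mask, ...);
     *                                               // use-after-free; a fresh re-load
     *                                               // of the pointer here would be a
     *                                               // NULL dereference instead
     */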
  

Patch

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index be38276a365f..25d5627a6580 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -93,6 +93,8 @@ static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned);
 static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched);
 static unsigned long watchdog_hardlockup_all_cpu_dumped;
 
+static struct cpumask *hardlockup_backtrace_mask;
+
 notrace void arch_touch_nmi_watchdog(void)
 {
 	/*
@@ -106,6 +108,29 @@ notrace void arch_touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
+static int hardlockup_all_cpu_backtrace_proc_handler(struct ctl_table *table, int write,
+		  void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+	/*
+	 * Only allocate memory for the backtrace mask if userspace actually
+	 * wants to trace all CPUs since this can take up 1K of space on a
+	 * system with CONFIG_NR_CPUS=8192.
+	 */
+	if (sysctl_hardlockup_all_cpu_backtrace && !hardlockup_backtrace_mask) {
+		hardlockup_backtrace_mask =
+			   kzalloc(sizeof(*hardlockup_backtrace_mask), GFP_KERNEL);
+	} else if (!sysctl_hardlockup_all_cpu_backtrace && hardlockup_backtrace_mask) {
+		kfree(hardlockup_backtrace_mask);
+		hardlockup_backtrace_mask = NULL;
+	}
+
+	return ret;
+}
+
 void watchdog_hardlockup_touch_cpu(unsigned int cpu)
 {
 	per_cpu(watchdog_hardlockup_touched, cpu) = true;
@@ -151,9 +176,6 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
 	 */
 	if (is_hardlockup(cpu)) {
 		unsigned int this_cpu = smp_processor_id();
-		struct cpumask backtrace_mask;
-
-		cpumask_copy(&backtrace_mask, cpu_online_mask);
 
 		/* Only print hardlockups once. */
 		if (per_cpu(watchdog_hardlockup_warned, cpu))
@@ -167,19 +189,20 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
 				show_regs(regs);
 			else
 				dump_stack();
-			cpumask_clear_cpu(cpu, &backtrace_mask);
 		} else {
-			if (trigger_single_cpu_backtrace(cpu))
-				cpumask_clear_cpu(cpu, &backtrace_mask);
+			trigger_single_cpu_backtrace(cpu);
 		}
 
 		/*
 		 * Perform multi-CPU dump only once to avoid multiple
 		 * hardlockups generating interleaving traces
 		 */
-		if (sysctl_hardlockup_all_cpu_backtrace &&
-		    !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped))
-			trigger_cpumask_backtrace(&backtrace_mask);
+		if (hardlockup_backtrace_mask &&
+		    !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped)) {
+			cpumask_copy(hardlockup_backtrace_mask, cpu_online_mask);
+			cpumask_clear_cpu(cpu, hardlockup_backtrace_mask);
+			trigger_cpumask_backtrace(hardlockup_backtrace_mask);
+		}
 
 		if (hardlockup_panic)
 			nmi_panic(regs, "Hard LOCKUP");
@@ -192,6 +215,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
 
 #else /* CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */
 
+#define hardlockup_all_cpu_backtrace_proc_handler proc_dointvec_minmax
 static inline void watchdog_hardlockup_kick(void) { }
 
 #endif /* !CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */
@@ -916,7 +940,7 @@ static struct ctl_table watchdog_sysctls[] = {
 		.data		= &sysctl_hardlockup_all_cpu_backtrace,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= hardlockup_all_cpu_backtrace_proc_handler,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
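
For illustration, one way such a window could be closed while keeping the
on-demand allocation is to allocate on first enable and never free, pairing a
release store with an acquire load so the NMI side only ever observes NULL or
a fully initialized mask. A sketch against the names in this patch, not a
tested fix:

    /* Writer side (process context): allocate on first enable; never free. */
    if (sysctl_hardlockup_all_cpu_backtrace && !hardlockup_backtrace_mask) {
            struct cpumask *mask = kzalloc(sizeof(*mask), GFP_KERNEL);

            if (mask)
                    /* Publish only after the allocation is complete. */
                    smp_store_release(&hardlockup_backtrace_mask, mask);
    }

    /* Reader side (NMI context): snapshot the pointer once, re-check the sysctl. */
    struct cpumask *mask = smp_load_acquire(&hardlockup_backtrace_mask);

    if (mask && sysctl_hardlockup_all_cpu_backtrace &&
        !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped)) {
            cpumask_copy(mask, cpu_online_mask);
            cpumask_clear_cpu(cpu, mask);
            trigger_cpumask_backtrace(mask);
    }

The cost is that the 1K allocation sticks around after the sysctl is turned
back off, but the reader can never observe a freed pointer, and the existing
test_and_set_bit() guard already serializes use of the mask contents.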