[v3,1/2] nmi_backtrace: Allow excluding an arbitrary CPU

Message ID 20230803160649.v3.1.Ia35521b91fc781368945161d7b28538f9996c182@changeid
State New
Headers
Series [v3,1/2] nmi_backtrace: Allow excluding an arbitrary CPU |

Commit Message

Doug Anderson Aug. 3, 2023, 11:07 p.m. UTC
  The APIs that allow backtracing across CPUs have always had a way to
exclude the current CPU. This convenience means callers didn't need to
find a place to allocate a CPU mask just to handle the common case.

Let's extend the API to take a CPU ID to exclude instead of just a
boolean. This isn't any more complex for the API to handle and allows
the hardlockup detector to exclude a different CPU (the one it already
did a trace for) without needing to find space for a CPU mask.

Arguably, this new API also encourages safer behavior. Specifically if
the caller wants to avoid tracing the current CPU (maybe because they
already traced the current CPU) this makes it more obvious to the
caller that they need to make sure that the current CPU ID can't
change.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

Changes in v3:
- ("nmi_backtrace: Allow excluding an arbitrary CPU") new for v3.

 arch/arm/include/asm/irq.h       |  2 +-
 arch/arm/kernel/smp.c            |  4 ++--
 arch/loongarch/include/asm/irq.h |  2 +-
 arch/loongarch/kernel/process.c  |  4 ++--
 arch/mips/include/asm/irq.h      |  2 +-
 arch/mips/kernel/process.c       |  4 ++--
 arch/powerpc/include/asm/irq.h   |  2 +-
 arch/powerpc/kernel/stacktrace.c |  4 ++--
 arch/powerpc/kernel/watchdog.c   |  4 ++--
 arch/sparc/include/asm/irq_64.h  |  2 +-
 arch/sparc/kernel/process_64.c   |  6 +++---
 arch/x86/include/asm/irq.h       |  2 +-
 arch/x86/kernel/apic/hw_nmi.c    |  4 ++--
 include/linux/nmi.h              | 12 ++++++------
 kernel/watchdog.c                |  2 +-
 lib/nmi_backtrace.c              |  6 +++---
 16 files changed, 31 insertions(+), 31 deletions(-)
  

Comments

Michal Hocko Aug. 4, 2023, 7:50 a.m. UTC | #1
On Thu 03-08-23 16:07:57, Douglas Anderson wrote:
> The APIs that allow backtracing across CPUs have always had a way to
> exclude the current CPU. This convenience means callers didn't need to
> find a place to allocate a CPU mask just to handle the common case.
> 
> Let's extend the API to take a CPU ID to exclude instead of just a
> boolean. This isn't any more complex for the API to handle and allows
> the hardlockup detector to exclude a different CPU (the one it already
> did a trace for) without needing to find space for a CPU mask.
> 
> Arguably, this new API also encourages safer behavior. Specifically if
> the caller wants to avoid tracing the current CPU (maybe because they
> already traced the current CPU) this makes it more obvious to the
> caller that they need to make sure that the current CPU ID can't
> change.

Yes, this looks like the best way forward.

It would have been slightly safer to modify arch_trigger_cpumask_backtrace
by switching arguments so that some leftovers are captured easier.

You also have this leftover
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 00982b133dc1..9f1743ee2b28 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -190,10 +190,6 @@ static inline bool trigger_all_cpu_backtrace(void)
 {
 	return false;
 }
-static inline bool trigger_allbutself_cpu_backtrace(void)
-{
-	return false;
-}
 static inline bool trigger_cpumask_backtrace(struct cpumask *mask)
 {
 	return false;

> Signed-off-by: Douglas Anderson <dianders@chromium.org>

Anyway
Acked-by: Michal Hocko <mhocko@suse.com>

> ---
> 
> Changes in v3:
> - ("nmi_backtrace: Allow excluding an arbitrary CPU") new for v3.
> 
>  arch/arm/include/asm/irq.h       |  2 +-
>  arch/arm/kernel/smp.c            |  4 ++--
>  arch/loongarch/include/asm/irq.h |  2 +-
>  arch/loongarch/kernel/process.c  |  4 ++--
>  arch/mips/include/asm/irq.h      |  2 +-
>  arch/mips/kernel/process.c       |  4 ++--
>  arch/powerpc/include/asm/irq.h   |  2 +-
>  arch/powerpc/kernel/stacktrace.c |  4 ++--
>  arch/powerpc/kernel/watchdog.c   |  4 ++--
>  arch/sparc/include/asm/irq_64.h  |  2 +-
>  arch/sparc/kernel/process_64.c   |  6 +++---
>  arch/x86/include/asm/irq.h       |  2 +-
>  arch/x86/kernel/apic/hw_nmi.c    |  4 ++--
>  include/linux/nmi.h              | 12 ++++++------
>  kernel/watchdog.c                |  2 +-
>  lib/nmi_backtrace.c              |  6 +++---
>  16 files changed, 31 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
> index 18605f1b3580..26c1d2ced4ce 100644
> --- a/arch/arm/include/asm/irq.h
> +++ b/arch/arm/include/asm/irq.h
> @@ -32,7 +32,7 @@ void handle_IRQ(unsigned int, struct pt_regs *);
>  #include <linux/cpumask.h>
>  
>  extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
> -					   bool exclude_self);
> +					   int exclude_cpu);
>  #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>  #endif
>  
> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> index 6756203e45f3..3431c0553f45 100644
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -846,7 +846,7 @@ static void raise_nmi(cpumask_t *mask)
>  	__ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
>  }
>  
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
> -	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_nmi);
> +	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_nmi);
>  }
> diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
> index a115e8999c69..218b4da0ea90 100644
> --- a/arch/loongarch/include/asm/irq.h
> +++ b/arch/loongarch/include/asm/irq.h
> @@ -40,7 +40,7 @@ void spurious_interrupt(void);
>  #define NR_IRQS_LEGACY 16
>  
>  #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> -void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self);
> +void arch_trigger_cpumask_backtrace(const struct cpumask *mask, int exclude_cpu);
>  
>  #define MAX_IO_PICS 2
>  #define NR_IRQS	(64 + (256 * MAX_IO_PICS))
> diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
> index 2e04eb07abb6..778e8d09953e 100644
> --- a/arch/loongarch/kernel/process.c
> +++ b/arch/loongarch/kernel/process.c
> @@ -345,9 +345,9 @@ static void raise_backtrace(cpumask_t *mask)
>  	}
>  }
>  
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
> -	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
> +	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace);
>  }
>  
>  #ifdef CONFIG_64BIT
> diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
> index 75abfa834ab7..3a848e7e69f7 100644
> --- a/arch/mips/include/asm/irq.h
> +++ b/arch/mips/include/asm/irq.h
> @@ -77,7 +77,7 @@ extern int cp0_fdc_irq;
>  extern int get_c0_fdc_int(void);
>  
>  void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
> -				    bool exclude_self);
> +				    int exclude_cpu);
>  #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>  
>  #endif /* _ASM_IRQ_H */
> diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> index a3225912c862..5387ed0a5186 100644
> --- a/arch/mips/kernel/process.c
> +++ b/arch/mips/kernel/process.c
> @@ -750,9 +750,9 @@ static void raise_backtrace(cpumask_t *mask)
>  	}
>  }
>  
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
> -	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
> +	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace);
>  }
>  
>  int mips_get_process_fp_mode(struct task_struct *task)
> diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
> index f257cacb49a9..ba1a5974e714 100644
> --- a/arch/powerpc/include/asm/irq.h
> +++ b/arch/powerpc/include/asm/irq.h
> @@ -55,7 +55,7 @@ int irq_choose_cpu(const struct cpumask *mask);
>  
>  #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
>  extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
> -					   bool exclude_self);
> +					   int exclude_cpu);
>  #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>  #endif
>  
> diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
> index 5de8597eaab8..b15f15dcacb5 100644
> --- a/arch/powerpc/kernel/stacktrace.c
> +++ b/arch/powerpc/kernel/stacktrace.c
> @@ -221,8 +221,8 @@ static void raise_backtrace_ipi(cpumask_t *mask)
>  	}
>  }
>  
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
> -	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
> +	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace_ipi);
>  }
>  #endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
> diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
> index edb2dd1f53eb..8c464a5d8246 100644
> --- a/arch/powerpc/kernel/watchdog.c
> +++ b/arch/powerpc/kernel/watchdog.c
> @@ -245,7 +245,7 @@ static void watchdog_smp_panic(int cpu)
>  			__cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
>  		}
>  	} else {
> -		trigger_allbutself_cpu_backtrace();
> +		trigger_allbutcpu_cpu_backtrace(cpu);
>  		cpumask_clear(&wd_smp_cpus_ipi);
>  	}
>  
> @@ -416,7 +416,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
>  		xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi
>  
>  		if (sysctl_hardlockup_all_cpu_backtrace)
> -			trigger_allbutself_cpu_backtrace();
> +			trigger_allbutcpu_cpu_backtrace(cpu);
>  
>  		if (hardlockup_panic)
>  			nmi_panic(regs, "Hard LOCKUP");
> diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
> index b436029f1ced..8c4c0c87f998 100644
> --- a/arch/sparc/include/asm/irq_64.h
> +++ b/arch/sparc/include/asm/irq_64.h
> @@ -87,7 +87,7 @@ static inline unsigned long get_softint(void)
>  }
>  
>  void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
> -				    bool exclude_self);
> +				    int exclude_cpu);
>  #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>  
>  extern void *hardirq_stack[NR_CPUS];
> diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
> index b51d8fb0ecdc..1ea3f37fa985 100644
> --- a/arch/sparc/kernel/process_64.c
> +++ b/arch/sparc/kernel/process_64.c
> @@ -236,7 +236,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp)
>  	}
>  }
>  
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
>  	struct thread_info *tp = current_thread_info();
>  	struct pt_regs *regs = get_irq_regs();
> @@ -252,7 +252,7 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
>  
>  	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
>  
> -	if (cpumask_test_cpu(this_cpu, mask) && !exclude_self)
> +	if (cpumask_test_cpu(this_cpu, mask) && this_cpu != exclude_cpu)
>  		__global_reg_self(tp, regs, this_cpu);
>  
>  	smp_fetch_global_regs();
> @@ -260,7 +260,7 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
>  	for_each_cpu(cpu, mask) {
>  		struct global_reg_snapshot *gp;
>  
> -		if (exclude_self && cpu == this_cpu)
> +		if (cpu == exclude_cpu)
>  			continue;
>  
>  		gp = &global_cpu_snapshot[cpu].reg;
> diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
> index 29e083b92813..836c170d3087 100644
> --- a/arch/x86/include/asm/irq.h
> +++ b/arch/x86/include/asm/irq.h
> @@ -42,7 +42,7 @@ extern void init_ISA_irqs(void);
>  
>  #ifdef CONFIG_X86_LOCAL_APIC
>  void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
> -				    bool exclude_self);
> +				    int exclude_cpu);
>  
>  #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>  #endif
> diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
> index 34a992e275ef..d6e01f924299 100644
> --- a/arch/x86/kernel/apic/hw_nmi.c
> +++ b/arch/x86/kernel/apic/hw_nmi.c
> @@ -34,9 +34,9 @@ static void nmi_raise_cpu_backtrace(cpumask_t *mask)
>  	apic->send_IPI_mask(mask, NMI_VECTOR);
>  }
>  
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
>  {
> -	nmi_trigger_cpumask_backtrace(mask, exclude_self,
> +	nmi_trigger_cpumask_backtrace(mask, exclude_cpu,
>  				      nmi_raise_cpu_backtrace);
>  }
>  
> diff --git a/include/linux/nmi.h b/include/linux/nmi.h
> index e3e6a64b98e0..00982b133dc1 100644
> --- a/include/linux/nmi.h
> +++ b/include/linux/nmi.h
> @@ -157,31 +157,31 @@ static inline void touch_nmi_watchdog(void)
>  #ifdef arch_trigger_cpumask_backtrace
>  static inline bool trigger_all_cpu_backtrace(void)
>  {
> -	arch_trigger_cpumask_backtrace(cpu_online_mask, false);
> +	arch_trigger_cpumask_backtrace(cpu_online_mask, -1);
>  	return true;
>  }
>  
> -static inline bool trigger_allbutself_cpu_backtrace(void)
> +static inline bool trigger_allbutcpu_cpu_backtrace(int exclude_cpu)
>  {
> -	arch_trigger_cpumask_backtrace(cpu_online_mask, true);
> +	arch_trigger_cpumask_backtrace(cpu_online_mask, exclude_cpu);
>  	return true;
>  }
>  
>  static inline bool trigger_cpumask_backtrace(struct cpumask *mask)
>  {
> -	arch_trigger_cpumask_backtrace(mask, false);
> +	arch_trigger_cpumask_backtrace(mask, -1);
>  	return true;
>  }
>  
>  static inline bool trigger_single_cpu_backtrace(int cpu)
>  {
> -	arch_trigger_cpumask_backtrace(cpumask_of(cpu), false);
> +	arch_trigger_cpumask_backtrace(cpumask_of(cpu), -1);
>  	return true;
>  }
>  
>  /* generic implementation */
>  void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
> -				   bool exclude_self,
> +				   int exclude_cpu,
>  				   void (*raise)(cpumask_t *mask));
>  bool nmi_cpu_backtrace(struct pt_regs *regs);
>  
> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
> index be38276a365f..085d7a78f62f 100644
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -523,7 +523,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>  			dump_stack();
>  
>  		if (softlockup_all_cpu_backtrace) {
> -			trigger_allbutself_cpu_backtrace();
> +			trigger_allbutcpu_cpu_backtrace(smp_processor_id());
>  			clear_bit_unlock(0, &soft_lockup_nmi_warn);
>  		}
>  
> diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
> index 5274bbb026d7..33c154264bfe 100644
> --- a/lib/nmi_backtrace.c
> +++ b/lib/nmi_backtrace.c
> @@ -34,7 +34,7 @@ static unsigned long backtrace_flag;
>   * they are passed being updated as a side effect of this call.
>   */
>  void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
> -				   bool exclude_self,
> +				   int exclude_cpu,
>  				   void (*raise)(cpumask_t *mask))
>  {
>  	int i, this_cpu = get_cpu();
> @@ -49,8 +49,8 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
>  	}
>  
>  	cpumask_copy(to_cpumask(backtrace_mask), mask);
> -	if (exclude_self)
> -		cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
> +	if (exclude_cpu != -1)
> +		cpumask_clear_cpu(exclude_cpu, to_cpumask(backtrace_mask));
>  
>  	/*
>  	 * Don't try to send an NMI to this cpu; it may work on some
> -- 
> 2.41.0.585.gd2178a4bd4-goog
  
Doug Anderson Aug. 4, 2023, 1:56 p.m. UTC | #2
Hi,

On Fri, Aug 4, 2023 at 12:50 AM Michal Hocko <mhocko@suse.com> wrote:
>
> On Thu 03-08-23 16:07:57, Douglas Anderson wrote:
> > The APIs that allow backtracing across CPUs have always had a way to
> > exclude the current CPU. This convenience means callers didn't need to
> > find a place to allocate a CPU mask just to handle the common case.
> >
> > Let's extend the API to take a CPU ID to exclude instead of just a
> > boolean. This isn't any more complex for the API to handle and allows
> > the hardlockup detector to exclude a different CPU (the one it already
> > did a trace for) without needing to find space for a CPU mask.
> >
> > Arguably, this new API also encourages safer behavior. Specifically if
> > the caller wants to avoid tracing the current CPU (maybe because they
> > already traced the current CPU) this makes it more obvious to the
> > caller that they need to make sure that the current CPU ID can't
> > change.
>
> Yes, this looks like the best way forward.
>
> It would have been slightly safer to modify arch_trigger_cpumask_backtrace
> by switching arguments so that some leftovers are captured easier.

I'm not sure I understand. Oh, you're saying make the prototype of
arch_trigger_cpumask_backtrace() incompatible so that if someone is
directly calling it then it'll be a compile-time error? I guess the
hope is that nobody is calling that directly and they're calling
through the trigger_...() functions.

For now I'm going to leave this alone.


> You also have this leftover
> diff --git a/include/linux/nmi.h b/include/linux/nmi.h
> index 00982b133dc1..9f1743ee2b28 100644
> --- a/include/linux/nmi.h
> +++ b/include/linux/nmi.h
> @@ -190,10 +190,6 @@ static inline bool trigger_all_cpu_backtrace(void)
>  {
>         return false;
>  }
> -static inline bool trigger_allbutself_cpu_backtrace(void)
> -{
> -       return false;
> -}

Ah yes. I missed that case. Let me send a quick v4.


>  static inline bool trigger_cpumask_backtrace(struct cpumask *mask)
>  {
>         return false;
>
> > Signed-off-by: Douglas Anderson <dianders@chromium.org>
>
> Anyway
> Acked-by: Michal Hocko <mhocko@suse.com>

Thanks!
  
Michal Hocko Aug. 4, 2023, 3:02 p.m. UTC | #3
On Fri 04-08-23 06:56:51, Doug Anderson wrote:
> Hi,
> 
> On Fri, Aug 4, 2023 at 12:50 AM Michal Hocko <mhocko@suse.com> wrote:
> >
> > On Thu 03-08-23 16:07:57, Douglas Anderson wrote:
> > > The APIs that allow backtracing across CPUs have always had a way to
> > > exclude the current CPU. This convenience means callers didn't need to
> > > find a place to allocate a CPU mask just to handle the common case.
> > >
> > > Let's extend the API to take a CPU ID to exclude instead of just a
> > > boolean. This isn't any more complex for the API to handle and allows
> > > the hardlockup detector to exclude a different CPU (the one it already
> > > did a trace for) without needing to find space for a CPU mask.
> > >
> > > Arguably, this new API also encourages safer behavior. Specifically if
> > > the caller wants to avoid tracing the current CPU (maybe because they
> > > already traced the current CPU) this makes it more obvious to the
> > > caller that they need to make sure that the current CPU ID can't
> > > change.
> >
> > Yes, this looks like the best way forward.
> >
> > It would have been slightly safer to modify arch_trigger_cpumask_backtrace
> > by switching arguments so that some leftovers are captured easier.
> 
> I'm not sure I understand. Oh, you're saying make the prototype of
> arch_trigger_cpumask_backtrace() incompatible so that if someone is
> directly calling it then it'll be a compile-time error? 

exactly. bool to int promotion would be too easy to miss while the
pointer to int would complain loudly.

> I guess the
> hope is that nobody is calling that directly and they're calling
> through the trigger_...() functions.

Hope is one thing, being preventive another.

> For now I'm going to leave this alone.

If you are going to send another version then please consider this. Not
a hard requirement but better.
  
Doug Anderson Aug. 4, 2023, 4:06 p.m. UTC | #4
Hi,

On Fri, Aug 4, 2023 at 8:02 AM Michal Hocko <mhocko@suse.com> wrote:
>
> > > It would have been slightly safer to modify arch_trigger_cpumask_backtrace
> > > by switching arguments so that some leftovers are captured easier.
> >
> > I'm not sure I understand. Oh, you're saying make the prototype of
> > arch_trigger_cpumask_backtrace() incompatible so that if someone is
> > directly calling it then it'll be a compile-time error?
>
> exactly. bool to int promotion would be too easy to miss while the
> pointer to int would complain loudly.
>
> > I guess the
> > hope is that nobody is calling that directly and they're calling
> > through the trigger_...() functions.
>
> Hope is one thing, being preventive another.
>
> > For now I'm going to leave this alone.
>
> If you are going to send another version then please consider this. Not
> a hard requirement but better.

If I do send another version, do you have any suggestions for how to
change this to make it incompatible? I guess swapping the order of the
parameters would be best? I considered doing that for v4 but I felt
like long term the current order of the parameters was better. I also
considered a rename, but that different problems. ;-) If I rename both
the #define and the function then if someone has an out-of-tree patch
adding arch_trigger_cpumask_backtrace() for another architecture, like
say arm64, then there would be no compile-time failure indicating that
the out-of-tree patch needs updating. I could rename the functions but
_not_ the #define, I guess?
  
Michal Hocko Aug. 7, 2023, 7:03 a.m. UTC | #5
On Fri 04-08-23 09:06:07, Doug Anderson wrote:
> Hi,
> 
> On Fri, Aug 4, 2023 at 8:02 AM Michal Hocko <mhocko@suse.com> wrote:
> >
> > > > It would have been slightly safer to modify arch_trigger_cpumask_backtrace
> > > > by switching arguments so that some leftovers are captured easier.
> > >
> > > I'm not sure I understand. Oh, you're saying make the prototype of
> > > arch_trigger_cpumask_backtrace() incompatible so that if someone is
> > > directly calling it then it'll be a compile-time error?
> >
> > exactly. bool to int promotion would be too easy to miss while the
> > pointer to int would complain loudly.
> >
> > > I guess the
> > > hope is that nobody is calling that directly and they're calling
> > > through the trigger_...() functions.
> >
> > Hope is one thing, being preventive another.
> >
> > > For now I'm going to leave this alone.
> >
> > If you are going to send another version then please consider this. Not
> > a hard requirement but better.
> 
> If I do send another version, do you have any suggestions for how to
> change this to make it incompatible?

I would swap parameters as this seems simplest.

> I guess swapping the order of the
> parameters would be best? I considered doing that for v4 but I felt
> like long term the current order of the parameters was better.

Yes the current ordering is better but having it other way around is not
really horrendous either.

> I also
> considered a rename, but that different problems. ;-) If I rename both
> the #define and the function then if someone has an out-of-tree patch
> adding arch_trigger_cpumask_backtrace() for another architecture, like
> say arm64, then there would be no compile-time failure indicating that
> the out-of-tree patch needs updating. I could rename the functions but
> _not_ the #define, I guess?

I think that swapping would be simplest as the type mismatch should
catch also pending out-of-tree potential implementations.
  

Patch

diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
index 18605f1b3580..26c1d2ced4ce 100644
--- a/arch/arm/include/asm/irq.h
+++ b/arch/arm/include/asm/irq.h
@@ -32,7 +32,7 @@  void handle_IRQ(unsigned int, struct pt_regs *);
 #include <linux/cpumask.h>
 
 extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
-					   bool exclude_self);
+					   int exclude_cpu);
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 #endif
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 6756203e45f3..3431c0553f45 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -846,7 +846,7 @@  static void raise_nmi(cpumask_t *mask)
 	__ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
 }
 
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
 {
-	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_nmi);
+	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_nmi);
 }
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index a115e8999c69..218b4da0ea90 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -40,7 +40,7 @@  void spurious_interrupt(void);
 #define NR_IRQS_LEGACY 16
 
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
-void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self);
+void arch_trigger_cpumask_backtrace(const struct cpumask *mask, int exclude_cpu);
 
 #define MAX_IO_PICS 2
 #define NR_IRQS	(64 + (256 * MAX_IO_PICS))
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 2e04eb07abb6..778e8d09953e 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -345,9 +345,9 @@  static void raise_backtrace(cpumask_t *mask)
 	}
 }
 
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
 {
-	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
+	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace);
 }
 
 #ifdef CONFIG_64BIT
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 75abfa834ab7..3a848e7e69f7 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -77,7 +77,7 @@  extern int cp0_fdc_irq;
 extern int get_c0_fdc_int(void);
 
 void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
-				    bool exclude_self);
+				    int exclude_cpu);
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 
 #endif /* _ASM_IRQ_H */
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index a3225912c862..5387ed0a5186 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -750,9 +750,9 @@  static void raise_backtrace(cpumask_t *mask)
 	}
 }
 
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
 {
-	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
+	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace);
 }
 
 int mips_get_process_fp_mode(struct task_struct *task)
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index f257cacb49a9..ba1a5974e714 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -55,7 +55,7 @@  int irq_choose_cpu(const struct cpumask *mask);
 
 #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
 extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
-					   bool exclude_self);
+					   int exclude_cpu);
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 #endif
 
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 5de8597eaab8..b15f15dcacb5 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -221,8 +221,8 @@  static void raise_backtrace_ipi(cpumask_t *mask)
 	}
 }
 
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
 {
-	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
+	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace_ipi);
 }
 #endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index edb2dd1f53eb..8c464a5d8246 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -245,7 +245,7 @@  static void watchdog_smp_panic(int cpu)
 			__cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
 		}
 	} else {
-		trigger_allbutself_cpu_backtrace();
+		trigger_allbutcpu_cpu_backtrace(cpu);
 		cpumask_clear(&wd_smp_cpus_ipi);
 	}
 
@@ -416,7 +416,7 @@  DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
 		xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi
 
 		if (sysctl_hardlockup_all_cpu_backtrace)
-			trigger_allbutself_cpu_backtrace();
+			trigger_allbutcpu_cpu_backtrace(cpu);
 
 		if (hardlockup_panic)
 			nmi_panic(regs, "Hard LOCKUP");
diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
index b436029f1ced..8c4c0c87f998 100644
--- a/arch/sparc/include/asm/irq_64.h
+++ b/arch/sparc/include/asm/irq_64.h
@@ -87,7 +87,7 @@  static inline unsigned long get_softint(void)
 }
 
 void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
-				    bool exclude_self);
+				    int exclude_cpu);
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 
 extern void *hardirq_stack[NR_CPUS];
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index b51d8fb0ecdc..1ea3f37fa985 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -236,7 +236,7 @@  static void __global_reg_poll(struct global_reg_snapshot *gp)
 	}
 }
 
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
 {
 	struct thread_info *tp = current_thread_info();
 	struct pt_regs *regs = get_irq_regs();
@@ -252,7 +252,7 @@  void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
 
 	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
 
-	if (cpumask_test_cpu(this_cpu, mask) && !exclude_self)
+	if (cpumask_test_cpu(this_cpu, mask) && this_cpu != exclude_cpu)
 		__global_reg_self(tp, regs, this_cpu);
 
 	smp_fetch_global_regs();
@@ -260,7 +260,7 @@  void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
 	for_each_cpu(cpu, mask) {
 		struct global_reg_snapshot *gp;
 
-		if (exclude_self && cpu == this_cpu)
+		if (cpu == exclude_cpu)
 			continue;
 
 		gp = &global_cpu_snapshot[cpu].reg;
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 29e083b92813..836c170d3087 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -42,7 +42,7 @@  extern void init_ISA_irqs(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
-				    bool exclude_self);
+				    int exclude_cpu);
 
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 #endif
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 34a992e275ef..d6e01f924299 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -34,9 +34,9 @@  static void nmi_raise_cpu_backtrace(cpumask_t *mask)
 	apic->send_IPI_mask(mask, NMI_VECTOR);
 }
 
-void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
 {
-	nmi_trigger_cpumask_backtrace(mask, exclude_self,
+	nmi_trigger_cpumask_backtrace(mask, exclude_cpu,
 				      nmi_raise_cpu_backtrace);
 }
 
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index e3e6a64b98e0..00982b133dc1 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -157,31 +157,31 @@  static inline void touch_nmi_watchdog(void)
 #ifdef arch_trigger_cpumask_backtrace
 static inline bool trigger_all_cpu_backtrace(void)
 {
-	arch_trigger_cpumask_backtrace(cpu_online_mask, false);
+	arch_trigger_cpumask_backtrace(cpu_online_mask, -1);
 	return true;
 }
 
-static inline bool trigger_allbutself_cpu_backtrace(void)
+static inline bool trigger_allbutcpu_cpu_backtrace(int exclude_cpu)
 {
-	arch_trigger_cpumask_backtrace(cpu_online_mask, true);
+	arch_trigger_cpumask_backtrace(cpu_online_mask, exclude_cpu);
 	return true;
 }
 
 static inline bool trigger_cpumask_backtrace(struct cpumask *mask)
 {
-	arch_trigger_cpumask_backtrace(mask, false);
+	arch_trigger_cpumask_backtrace(mask, -1);
 	return true;
 }
 
 static inline bool trigger_single_cpu_backtrace(int cpu)
 {
-	arch_trigger_cpumask_backtrace(cpumask_of(cpu), false);
+	arch_trigger_cpumask_backtrace(cpumask_of(cpu), -1);
 	return true;
 }
 
 /* generic implementation */
 void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
-				   bool exclude_self,
+				   int exclude_cpu,
 				   void (*raise)(cpumask_t *mask));
 bool nmi_cpu_backtrace(struct pt_regs *regs);
 
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index be38276a365f..085d7a78f62f 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -523,7 +523,7 @@  static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 			dump_stack();
 
 		if (softlockup_all_cpu_backtrace) {
-			trigger_allbutself_cpu_backtrace();
+			trigger_allbutcpu_cpu_backtrace(smp_processor_id());
 			clear_bit_unlock(0, &soft_lockup_nmi_warn);
 		}
 
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 5274bbb026d7..33c154264bfe 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -34,7 +34,7 @@  static unsigned long backtrace_flag;
  * they are passed being updated as a side effect of this call.
  */
 void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
-				   bool exclude_self,
+				   int exclude_cpu,
 				   void (*raise)(cpumask_t *mask))
 {
 	int i, this_cpu = get_cpu();
@@ -49,8 +49,8 @@  void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
 	}
 
 	cpumask_copy(to_cpumask(backtrace_mask), mask);
-	if (exclude_self)
-		cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
+	if (exclude_cpu != -1)
+		cpumask_clear_cpu(exclude_cpu, to_cpumask(backtrace_mask));
 
 	/*
 	 * Don't try to send an NMI to this cpu; it may work on some