On Fri, Aug 4, 2023 at 10:01 PM Douglas Anderson <dianders@chromium.org> wrote:
>
> The APIs that allow backtracing across CPUs have always had a way to
> exclude the current CPU. This convenience means callers didn't need to
> find a place to allocate a CPU mask just to handle the common case.
>
> Let's extend the API to take a CPU ID to exclude instead of just a
> boolean. This isn't any more complex for the API to handle and allows
> the hardlockup detector to exclude a different CPU (the one it already
> did a trace for) without needing to find space for a CPU mask.
>
> Arguably, this new API also encourages safer behavior. Specifically if
> the caller wants to avoid tracing the current CPU (maybe because they
> already traced the current CPU) this makes it more obvious to the
> caller that they need to make sure that the current CPU ID can't
> change.
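
For context, the caller pattern this enables, roughly what the
kernel/watchdog.c hunk below ends up doing, is:

	/* Dump this CPU, then ask the other CPUs to dump, skipping
	 * the CPU that was already dumped. */
	dump_stack();
	trigger_allbutcpu_cpu_backtrace(smp_processor_id());

(smp_processor_id() is stable there because the watchdog timer runs
with preemption disabled, so the excluded CPU can't change under us.)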
>
> Acked-by: Michal Hocko <mhocko@suse.com>
> Signed-off-by: Douglas Anderson <dianders@chromium.org>
> ---
>
> Changes in v4:
> - Renamed trigger_allbutself_cpu_backtrace() for when trigger is unsupported.
>
> Changes in v3:
> - ("nmi_backtrace: Allow excluding an arbitrary CPU") new for v3.
>
>  arch/arm/include/asm/irq.h       |  2 +-
>  arch/arm/kernel/smp.c            |  4 ++--
>  arch/loongarch/include/asm/irq.h |  2 +-
>  arch/loongarch/kernel/process.c  |  4 ++--
>  arch/mips/include/asm/irq.h      |  2 +-
>  arch/mips/kernel/process.c       |  4 ++--
>  arch/powerpc/include/asm/irq.h   |  2 +-
>  arch/powerpc/kernel/stacktrace.c |  4 ++--
>  arch/powerpc/kernel/watchdog.c   |  4 ++--
>  arch/sparc/include/asm/irq_64.h  |  2 +-
>  arch/sparc/kernel/process_64.c   |  6 +++---
>  arch/x86/include/asm/irq.h       |  2 +-
>  arch/x86/kernel/apic/hw_nmi.c    |  4 ++--
>  include/linux/nmi.h              | 14 +++++++-------
>  kernel/watchdog.c                |  2 +-
>  lib/nmi_backtrace.c              |  6 +++---
>  16 files changed, 32 insertions(+), 32 deletions(-)
>
[...]
> diff --git a/include/linux/nmi.h b/include/linux/nmi.h
> index e3e6a64b98e0..7cf7801856a1 100644
> --- a/include/linux/nmi.h
> +++ b/include/linux/nmi.h
> @@ -157,31 +157,31 @@ static inline void touch_nmi_watchdog(void)
> #ifdef arch_trigger_cpumask_backtrace
> static inline bool trigger_all_cpu_backtrace(void)
> {
> - arch_trigger_cpumask_backtrace(cpu_online_mask, false);
> + arch_trigger_cpumask_backtrace(cpu_online_mask, -1);
> return true;
> }
>
> -static inline bool trigger_allbutself_cpu_backtrace(void)
> +static inline bool trigger_allbutcpu_cpu_backtrace(int exclude_cpu)
> {
> - arch_trigger_cpumask_backtrace(cpu_online_mask, true);
> + arch_trigger_cpumask_backtrace(cpu_online_mask, exclude_cpu);
> return true;
> }
>
> static inline bool trigger_cpumask_backtrace(struct cpumask *mask)
> {
> - arch_trigger_cpumask_backtrace(mask, false);
> + arch_trigger_cpumask_backtrace(mask, -1);
> return true;
> }
>
> static inline bool trigger_single_cpu_backtrace(int cpu)
> {
> - arch_trigger_cpumask_backtrace(cpumask_of(cpu), false);
> + arch_trigger_cpumask_backtrace(cpumask_of(cpu), -1);
> return true;
> }
>
> /* generic implementation */
> void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
> - bool exclude_self,
> + int exclude_cpu,
> void (*raise)(cpumask_t *mask));
> bool nmi_cpu_backtrace(struct pt_regs *regs);
>
> @@ -190,7 +190,7 @@ static inline bool trigger_all_cpu_backtrace(void)
> {
> return false;
> }
> -static inline bool trigger_allbutself_cpu_backtrace(void)
> +static inline bool trigger_allbutcpu_cpu_backtrace(void)
                                                     ^
The parameter here is still wrong. It should be "int exclude_cpu".

This patch in Andrew's queue is causing build errors on next-20230807
on arm64:

kernel/watchdog.c: In function ‘watchdog_timer_fn’:
kernel/watchdog.c:521:25: error: too many arguments to function ‘trigger_allbutcpu_cpu_backtrace’
  521 |                         trigger_allbutcpu_cpu_backtrace(smp_processor_id());
      |                         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from kernel/watchdog.c:17:
./include/linux/nmi.h:193:20: note: declared here
  193 | static inline bool trigger_allbutcpu_cpu_backtrace(void)
      |                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
make[3]: *** [scripts/Makefile.build:243: kernel/watchdog.o] Error 1
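
Presumably the fallback stub just needs the same signature as the
real version, i.e. something like this (untested):

static inline bool trigger_allbutcpu_cpu_backtrace(int exclude_cpu)
{
	return false;
}
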
ChenYu

> diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
> --- a/arch/arm/include/asm/irq.h
> +++ b/arch/arm/include/asm/irq.h
> @@ -32,7 +32,7 @@ void handle_IRQ(unsigned int, struct pt_regs *);
> #include <linux/cpumask.h>
>
> extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
> - bool exclude_self);
> + int exclude_cpu);
> #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> #endif
>
> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -846,7 +846,7 @@ static void raise_nmi(cpumask_t *mask)
> __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
> }
>
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
> {
> - nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_nmi);
> + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_nmi);
> }
>
> diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
> --- a/arch/loongarch/include/asm/irq.h
> +++ b/arch/loongarch/include/asm/irq.h
> @@ -40,7 +40,7 @@ void spurious_interrupt(void);
> #define NR_IRQS_LEGACY 16
>
> #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> -void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self);
> +void arch_trigger_cpumask_backtrace(const struct cpumask *mask, int exclude_cpu);
>
> #define MAX_IO_PICS 2
> #define NR_IRQS (64 + (256 * MAX_IO_PICS))
>
> diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
> --- a/arch/loongarch/kernel/process.c
> +++ b/arch/loongarch/kernel/process.c
> @@ -345,9 +345,9 @@ static void raise_backtrace(cpumask_t *mask)
> }
> }
>
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
> {
> - nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
> + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace);
> }
>
> #ifdef CONFIG_64BIT
>
> diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
> --- a/arch/mips/include/asm/irq.h
> +++ b/arch/mips/include/asm/irq.h
> @@ -77,7 +77,7 @@ extern int cp0_fdc_irq;
> extern int get_c0_fdc_int(void);
>
> void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
> - bool exclude_self);
> + int exclude_cpu);
> #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>
> #endif /* _ASM_IRQ_H */
>
> diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> --- a/arch/mips/kernel/process.c
> +++ b/arch/mips/kernel/process.c
> @@ -750,9 +750,9 @@ static void raise_backtrace(cpumask_t *mask)
> }
> }
>
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
> {
> - nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
> + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace);
> }
>
> int mips_get_process_fp_mode(struct task_struct *task)
>
> diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
> --- a/arch/powerpc/include/asm/irq.h
> +++ b/arch/powerpc/include/asm/irq.h
> @@ -55,7 +55,7 @@ int irq_choose_cpu(const struct cpumask *mask);
> #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
> extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
> - bool exclude_self);
> + int exclude_cpu);
> #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> #endif
>
> diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
> --- a/arch/powerpc/kernel/stacktrace.c
> +++ b/arch/powerpc/kernel/stacktrace.c
> @@ -221,8 +221,8 @@ static void raise_backtrace_ipi(cpumask_t *mask)
> }
> }
>
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
> {
> - nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
> + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace_ipi);
> }
> #endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
>
> diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
> --- a/arch/powerpc/kernel/watchdog.c
> +++ b/arch/powerpc/kernel/watchdog.c
> @@ -245,7 +245,7 @@ static void watchdog_smp_panic(int cpu)
> __cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
> }
> } else {
> - trigger_allbutself_cpu_backtrace();
> + trigger_allbutcpu_cpu_backtrace(cpu);
> cpumask_clear(&wd_smp_cpus_ipi);
> }
> @@ -416,7 +416,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
> xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi
>
> if (sysctl_hardlockup_all_cpu_backtrace)
> - trigger_allbutself_cpu_backtrace();
> + trigger_allbutcpu_cpu_backtrace(cpu);
>
> if (hardlockup_panic)
> nmi_panic(regs, "Hard LOCKUP");
>
> diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
> --- a/arch/sparc/include/asm/irq_64.h
> +++ b/arch/sparc/include/asm/irq_64.h
> @@ -87,7 +87,7 @@ static inline unsigned long get_softint(void)
> }
>
> void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
> - bool exclude_self);
> + int exclude_cpu);
> #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>
> extern void *hardirq_stack[NR_CPUS];
>
> diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
> --- a/arch/sparc/kernel/process_64.c
> +++ b/arch/sparc/kernel/process_64.c
> @@ -236,7 +236,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp)
> }
> }
>
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
> {
> struct thread_info *tp = current_thread_info();
> struct pt_regs *regs = get_irq_regs();
> @@ -252,7 +252,7 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
>
> - if (cpumask_test_cpu(this_cpu, mask) && !exclude_self)
> + if (cpumask_test_cpu(this_cpu, mask) && this_cpu != exclude_cpu)
> __global_reg_self(tp, regs, this_cpu);
>
> smp_fetch_global_regs();
> @@ -260,7 +260,7 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> for_each_cpu(cpu, mask) {
> struct global_reg_snapshot *gp;
>
> - if (exclude_self && cpu == this_cpu)
> + if (cpu == exclude_cpu)
> continue;
>
> gp = &global_cpu_snapshot[cpu].reg;
>
> diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
> --- a/arch/x86/include/asm/irq.h
> +++ b/arch/x86/include/asm/irq.h
> @@ -42,7 +42,7 @@ extern void init_ISA_irqs(void);
> #ifdef CONFIG_X86_LOCAL_APIC
> void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
> - bool exclude_self);
> + int exclude_cpu);
> #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> #endif
>
> diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
> --- a/arch/x86/kernel/apic/hw_nmi.c
> +++ b/arch/x86/kernel/apic/hw_nmi.c
> @@ -34,9 +34,9 @@ static void nmi_raise_cpu_backtrace(cpumask_t *mask)
> apic->send_IPI_mask(mask, NMI_VECTOR);
> }
>
> -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
> {
> - nmi_trigger_cpumask_backtrace(mask, exclude_self,
> + nmi_trigger_cpumask_backtrace(mask, exclude_cpu,
> nmi_raise_cpu_backtrace);
> }
[...]
> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -523,7 +523,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
> dump_stack();
>
> if (softlockup_all_cpu_backtrace) {
> - trigger_allbutself_cpu_backtrace();
> + trigger_allbutcpu_cpu_backtrace(smp_processor_id());
> clear_bit_unlock(0, &soft_lockup_nmi_warn);
> }
>
> diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
> --- a/lib/nmi_backtrace.c
> +++ b/lib/nmi_backtrace.c
> @@ -34,7 +34,7 @@ static unsigned long backtrace_flag;
> * they are passed being updated as a side effect of this call.
> */
> void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
> - bool exclude_self,
> + int exclude_cpu,
> void (*raise)(cpumask_t *mask))
> {
> int i, this_cpu = get_cpu();
> @@ -49,8 +49,8 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
> }
>
> cpumask_copy(to_cpumask(backtrace_mask), mask);
> - if (exclude_self)
> - cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
> + if (exclude_cpu != -1)
> + cpumask_clear_cpu(exclude_cpu, to_cpumask(backtrace_mask));
>
> /*
> * Don't try to send an NMI to this cpu; it may work on some