[3/3] watchdog/softlockup: add parameter to control the reporting of time-consuming hardirq
Commit Message
To identify the cause of a softlockup more accurately, we use tracepoints
to measure the duration of each hardirq, which may have some impact on
performance. Add a parameter that allows users to enable this feature
on demand.
Signed-off-by: Bitao Hu <yaoma@linux.alibaba.com>
---
kernel/watchdog.c | 51 ++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 48 insertions(+), 3 deletions(-)
Comments
Hi Bitao,
kernel test robot noticed the following build warnings:
[auto build test WARNING on tip/irq/core]
[also build test WARNING on akpm-mm/mm-everything linus/master v6.8-rc1 next-20240125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Bitao-Hu/watchdog-softlockup-low-overhead-detection-of-interrupt-storm/20240123-201509
base: tip/irq/core
patch link: https://lore.kernel.org/r/20240123121223.22318-4-yaoma%40linux.alibaba.com
patch subject: [PATCH 3/3] watchdog/softlockup: add parameter to control the reporting of time-consuming hardirq
config: i386-buildonly-randconfig-003-20240126 (https://download.01.org/0day-ci/archive/20240126/202401261359.eaU4UnjQ-lkp@intel.com/config)
compiler: clang version 17.0.6 (https://github.com/llvm/llvm-project 6009708b4367171ccdbf4b5905cb6a803753fe18)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240126/202401261359.eaU4UnjQ-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202401261359.eaU4UnjQ-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> kernel/watchdog.c:1088:5: warning: no previous prototype for function 'proc_softlockup_irqtrace' [-Wmissing-prototypes]
1088 | int proc_softlockup_irqtrace(struct ctl_table *table, int write,
| ^
kernel/watchdog.c:1088:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
1088 | int proc_softlockup_irqtrace(struct ctl_table *table, int write,
| ^
| static
1 warning generated.
vim +/proc_softlockup_irqtrace +1088 kernel/watchdog.c
1084
1085 /*
1086 * /proc/sys/kernel/softlockup_irqtrace
1087 */
> 1088 int proc_softlockup_irqtrace(struct ctl_table *table, int write,
1089 void *buffer, size_t *lenp, loff_t *ppos)
1090 {
1091 int err, old;
1092
1093 mutex_lock(&watchdog_mutex);
1094
1095 old = READ_ONCE(softlockup_irqtrace);
1096 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
1097
1098 if (!err && write && old != READ_ONCE(softlockup_irqtrace))
1099 proc_watchdog_update();
1100
1101 mutex_unlock(&watchdog_mutex);
1102 return err;
1103 }
1104
Hi Bitao,
kernel test robot noticed the following build warnings:
[auto build test WARNING on tip/irq/core]
[also build test WARNING on akpm-mm/mm-everything linus/master v6.8-rc1 next-20240125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Bitao-Hu/watchdog-softlockup-low-overhead-detection-of-interrupt-storm/20240123-201509
base: tip/irq/core
patch link: https://lore.kernel.org/r/20240123121223.22318-4-yaoma%40linux.alibaba.com
patch subject: [PATCH 3/3] watchdog/softlockup: add parameter to control the reporting of time-consuming hardirq
config: i386-randconfig-012-20240126 (https://download.01.org/0day-ci/archive/20240126/202401261322.fGeoPvI9-lkp@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240126/202401261322.fGeoPvI9-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202401261322.fGeoPvI9-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> kernel/watchdog.c:1088:5: warning: no previous prototype for 'proc_softlockup_irqtrace' [-Wmissing-prototypes]
1088 | int proc_softlockup_irqtrace(struct ctl_table *table, int write,
| ^~~~~~~~~~~~~~~~~~~~~~~~
vim +/proc_softlockup_irqtrace +1088 kernel/watchdog.c
1084
1085 /*
1086 * /proc/sys/kernel/softlockup_irqtrace
1087 */
> 1088 int proc_softlockup_irqtrace(struct ctl_table *table, int write,
1089 void *buffer, size_t *lenp, loff_t *ppos)
1090 {
1091 int err, old;
1092
1093 mutex_lock(&watchdog_mutex);
1094
1095 old = READ_ONCE(softlockup_irqtrace);
1096 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
1097
1098 if (!err && write && old != READ_ONCE(softlockup_irqtrace))
1099 proc_watchdog_update();
1100
1101 mutex_unlock(&watchdog_mutex);
1102 return err;
1103 }
1104
@@ -303,6 +303,9 @@ unsigned int __read_mostly softlockup_panic =
static bool softlockup_initialized __read_mostly;
static u64 __read_mostly sample_period;
+static int __read_mostly softlockup_irqtrace;
+static bool softlockup_irqtrace_initialized __read_mostly;
+
/* Timestamp taken after the last successful reschedule. */
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
/* Timestamp of the last softlockup report. */
@@ -318,6 +321,13 @@ static int __init softlockup_panic_setup(char *str)
}
__setup("softlockup_panic=", softlockup_panic_setup);
+static int __init softlockup_irqtrace_setup(char *str)
+{
+ get_option(&str, &softlockup_irqtrace);
+ return 1;
+}
+__setup("softlockup_irqtrace=", softlockup_irqtrace_setup);
+
static int __init nowatchdog_setup(char *str)
{
watchdog_user_enabled = 0;
@@ -635,7 +645,7 @@ static void print_hardirq_time(void)
u64 start_time, now, a;
u32 period_us, i, b;
- if (test_bit(SOFTLOCKUP_HARDIRQ, this_cpu_ptr(&softlockup_flags))) {
+ if (softlockup_irqtrace && test_bit(SOFTLOCKUP_HARDIRQ, this_cpu_ptr(&softlockup_flags))) {
start_time = __this_cpu_read(hardirq_start_time);
now = local_clock();
period_us = (now - start_time)/1000;
@@ -856,7 +866,10 @@ static void softlockup_stop_all(void)
if (!softlockup_initialized)
return;
- unhook_hardirq_events();
+ if (softlockup_irqtrace_initialized) {
+ unhook_hardirq_events();
+ softlockup_irqtrace_initialized = false;
+ }
for_each_cpu(cpu, &watchdog_allowed_mask)
smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);
@@ -874,7 +887,10 @@ static void softlockup_start_all(void)
{
int cpu;
- hook_hardirq_events();
+ if (softlockup_irqtrace && !softlockup_irqtrace_initialized) {
+ hook_hardirq_events();
+ softlockup_irqtrace_initialized = true;
+ }
cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
for_each_cpu(cpu, &watchdog_allowed_mask)
@@ -1090,6 +1106,26 @@ int proc_watchdog_thresh(struct ctl_table *table, int write,
return err;
}
+/*
+ * /proc/sys/kernel/softlockup_irqtrace (handler is file-local, hence static)
+ */
+static int proc_softlockup_irqtrace(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int err, old;
+
+ mutex_lock(&watchdog_mutex);
+
+ old = READ_ONCE(softlockup_irqtrace);
+ err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ /* Reconfigure the watchdog only if a successful write changed the value. */
+ if (!err && write && old != READ_ONCE(softlockup_irqtrace))
+ proc_watchdog_update();
+
+ mutex_unlock(&watchdog_mutex);
+ return err;
+}
+
/*
* The cpumask is the mask of possible cpus that the watchdog can run
* on, not the mask of cpus it is actually running on. This allows the
@@ -1158,6 +1194,15 @@ static struct ctl_table watchdog_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ {
+ .procname = "softlockup_irqtrace",
+ .data = &softlockup_irqtrace,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_softlockup_irqtrace,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
#ifdef CONFIG_SMP
{
.procname = "softlockup_all_cpu_backtrace",