[3/3] watchdog/softlockup: add parameter to control the reporting of time-consuming hardirq

Message ID 20240123121223.22318-4-yaoma@linux.alibaba.com
State New
Headers
Series *** Detect interrupt storm in softlockup *** |

Commit Message

Bitao Hu Jan. 23, 2024, 12:12 p.m. UTC
  To pinpoint the cause of a softlockup more accurately, we use tracepoints
to measure the time spent in each hardirq, which may have some impact on
performance. Add a "softlockup_irqtrace" parameter (boot option and sysctl)
so that users can enable this feature on demand.

Signed-off-by: Bitao Hu <yaoma@linux.alibaba.com>
---
 kernel/watchdog.c | 51 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 3 deletions(-)
  

Comments

kernel test robot Jan. 26, 2024, 5:25 a.m. UTC | #1
Hi Bitao,

kernel test robot noticed the following build warnings:

[auto build test WARNING on tip/irq/core]
[also build test WARNING on akpm-mm/mm-everything linus/master v6.8-rc1 next-20240125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Bitao-Hu/watchdog-softlockup-low-overhead-detection-of-interrupt-storm/20240123-201509
base:   tip/irq/core
patch link:    https://lore.kernel.org/r/20240123121223.22318-4-yaoma%40linux.alibaba.com
patch subject: [PATCH 3/3] watchdog/softlockup: add parameter to control the reporting of time-consuming hardirq
config: i386-buildonly-randconfig-003-20240126 (https://download.01.org/0day-ci/archive/20240126/202401261359.eaU4UnjQ-lkp@intel.com/config)
compiler: clang version 17.0.6 (https://github.com/llvm/llvm-project 6009708b4367171ccdbf4b5905cb6a803753fe18)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240126/202401261359.eaU4UnjQ-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202401261359.eaU4UnjQ-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> kernel/watchdog.c:1088:5: warning: no previous prototype for function 'proc_softlockup_irqtrace' [-Wmissing-prototypes]
    1088 | int proc_softlockup_irqtrace(struct ctl_table *table, int write,
         |     ^
   kernel/watchdog.c:1088:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
    1088 | int proc_softlockup_irqtrace(struct ctl_table *table, int write,
         | ^
         | static 
   1 warning generated.


vim +/proc_softlockup_irqtrace +1088 kernel/watchdog.c

  1084	
  1085	/*
  1086	 * /proc/sys/kernel/softlockup_irqtrace
  1087	 */
> 1088	int proc_softlockup_irqtrace(struct ctl_table *table, int write,
  1089				 void *buffer, size_t *lenp, loff_t *ppos)
  1090	{
  1091		int err, old;
  1092	
  1093		mutex_lock(&watchdog_mutex);
  1094	
  1095		old = READ_ONCE(softlockup_irqtrace);
  1096		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  1097	
  1098		if (!err && write && old != READ_ONCE(softlockup_irqtrace))
  1099			proc_watchdog_update();
  1100	
  1101		mutex_unlock(&watchdog_mutex);
  1102		return err;
  1103	}
  1104
  
kernel test robot Jan. 26, 2024, 6:07 a.m. UTC | #2
Hi Bitao,

kernel test robot noticed the following build warnings:

[auto build test WARNING on tip/irq/core]
[also build test WARNING on akpm-mm/mm-everything linus/master v6.8-rc1 next-20240125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Bitao-Hu/watchdog-softlockup-low-overhead-detection-of-interrupt-storm/20240123-201509
base:   tip/irq/core
patch link:    https://lore.kernel.org/r/20240123121223.22318-4-yaoma%40linux.alibaba.com
patch subject: [PATCH 3/3] watchdog/softlockup: add parameter to control the reporting of time-consuming hardirq
config: i386-randconfig-012-20240126 (https://download.01.org/0day-ci/archive/20240126/202401261322.fGeoPvI9-lkp@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240126/202401261322.fGeoPvI9-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202401261322.fGeoPvI9-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> kernel/watchdog.c:1088:5: warning: no previous prototype for 'proc_softlockup_irqtrace' [-Wmissing-prototypes]
    1088 | int proc_softlockup_irqtrace(struct ctl_table *table, int write,
         |     ^~~~~~~~~~~~~~~~~~~~~~~~


vim +/proc_softlockup_irqtrace +1088 kernel/watchdog.c

  1084	
  1085	/*
  1086	 * /proc/sys/kernel/softlockup_irqtrace
  1087	 */
> 1088	int proc_softlockup_irqtrace(struct ctl_table *table, int write,
  1089				 void *buffer, size_t *lenp, loff_t *ppos)
  1090	{
  1091		int err, old;
  1092	
  1093		mutex_lock(&watchdog_mutex);
  1094	
  1095		old = READ_ONCE(softlockup_irqtrace);
  1096		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  1097	
  1098		if (!err && write && old != READ_ONCE(softlockup_irqtrace))
  1099			proc_watchdog_update();
  1100	
  1101		mutex_unlock(&watchdog_mutex);
  1102		return err;
  1103	}
  1104
  

Patch

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f347c5d8c5c1..314dfd301d8c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -303,6 +303,9 @@  unsigned int __read_mostly softlockup_panic =
 static bool softlockup_initialized __read_mostly;
 static u64 __read_mostly sample_period;
 
+static int __read_mostly softlockup_irqtrace;	/* set by "softlockup_irqtrace=" or sysctl */
+static bool softlockup_irqtrace_initialized __read_mostly;	/* hardirq events are hooked */
+
 /* Timestamp taken after the last successful reschedule. */
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 /* Timestamp of the last softlockup report. */
@@ -318,6 +321,13 @@  static int __init softlockup_panic_setup(char *str)
 }
 __setup("softlockup_panic=", softlockup_panic_setup);
 
+static int __init softlockup_irqtrace_setup(char *str)
+{
+	get_option(&str, &softlockup_irqtrace);	/* boot-time "softlockup_irqtrace=<n>" */
+	return 1;
+}
+__setup("softlockup_irqtrace=", softlockup_irqtrace_setup);
+
 static int __init nowatchdog_setup(char *str)
 {
 	watchdog_user_enabled = 0;
@@ -635,7 +645,7 @@  static void print_hardirq_time(void)
 	u64 start_time, now, a;
 	u32 period_us, i, b;
 
-	if (test_bit(SOFTLOCKUP_HARDIRQ, this_cpu_ptr(&softlockup_flags))) {
+	if (softlockup_irqtrace && test_bit(SOFTLOCKUP_HARDIRQ, this_cpu_ptr(&softlockup_flags))) {
 		start_time = __this_cpu_read(hardirq_start_time);
 		now = local_clock();
 		period_us = (now - start_time)/1000;
@@ -856,7 +866,10 @@  static void softlockup_stop_all(void)
 	if (!softlockup_initialized)
 		return;
 
-	unhook_hardirq_events();
+	if (softlockup_irqtrace_initialized) {
+		unhook_hardirq_events();
+		softlockup_irqtrace_initialized = false;
+	}
 
 	for_each_cpu(cpu, &watchdog_allowed_mask)
 		smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);
@@ -874,7 +887,10 @@  static void softlockup_start_all(void)
 {
 	int cpu;
 
-	hook_hardirq_events();
+	if (softlockup_irqtrace && !softlockup_irqtrace_initialized) {
+		hook_hardirq_events();
+		softlockup_irqtrace_initialized = true;
+	}
 
 	cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
 	for_each_cpu(cpu, &watchdog_allowed_mask)
@@ -1090,6 +1106,26 @@  int proc_watchdog_thresh(struct ctl_table *table, int write,
 	return err;
 }
 
+/*
+ * /proc/sys/kernel/softlockup_irqtrace handler for watchdog_sysctls[]
+ */
+static int proc_softlockup_irqtrace(struct ctl_table *table, int write,
+				    void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int err, old;
+
+	mutex_lock(&watchdog_mutex);
+
+	old = READ_ONCE(softlockup_irqtrace);
+	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	/* Only a successful write that changed the value triggers an update. */
+	if (!err && write && old != READ_ONCE(softlockup_irqtrace))
+		proc_watchdog_update();
+
+	mutex_unlock(&watchdog_mutex);
+	return err;
+}
+
 /*
  * The cpumask is the mask of possible cpus that the watchdog can run
  * on, not the mask of cpus it is actually running on.  This allows the
@@ -1158,6 +1194,15 @@  static struct ctl_table watchdog_sysctls[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+	{
+		.procname	= "softlockup_irqtrace",
+		.data		= &softlockup_irqtrace,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_softlockup_irqtrace,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 #ifdef CONFIG_SMP
 	{
 		.procname	= "softlockup_all_cpu_backtrace",