[v2,11/13] rcu-tasks: Report stalls during synchronize_srcu() in rcu_tasks_postscan()

Message ID 20230325173316.3118674-12-joel@joelfernandes.org
State New
Headers
Series Core RCU patches for 6.4 |

Commit Message

Joel Fernandes March 25, 2023, 5:33 p.m. UTC
  From: Neeraj Upadhyay <quic_neeraju@quicinc.com>

The call to synchronize_srcu() from rcu_tasks_postscan() can be stalled
by a task getting stuck in do_exit() between that function's calls to
exit_tasks_rcu_start() and exit_tasks_rcu_finish().   To ease diagnosis
of this situation, print a stall warning message every rcu_task_stall_info
period when rcu_tasks_postscan() is stalled.

[ paulmck: Adjust to handle CONFIG_SMP=n. ]

Reported-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/rcu/20230111212736.GA1062057@paulmck-ThinkPad-P17-Gen-1/
Signed-off-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/rcu/tasks.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
  

Comments

Frederic Weisbecker March 26, 2023, 7:53 p.m. UTC | #1
Le Sat, Mar 25, 2023 at 05:33:14PM +0000, Joel Fernandes (Google) a écrit :
> From: Neeraj Upadhyay <quic_neeraju@quicinc.com>
> 
> The call to synchronize_srcu() from rcu_tasks_postscan() can be stalled
> by a task getting stuck in do_exit() between that function's calls to
> exit_tasks_rcu_start() and exit_tasks_rcu_finish().   To ease diagnosis
> of this situation, print a stall warning message every rcu_task_stall_info
> period when rcu_tasks_postscan() is stalled.
> 
> [ paulmck: Adjust to handle CONFIG_SMP=n. ]
> 
> Reported-by: Mark Brown <broonie@kernel.org>
> Link: https://lore.kernel.org/rcu/20230111212736.GA1062057@paulmck-ThinkPad-P17-Gen-1/
> Signed-off-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

Acked-by: Frederic Weisbecker <frederic@kernel.org>
  

Patch

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index bfb5e1549f2b..baf7ec178155 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -139,6 +139,12 @@  static struct rcu_tasks rt_name =							\
 /* Track exiting tasks in order to allow them to be waited for. */
 DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
 
+#ifdef CONFIG_TASKS_RCU
+/* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */
+static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
+static DEFINE_TIMER(tasks_rcu_exit_srcu_stall_timer, tasks_rcu_exit_srcu_stall);
+#endif
+
 /* Avoid IPIing CPUs early in the grace period. */
 #define RCU_TASK_IPI_DELAY (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) ? HZ / 2 : 0)
 static int rcu_task_ipi_delay __read_mostly = RCU_TASK_IPI_DELAY;
@@ -830,6 +836,13 @@  static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
 /* Processing between scanning taskslist and draining the holdout list. */
 static void rcu_tasks_postscan(struct list_head *hop)
 {
+	int rtsi = READ_ONCE(rcu_task_stall_info);
+
+	if (!IS_ENABLED(CONFIG_TINY_RCU)) {
+		tasks_rcu_exit_srcu_stall_timer.expires = jiffies + rtsi;
+		add_timer(&tasks_rcu_exit_srcu_stall_timer);
+	}
+
 	/*
 	 * Exiting tasks may escape the tasklist scan. Those are vulnerable
 	 * until their final schedule() with TASK_DEAD state. To cope with
@@ -848,6 +861,9 @@  static void rcu_tasks_postscan(struct list_head *hop)
 	 * call to synchronize_rcu().
 	 */
 	synchronize_srcu(&tasks_rcu_exit_srcu);
+
+	if (!IS_ENABLED(CONFIG_TINY_RCU))
+		del_timer_sync(&tasks_rcu_exit_srcu_stall_timer);
 }
 
 /* See if tasks are still holding out, complain if so. */
@@ -923,6 +939,21 @@  static void rcu_tasks_postgp(struct rcu_tasks *rtp)
 void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
 DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
 
+static void tasks_rcu_exit_srcu_stall(struct timer_list *unused)
+{
+#ifndef CONFIG_TINY_RCU
+	int rtsi;
+
+	rtsi = READ_ONCE(rcu_task_stall_info);
+	pr_info("%s: %s grace period number %lu (since boot) gp_state: %s is %lu jiffies old.\n",
+		__func__, rcu_tasks.kname, rcu_tasks.tasks_gp_seq,
+		tasks_gp_state_getname(&rcu_tasks), jiffies - rcu_tasks.gp_jiffies);
+	pr_info("Please check any exiting tasks stuck between calls to exit_tasks_rcu_start() and exit_tasks_rcu_finish()\n");
+	tasks_rcu_exit_srcu_stall_timer.expires = jiffies + rtsi;
+	add_timer(&tasks_rcu_exit_srcu_stall_timer);
+#endif // #ifndef CONFIG_TINY_RCU
+}
+
 /**
  * call_rcu_tasks() - Queue an RCU for invocation task-based grace period
  * @rhp: structure to be used for queueing the RCU updates.