[RFC,59/86] treewide: rcu: remove cond_resched()

Message ID 20231107230822.371443-3-ankur.a.arora@oracle.com
State New
Headers
Series Make the kernel preemptible |

Commit Message

Ankur Arora Nov. 7, 2023, 11:07 p.m. UTC
  All the cond_resched() calls in the RCU interfaces here are to
drive preemption once it has reported a potentially quiescent
state, or to exit the grace period. With PREEMPTION=y that should
happen implicitly.

So we can remove these.

[1] https://lore.kernel.org/lkml/20231107215742.363031-1-ankur.a.arora@oracle.com/

Cc: "Paul E. McKenney" <paulmck@kernel.org> 
Cc: Frederic Weisbecker <frederic@kernel.org> 
Cc: Ingo Molnar <mingo@redhat.com> 
Cc: Peter Zijlstra <peterz@infradead.org> 
Cc: Juri Lelli <juri.lelli@redhat.com> 
Cc: Vincent Guittot <vincent.guittot@linaro.org> 
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 include/linux/rcupdate.h | 6 ++----
 include/linux/sched.h    | 7 ++++++-
 kernel/hung_task.c       | 6 +++---
 kernel/rcu/tasks.h       | 5 +----
 4 files changed, 12 insertions(+), 12 deletions(-)
  

Comments

Paul E. McKenney Nov. 21, 2023, 1:01 a.m. UTC | #1
On Tue, Nov 07, 2023 at 03:07:55PM -0800, Ankur Arora wrote:
> All the cond_resched() calls in the RCU interfaces here are to
> drive preemption once it has reported a potentially quiescent
> state, or to exit the grace period. With PREEMPTION=y that should
> happen implicitly.
> 
> So we can remove these.
> 
> [1] https://lore.kernel.org/lkml/20231107215742.363031-1-ankur.a.arora@oracle.com/
> 
> Cc: "Paul E. McKenney" <paulmck@kernel.org> 
> Cc: Frederic Weisbecker <frederic@kernel.org> 
> Cc: Ingo Molnar <mingo@redhat.com> 
> Cc: Peter Zijlstra <peterz@infradead.org> 
> Cc: Juri Lelli <juri.lelli@redhat.com> 
> Cc: Vincent Guittot <vincent.guittot@linaro.org> 
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
>  include/linux/rcupdate.h | 6 ++----
>  include/linux/sched.h    | 7 ++++++-
>  kernel/hung_task.c       | 6 +++---
>  kernel/rcu/tasks.h       | 5 +----
>  4 files changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 7246ee602b0b..58f8c7faaa52 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -238,14 +238,12 @@ static inline bool rcu_trace_implies_rcu_gp(void) { return true; }
>  /**
>   * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
>   *
> - * This macro resembles cond_resched(), except that it is defined to
> - * report potential quiescent states to RCU-tasks even if the cond_resched()
> - * machinery were to be shut off, as some advocate for PREEMPTION kernels.
> + * This macro resembles cond_resched(), in that it reports potential
> + * quiescent states to RCU-tasks.
>   */
>  #define cond_resched_tasks_rcu_qs() \
>  do { \
>  	rcu_tasks_qs(current, false); \
> -	cond_resched(); \

I am a bit nervous about dropping the cond_resched() in a few cases,
for example, the call from rcu_tasks_trace_pregp_step() only momentarily
enables interrupts.  This should be OK given a scheduling-clock interrupt,
except that nohz_full CPUs don't necessarily have these.  At least not
unless RCU happens to be in a grace period at the time.

>  } while (0)
>  
>  /*
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 199f8f7211f2..bae6eed534dd 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -2145,7 +2145,12 @@ static inline void cond_resched_rcu(void)
>  {
>  #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
>  	rcu_read_unlock();
> -	cond_resched();
> +
> +	/*
> +	 * Might reschedule here as we exit the RCU read-side
> +	 * critical section.
> +	 */
> +
>  	rcu_read_lock();

And here I am wondering if some of my nervousness about increased
grace-period latency due to removing cond_resched() might be addressed
by making preempt_enable() take over the help-RCU functionality currently
being provided by cond_resched()...

>  #endif
>  }
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 9a24574988d2..4bdfad08a2e8 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -153,8 +153,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
>   * To avoid extending the RCU grace period for an unbounded amount of time,
>   * periodically exit the critical section and enter a new one.
>   *
> - * For preemptible RCU it is sufficient to call rcu_read_unlock in order
> - * to exit the grace period. For classic RCU, a reschedule is required.
> + * Under a preemptive kernel, or with preemptible RCU, it is sufficient to
> + * call rcu_read_unlock in order to exit the grace period.
>   */
>  static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
>  {
> @@ -163,7 +163,7 @@ static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
>  	get_task_struct(g);
>  	get_task_struct(t);
>  	rcu_read_unlock();
> -	cond_resched();
> +
>  	rcu_read_lock();
>  	can_cont = pid_alive(g) && pid_alive(t);
>  	put_task_struct(t);
> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
> index 8d65f7d576a3..fa1d9aa31b36 100644
> --- a/kernel/rcu/tasks.h
> +++ b/kernel/rcu/tasks.h
> @@ -541,7 +541,6 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
>  		local_bh_disable();
>  		rhp->func(rhp);
>  		local_bh_enable();
> -		cond_resched();

...and by local_bh_enable().

						Thanx, Paul

>  	}
>  	raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
>  	rcu_segcblist_add_len(&rtpcp->cblist, -len);
> @@ -974,10 +973,8 @@ static void check_all_holdout_tasks(struct list_head *hop,
>  {
>  	struct task_struct *t, *t1;
>  
> -	list_for_each_entry_safe(t, t1, hop, rcu_tasks_holdout_list) {
> +	list_for_each_entry_safe(t, t1, hop, rcu_tasks_holdout_list)
>  		check_holdout_task(t, needreport, firstreport);
> -		cond_resched();
> -	}
>  }
>  
>  /* Finish off the Tasks-RCU grace period. */
> -- 
> 2.31.1
>
  

Patch

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7246ee602b0b..58f8c7faaa52 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -238,14 +238,12 @@  static inline bool rcu_trace_implies_rcu_gp(void) { return true; }
 /**
  * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
  *
- * This macro resembles cond_resched(), except that it is defined to
- * report potential quiescent states to RCU-tasks even if the cond_resched()
- * machinery were to be shut off, as some advocate for PREEMPTION kernels.
+ * This macro resembles cond_resched(), in that it reports potential
+ * quiescent states to RCU-tasks.
  */
 #define cond_resched_tasks_rcu_qs() \
 do { \
 	rcu_tasks_qs(current, false); \
-	cond_resched(); \
 } while (0)
 
 /*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 199f8f7211f2..bae6eed534dd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2145,7 +2145,12 @@  static inline void cond_resched_rcu(void)
 {
 #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
 	rcu_read_unlock();
-	cond_resched();
+
+	/*
+	 * Might reschedule here as we exit the RCU read-side
+	 * critical section.
+	 */
+
 	rcu_read_lock();
 #endif
 }
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 9a24574988d2..4bdfad08a2e8 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -153,8 +153,8 @@  static void check_hung_task(struct task_struct *t, unsigned long timeout)
  * To avoid extending the RCU grace period for an unbounded amount of time,
  * periodically exit the critical section and enter a new one.
  *
- * For preemptible RCU it is sufficient to call rcu_read_unlock in order
- * to exit the grace period. For classic RCU, a reschedule is required.
+ * Under a preemptive kernel, or with preemptible RCU, it is sufficient to
+ * call rcu_read_unlock in order to exit the grace period.
  */
 static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
 {
@@ -163,7 +163,7 @@  static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
 	get_task_struct(g);
 	get_task_struct(t);
 	rcu_read_unlock();
-	cond_resched();
+
 	rcu_read_lock();
 	can_cont = pid_alive(g) && pid_alive(t);
 	put_task_struct(t);
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 8d65f7d576a3..fa1d9aa31b36 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -541,7 +541,6 @@  static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
 		local_bh_disable();
 		rhp->func(rhp);
 		local_bh_enable();
-		cond_resched();
 	}
 	raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
 	rcu_segcblist_add_len(&rtpcp->cblist, -len);
@@ -974,10 +973,8 @@  static void check_all_holdout_tasks(struct list_head *hop,
 {
 	struct task_struct *t, *t1;
 
-	list_for_each_entry_safe(t, t1, hop, rcu_tasks_holdout_list) {
+	list_for_each_entry_safe(t, t1, hop, rcu_tasks_holdout_list)
 		check_holdout_task(t, needreport, firstreport);
-		cond_resched();
-	}
 }
 
 /* Finish off the Tasks-RCU grace period. */