[05/30] sched: *_tsk_need_resched() now takes resched_t as param

Message ID 20240213055554.1802415-6-ankur.a.arora@oracle.com
State New
Series PREEMPT_AUTO: support lazy rescheduling

Commit Message

Ankur Arora Feb. 13, 2024, 5:55 a.m. UTC
*_tsk_need_resched() now take a resched_t parameter specifying the immediacy of the need-resched.

These interfaces are primarily used in the scheduler and RCU. Outside
of the scheduler, set_tsk_need_resched() is typically used to force
a context switch by setting need-resched and folding it in. Update
those call sites to use set_tsk_need_resched(..., NR_now).

For scheduler usage, preserve the current semantics by using
set_tsk_need_resched(..., NR_now) and test_tsk_need_resched(..., NR_now).

Note that clear_tsk_need_resched() is only used from __schedule()
to do any clearing needed before switching context. It now clears
all the need-resched flags.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Paul E. McKenney <paulmck@kernel.org>
Originally-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/87jzshhexi.ffs@tglx/
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 arch/s390/mm/pfault.c    |  2 +-
 include/linux/sched.h    | 30 ++++++++++++++++++++++++------
 kernel/rcu/tiny.c        |  2 +-
 kernel/rcu/tree.c        |  4 ++--
 kernel/rcu/tree_exp.h    |  4 ++--
 kernel/rcu/tree_plugin.h |  4 ++--
 kernel/rcu/tree_stall.h  |  2 +-
 kernel/sched/core.c      |  9 +++++----
 kernel/sched/deadline.c  |  4 ++--
 kernel/sched/fair.c      |  2 +-
 kernel/sched/idle.c      |  2 +-
 kernel/sched/rt.c        |  4 ++--
 12 files changed, 44 insertions(+), 25 deletions(-)
  

Comments

Thomas Gleixner Feb. 19, 2024, 3:26 p.m. UTC | #1
On Mon, Feb 12 2024 at 21:55, Ankur Arora wrote:

The subject line reads odd...

> -static inline bool test_tsk_need_resched(struct task_struct *tsk)
> +static inline bool test_tsk_need_resched(struct task_struct *tsk, resched_t rs)
>  {
> -	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
> +	if (IS_ENABLED(CONFIG_PREEMPT_AUTO) || rs == NR_now)
> +		return unlikely(test_tsk_thread_flag(tsk, tif_resched(rs)));
> +	else
> +		return false;
>  }

Same like the others. This wants wrappers with now/lazy.
  
>  /*
> @@ -2104,7 +2121,8 @@ static __always_inline bool need_resched(void)
>  
>  static __always_inline bool need_resched_lazy(void)
>  {
> -	return unlikely(tif_need_resched(NR_lazy));
> +	return IS_ENABLED(CONFIG_PREEMPT_AUTO) &&
> +		unlikely(tif_need_resched(NR_lazy));

Shouldn't this be folded into the patch which adds need_resched_lazy()?

Thanks,

        tglx
  
Ankur Arora Feb. 20, 2024, 10:37 p.m. UTC | #2
Thomas Gleixner <tglx@linutronix.de> writes:

> On Mon, Feb 12 2024 at 21:55, Ankur Arora wrote:
>
> The subject line reads odd...
>
>> -static inline bool test_tsk_need_resched(struct task_struct *tsk)
>> +static inline bool test_tsk_need_resched(struct task_struct *tsk, resched_t rs)
>>  {
>> -	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
>> +	if (IS_ENABLED(CONFIG_PREEMPT_AUTO) || rs == NR_now)
>> +		return unlikely(test_tsk_thread_flag(tsk, tif_resched(rs)));
>> +	else
>> +		return false;
>>  }
>
> Same like the others. This wants wrappers with now/lazy.

So, when working on the scheduler changes, I found the simplest
implementation was to define a function that takes into account the
current preemption mode, checks for idle, tick, etc., and returns
the rescheduling policy, which __resched_curr() then carries out.

So, it would be useful to just pass the resched_t as a parameter
instead of having now/lazy wrappers.
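
Roughly along these lines (the name resched_policy() and the exact
checks below are only illustrative, not what the series settles on):

   /* Illustrative sketch only: pick the rescheduling immediacy. */
   static resched_t resched_policy(struct rq *rq)
   {
   	if (!IS_ENABLED(CONFIG_PREEMPT_AUTO))
   		return NR_now;

   	/* The idle task has nothing useful to finish; preempt it now. */
   	if (is_idle_task(rq->curr))
   		return NR_now;

   	/* Otherwise mark the reschedule lazily and let the next tick,
   	 * or the return to user, pick it up. */
   	return NR_lazy;
   }

   static void resched_curr(struct rq *rq)
   {
   	__resched_curr(rq, resched_policy(rq));
   }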

That said, as I mention in the other thread, the current primitives
are unnecessarily noisy because every caller needs to use them.

   -static inline bool test_tsk_need_resched(struct task_struct *tsk, resched_t rs)
   +static inline bool __test_tsk_need_resched(struct task_struct *tsk, resched_t rs)
   {
           if (IS_ENABLED(CONFIG_PREEMPT_AUTO) || rs == NR_now)
                   return unlikely(test_tsk_thread_flag(tsk, tif_resched(rs)));
   @@ -1980,6 +1985,11 @@ static inline bool test_tsk_need_resched(struct task_struct *tsk, resched_t rs)
                   return false;
   }

   +static inline bool test_tsk_need_resched(struct task_struct *tsk)
   +{
   +	return __test_tsk_need_resched(tsk, NR_now);
   +}
   +

How about something like this (and similar elsewhere)?
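
Call sites that don't care about laziness would then keep the short
form, while policy-aware code queries a specific bit, e.g. (just a
sketch):

   	if (test_tsk_need_resched(rq->curr))		/* unchanged callers */
   		return;

   	if (__test_tsk_need_resched(rq->curr, NR_lazy))	/* policy-aware code */
   		return;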

>>  /*
>> @@ -2104,7 +2121,8 @@ static __always_inline bool need_resched(void)
>>
>>  static __always_inline bool need_resched_lazy(void)
>>  {
>> -	return unlikely(tif_need_resched(NR_lazy));
>> +	return IS_ENABLED(CONFIG_PREEMPT_AUTO) &&
>> +		unlikely(tif_need_resched(NR_lazy));
>
> Shouldn't this be folded into the patch which adds need_resched_lazy()?

I think I had messed up a rebase. Will fix.

Thanks

--
ankur
  
Thomas Gleixner Feb. 21, 2024, 5:10 p.m. UTC | #3
On Tue, Feb 20 2024 at 14:37, Ankur Arora wrote:
> Thomas Gleixner <tglx@linutronix.de> writes:
>> On Mon, Feb 12 2024 at 21:55, Ankur Arora wrote:
>>
>> The subject line reads odd...
>>
>>> -static inline bool test_tsk_need_resched(struct task_struct *tsk)
>>> +static inline bool test_tsk_need_resched(struct task_struct *tsk, resched_t rs)
>>>  {
>>> -	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
>>> +	if (IS_ENABLED(CONFIG_PREEMPT_AUTO) || rs == NR_now)
>>> +		return unlikely(test_tsk_thread_flag(tsk, tif_resched(rs)));
>>> +	else
>>> +		return false;
>>>  }
>>
>> Same like the others. This wants wrappers with now/lazy.
>
> So, when working on the scheduler changes, I found the simplest
> implementation was to define a function that takes into account
> current preemption mode, checks for idle, tick etc and returns
> the rescheduling policy, which __resched_curr() carries out.
>
> So, it would be useful to just pass the resched_t as a parameter
> instead of having now/lazy wrappers.

That's fine for specific functions which really need to handle the
rescheduling mode, but for all other random places having a nice wrapper
makes the code more readable.

Thanks,

        tglx
  

Patch

diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index 1aac13bb8f53..4e075059d2e7 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -198,7 +198,7 @@  static void pfault_interrupt(struct ext_code ext_code,
 			 * return to userspace schedule() to block.
 			 */
 			__set_current_state(TASK_UNINTERRUPTIBLE);
-			set_tsk_need_resched(tsk);
+			set_tsk_need_resched(tsk, NR_now);
 			set_preempt_need_resched();
 		}
 	}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e790860d89c3..d226c2920cff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1948,19 +1948,36 @@  static inline bool test_tsk_thread_flag(struct task_struct *tsk, int flag)
 	return test_ti_thread_flag(task_thread_info(tsk), flag);
 }
 
-static inline void set_tsk_need_resched(struct task_struct *tsk)
+/*
+ * With !CONFIG_PREEMPT_AUTO, tif_resched(NR_lazy) reduces to
+ * tif_resched(NR_now). Add a check in the helpers below to ensure
+ * we don't touch the tif_resched(NR_now) bit unnecessarily.
+ */
+static inline void set_tsk_need_resched(struct task_struct *tsk, resched_t rs)
 {
-	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+	if (IS_ENABLED(CONFIG_PREEMPT_AUTO) || rs == NR_now)
+		set_tsk_thread_flag(tsk, tif_resched(rs));
+	else
+		/*
+		 * NR_lazy is only touched under CONFIG_PREEMPT_AUTO.
+		 */
+		BUG();
 }
 
 static inline void clear_tsk_need_resched(struct task_struct *tsk)
 {
-	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+	clear_tsk_thread_flag(tsk, tif_resched(NR_now));
+
+	if (IS_ENABLED(CONFIG_PREEMPT_AUTO))
+		clear_tsk_thread_flag(tsk, tif_resched(NR_lazy));
 }
 
-static inline bool test_tsk_need_resched(struct task_struct *tsk)
+static inline bool test_tsk_need_resched(struct task_struct *tsk, resched_t rs)
 {
-	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
+	if (IS_ENABLED(CONFIG_PREEMPT_AUTO) || rs == NR_now)
+		return unlikely(test_tsk_thread_flag(tsk, tif_resched(rs)));
+	else
+		return false;
 }
 
 /*
@@ -2104,7 +2121,8 @@  static __always_inline bool need_resched(void)
 
 static __always_inline bool need_resched_lazy(void)
 {
-	return unlikely(tif_need_resched(NR_lazy));
+	return IS_ENABLED(CONFIG_PREEMPT_AUTO) &&
+		unlikely(tif_need_resched(NR_lazy));
 }
 
 /*
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index fec804b79080..a085d337434e 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -73,7 +73,7 @@  void rcu_sched_clock_irq(int user)
 	if (user) {
 		rcu_qs();
 	} else if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
-		set_tsk_need_resched(current);
+		set_tsk_need_resched(current, NR_now);
 		set_preempt_need_resched();
 	}
 }
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1ae851777806..d6ac2b703a6d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2250,7 +2250,7 @@  void rcu_sched_clock_irq(int user)
 	if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
 		/* Idle and userspace execution already are quiescent states. */
 		if (!rcu_is_cpu_rrupt_from_idle() && !user) {
-			set_tsk_need_resched(current);
+			set_tsk_need_resched(current, NR_now);
 			set_preempt_need_resched();
 		}
 		__this_cpu_write(rcu_data.rcu_urgent_qs, false);
@@ -2409,7 +2409,7 @@  static __latent_entropy void rcu_core(void)
 	if (IS_ENABLED(CONFIG_PREEMPT_COUNT) && (!(preempt_count() & PREEMPT_MASK))) {
 		rcu_preempt_deferred_qs(current);
 	} else if (rcu_preempt_need_deferred_qs(current)) {
-		set_tsk_need_resched(current);
+		set_tsk_need_resched(current, NR_now);
 		set_preempt_need_resched();
 	}
 
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6d7cea5d591f..34d0bbd93343 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -759,7 +759,7 @@  static void rcu_exp_handler(void *unused)
 			rcu_report_exp_rdp(rdp);
 		} else {
 			WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
-			set_tsk_need_resched(t);
+			set_tsk_need_resched(t, NR_now);
 			set_preempt_need_resched();
 		}
 		return;
@@ -860,7 +860,7 @@  static void rcu_exp_need_qs(void)
 	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
 	/* Store .exp before .rcu_urgent_qs. */
 	smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
-	set_tsk_need_resched(current);
+	set_tsk_need_resched(current, NR_now);
 	set_preempt_need_resched();
 }
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 41021080ad25..26c79246873a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -658,7 +658,7 @@  static void rcu_read_unlock_special(struct task_struct *t)
 			// Also if no expediting and no possible deboosting,
 			// slow is OK.  Plus nohz_full CPUs eventually get
 			// tick enabled.
-			set_tsk_need_resched(current);
+			set_tsk_need_resched(current, NR_now);
 			set_preempt_need_resched();
 			if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
 			    expboost && !rdp->defer_qs_iw_pending && cpu_online(rdp->cpu)) {
@@ -725,7 +725,7 @@  static void rcu_flavor_sched_clock_irq(int user)
 	    (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
 		/* No QS, force context switch if deferred. */
 		if (rcu_preempt_need_deferred_qs(t)) {
-			set_tsk_need_resched(t);
+			set_tsk_need_resched(t, NR_now);
 			set_preempt_need_resched();
 		}
 	} else if (rcu_preempt_need_deferred_qs(t)) {
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 5d666428546b..9d4aa4fde5ae 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -712,7 +712,7 @@  static void print_cpu_stall(unsigned long gps)
 	 * progress and it could be we're stuck in kernel space without context
 	 * switches for an entirely unreasonable amount of time.
 	 */
-	set_tsk_need_resched(current);
+	set_tsk_need_resched(current, NR_now);
 	set_preempt_need_resched();
 }
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9116bcc90346..41c3bd49a700 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -933,7 +933,7 @@  static bool set_nr_if_polling(struct task_struct *p)
 #else
 static inline bool set_nr_and_not_polling(struct task_struct *p)
 {
-	set_tsk_need_resched(p);
+	set_tsk_need_resched(p, NR_now);
 	return true;
 }
 
@@ -1045,13 +1045,13 @@  void resched_curr(struct rq *rq)
 
 	lockdep_assert_rq_held(rq);
 
-	if (test_tsk_need_resched(curr))
+	if (test_tsk_need_resched(curr, NR_now))
 		return;
 
 	cpu = cpu_of(rq);
 
 	if (cpu == smp_processor_id()) {
-		set_tsk_need_resched(curr);
+		set_tsk_need_resched(curr, NR_now);
 		set_preempt_need_resched();
 		return;
 	}
@@ -2247,7 +2247,8 @@  void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags)
 	 * A queue event has occurred, and we're going to schedule.  In
 	 * this case, we can save a useless back to back clock update.
 	 */
-	if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
+	if (task_on_rq_queued(rq->curr) &&
+	    test_tsk_need_resched(rq->curr, NR_now))
 		rq_clock_skip_update(rq);
 }
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a04a436af8cc..b4e68cfc3c3a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2035,7 +2035,7 @@  static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
 	 * let us try to decide what's the best thing to do...
 	 */
 	if ((p->dl.deadline == rq->curr->dl.deadline) &&
-	    !test_tsk_need_resched(rq->curr))
+	    !test_tsk_need_resched(rq->curr, NR_now))
 		check_preempt_equal_dl(rq, p);
 #endif /* CONFIG_SMP */
 }
@@ -2564,7 +2564,7 @@  static void pull_dl_task(struct rq *this_rq)
 static void task_woken_dl(struct rq *rq, struct task_struct *p)
 {
 	if (!task_on_cpu(rq, p) &&
-	    !test_tsk_need_resched(rq->curr) &&
+	    !test_tsk_need_resched(rq->curr, NR_now) &&
 	    p->nr_cpus_allowed > 1 &&
 	    dl_task(rq->curr) &&
 	    (rq->curr->nr_cpus_allowed < 2 ||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 533547e3c90a..ae9b237fa32b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8298,7 +8298,7 @@  static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
 	 * prevents us from potentially nominating it as a false LAST_BUDDY
 	 * below.
 	 */
-	if (test_tsk_need_resched(curr))
+	if (test_tsk_need_resched(curr, NR_now))
 		return;
 
 	/* Idle tasks are by definition preempted by non-idle tasks. */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index be53d164e267..cd25f71f43a7 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -359,7 +359,7 @@  static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
 	struct idle_timer *it = container_of(timer, struct idle_timer, timer);
 
 	WRITE_ONCE(it->done, 1);
-	set_tsk_need_resched(current);
+	set_tsk_need_resched(current, NR_now);
 
 	return HRTIMER_NORESTART;
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3261b067b67e..c57cc8427a57 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1680,7 +1680,7 @@  static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags)
 	 * to move current somewhere else, making room for our non-migratable
 	 * task.
 	 */
-	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
+	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr, NR_now))
 		check_preempt_equal_prio(rq, p);
 #endif
 }
@@ -2415,7 +2415,7 @@  static void pull_rt_task(struct rq *this_rq)
 static void task_woken_rt(struct rq *rq, struct task_struct *p)
 {
 	bool need_to_push = !task_on_cpu(rq, p) &&
-			    !test_tsk_need_resched(rq->curr) &&
+			    !test_tsk_need_resched(rq->curr, NR_now) &&
 			    p->nr_cpus_allowed > 1 &&
 			    (dl_task(rq->curr) || rt_task(rq->curr)) &&
 			    (rq->curr->nr_cpus_allowed < 2 ||