[v10,13/20] timers: Add get next timer interrupt functionality for remote CPUs

Message ID 20240115143743.27827-14-anna-maria@linutronix.de
State New
Series timers: Move from a push remote at enqueue to a pull at expiry model

Commit Message

Anna-Maria Behnsen Jan. 15, 2024, 2:37 p.m. UTC

To prepare for the conversion of the NOHZ timer placement to a pull at
expiry time model, functionality is required for getting the next timer
interrupt on a remote CPU.

Locking the timer bases and retrieving the next timer interrupt
information are split into separate functions. This is required to comply
with the lock ordering once the new model is in place.

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
---
v10:
 - sparse annotations for locks

v8:
 - Update comment

v7:
 - Move functions into CONFIG_SMP && CONFIG_NO_HZ_COMMON section
 - change lock, fetch functions to be unconditional
 - split out unlock function into a separate function

v6:
 - introduce timer_lock_remote_bases() to fix race
---
 kernel/time/tick-internal.h | 10 +++++
 kernel/time/timer.c         | 80 ++++++++++++++++++++++++++++++++++---
 2 files changed, 85 insertions(+), 5 deletions(-)
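
As a rough illustration of how the three new functions are meant to be
combined, here is a minimal sketch of a caller (the helper name is
hypothetical and not part of this patch; the real call site arrives with
the pull model later in the series):

/*
 * Sketch of the intended calling sequence. Splitting lock, fetch and
 * unlock into separate functions lets a caller take further locks in
 * between while respecting the lock ordering. Interrupts must be
 * disabled across the whole sequence.
 */
static u64 next_timer_event_on(unsigned int cpu, unsigned long basej, u64 basem)
{
	struct timer_events tevt;

	timer_lock_remote_bases(cpu);
	fetch_next_timer_interrupt_remote(basej, basem, &tevt, cpu);
	timer_unlock_remote_bases(cpu);

	/* Empty queues are reported as KTIME_MAX by the fetch function */
	return min(tevt.local, tevt.global);
}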
  

Comments

Frederic Weisbecker Feb. 19, 2024, 4:04 p.m. UTC | #1
On Mon, Jan 15, 2024 at 03:37:36PM +0100, Anna-Maria Behnsen wrote:
> +# ifdef CONFIG_SMP
> +/**
> + * fetch_next_timer_interrupt_remote() - Store next timers into @tevt
> + * @basej:	base time jiffies
> + * @basem:	base time clock monotonic
> + * @tevt:	Pointer to the storage for the expiry values
> + * @cpu:	Remote CPU
> + *
> + * Stores the next pending local and global timer expiry values in the
> + * struct pointed to by @tevt. If a queue is empty the corresponding
> + * field is set to KTIME_MAX. If local event expires before global
> + * event, global event is set to KTIME_MAX as well.
> + *
> + * Caller needs to make sure timer base locks are held (use
> + * timer_lock_remote_bases() for this purpose).
> + */
> +void fetch_next_timer_interrupt_remote(unsigned long basej, u64 basem,
> +				       struct timer_events *tevt,
> +				       unsigned int cpu)
> +{
> +	struct timer_base *base_local, *base_global;
> +
> +	/* Preset local / global events */
> +	tevt->local = tevt->global = KTIME_MAX;
> +
> +	base_local = per_cpu_ptr(&timer_bases[BASE_LOCAL], cpu);
> +	base_global = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu);
> +
> +	lockdep_assert_held(&base_local->lock);
> +	lockdep_assert_held(&base_global->lock);
> +
> +	fetch_next_timer_interrupt(basej, basem, base_local, base_global, tevt);

If the next timer is global and it is <= jiffies + 1, the result will be
returned in tevt.local only and not in tevt.global. So a remote fetch may miss it.

For this to work on both local and remote fetch, you may need:

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 320eb4ceafa2..64ce9a7760f5 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -2004,6 +2007,8 @@ static unsigned long fetch_next_timer_interrupt(unsigned long basej, u64 basem,
 		if (time_before(nextevt, basej))
 			nextevt = basej;
 		tevt->local = basem + (u64)(nextevt - basej) * TICK_NSEC;
+		if (!local_first)
+			tevt->global = tevt->local;
 		return nextevt;
 	}
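
For context, the early-return path in fetch_next_timer_interrupt()
referred to above looks roughly like this (a paraphrased sketch, not
verbatim series code):

	/*
	 * Paraphrased sketch: when the first event is at most one tick
	 * away, only tevt->local is updated before the early return,
	 * even when that first event is a global timer (!local_first),
	 * so tevt->global stays at KTIME_MAX.
	 */
	if (time_before_eq(nextevt, basej + 1)) {
		/* If we missed a tick or the timer is already expired, expire now */
		if (time_before(nextevt, basej))
			nextevt = basej;
		tevt->local = basem + (u64)(nextevt - basej) * TICK_NSEC;
		return nextevt;
	}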
  
Anna-Maria Behnsen Feb. 19, 2024, 4:57 p.m. UTC | #2
Frederic Weisbecker <frederic@kernel.org> writes:

> On Mon, Jan 15, 2024 at 03:37:36PM +0100, Anna-Maria Behnsen wrote:
>> +# ifdef CONFIG_SMP
>> +/**
>> + * fetch_next_timer_interrupt_remote() - Store next timers into @tevt
>> + * @basej:	base time jiffies
>> + * @basem:	base time clock monotonic
>> + * @tevt:	Pointer to the storage for the expiry values
>> + * @cpu:	Remote CPU
>> + *
>> + * Stores the next pending local and global timer expiry values in the
>> + * struct pointed to by @tevt. If a queue is empty the corresponding
>> + * field is set to KTIME_MAX. If local event expires before global
>> + * event, global event is set to KTIME_MAX as well.
>> + *
>> + * Caller needs to make sure timer base locks are held (use
>> + * timer_lock_remote_bases() for this purpose).
>> + */
>> +void fetch_next_timer_interrupt_remote(unsigned long basej, u64 basem,
>> +				       struct timer_events *tevt,
>> +				       unsigned int cpu)
>> +{
>> +	struct timer_base *base_local, *base_global;
>> +
>> +	/* Preset local / global events */
>> +	tevt->local = tevt->global = KTIME_MAX;
>> +
>> +	base_local = per_cpu_ptr(&timer_bases[BASE_LOCAL], cpu);
>> +	base_global = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu);
>> +
>> +	lockdep_assert_held(&base_local->lock);
>> +	lockdep_assert_held(&base_global->lock);
>> +
>> +	fetch_next_timer_interrupt(basej, basem, base_local, base_global, tevt);
>
> If the next timer is global and it is <= jiffies + 1, the result will be
> returned in tevt.local only and not in tevt.global. So a remote fetch may miss it.

Oh no. But yes, sounds reasonable.

> For this to work on both local and remote fetch, you may need:
>
> diff --git a/kernel/time/timer.c b/kernel/time/timer.c
> index 320eb4ceafa2..64ce9a7760f5 100644
> --- a/kernel/time/timer.c
> +++ b/kernel/time/timer.c
> @@ -2004,6 +2007,8 @@ static unsigned long fetch_next_timer_interrupt(unsigned long basej, u64 basem,
>  		if (time_before(nextevt, basej))
>  			nextevt = basej;
>  		tevt->local = basem + (u64)(nextevt - basej) * TICK_NSEC;
> +		if (!local_first)
> +			tevt->global = tevt->local;
>  		return nextevt;
>  	}
>  

Will fix it, with a big comment explaining why this is required for
remote call sites and does not hurt when executed on the local CPU.

Thanks a lot!
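
A commented version of the fix might then look roughly like this (a
sketch only; the comment wording here is an assumption, not the author's
final text):

		tevt->local = basem + (u64)(nextevt - basej) * TICK_NSEC;

		/*
		 * Required for remote call sites only: they take care of
		 * the global timers of the remote CPU, so a first-expiring
		 * global event must not be reported solely in tevt->local.
		 * It does not hurt on the local CPU, as local callers
		 * ignore tevt->global anyway when the next event is at
		 * most one tick away.
		 */
		if (!local_first)
			tevt->global = tevt->local;
		return nextevt;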
  

Patch

diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 47df30b871e4..8b0c28edbd09 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -8,6 +8,11 @@ 
 #include "timekeeping.h"
 #include "tick-sched.h"
 
+struct timer_events {
+	u64	local;
+	u64	global;
+};
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
 # define TICK_DO_TIMER_NONE	-1
@@ -154,6 +159,11 @@  extern unsigned long tick_nohz_active;
 extern void timers_update_nohz(void);
 # ifdef CONFIG_SMP
 extern struct static_key_false timers_migration_enabled;
+extern void fetch_next_timer_interrupt_remote(unsigned long basej, u64 basem,
+					      struct timer_events *tevt,
+					      unsigned int cpu);
+extern void timer_lock_remote_bases(unsigned int cpu);
+extern void timer_unlock_remote_bases(unsigned int cpu);
 # endif
 #else /* CONFIG_NO_HZ_COMMON */
 static inline void timers_update_nohz(void) { }
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 9fa759dd80f5..3e2adfc15f3a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -221,11 +221,6 @@  struct timer_base {
 
 static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
 
-struct timer_events {
-	u64	local;
-	u64	global;
-};
-
 #ifdef CONFIG_NO_HZ_COMMON
 
 static DEFINE_STATIC_KEY_FALSE(timers_nohz_active);
@@ -2031,6 +2026,81 @@  static unsigned long fetch_next_timer_interrupt(unsigned long basej, u64 basem,
 	return nextevt;
 }
 
+# ifdef CONFIG_SMP
+/**
+ * fetch_next_timer_interrupt_remote() - Store next timers into @tevt
+ * @basej:	base time jiffies
+ * @basem:	base time clock monotonic
+ * @tevt:	Pointer to the storage for the expiry values
+ * @cpu:	Remote CPU
+ *
+ * Stores the next pending local and global timer expiry values in the
+ * struct pointed to by @tevt. If a queue is empty the corresponding
+ * field is set to KTIME_MAX. If local event expires before global
+ * event, global event is set to KTIME_MAX as well.
+ *
+ * Caller needs to make sure timer base locks are held (use
+ * timer_lock_remote_bases() for this purpose).
+ */
+void fetch_next_timer_interrupt_remote(unsigned long basej, u64 basem,
+				       struct timer_events *tevt,
+				       unsigned int cpu)
+{
+	struct timer_base *base_local, *base_global;
+
+	/* Preset local / global events */
+	tevt->local = tevt->global = KTIME_MAX;
+
+	base_local = per_cpu_ptr(&timer_bases[BASE_LOCAL], cpu);
+	base_global = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu);
+
+	lockdep_assert_held(&base_local->lock);
+	lockdep_assert_held(&base_global->lock);
+
+	fetch_next_timer_interrupt(basej, basem, base_local, base_global, tevt);
+}
+
+/**
+ * timer_unlock_remote_bases - unlock timer bases of cpu
+ * @cpu:	Remote CPU
+ *
+ * Unlocks the remote timer bases.
+ */
+void timer_unlock_remote_bases(unsigned int cpu)
+	__releases(timer_bases[BASE_LOCAL]->lock)
+	__releases(timer_bases[BASE_GLOBAL]->lock)
+{
+	struct timer_base *base_local, *base_global;
+
+	base_local = per_cpu_ptr(&timer_bases[BASE_LOCAL], cpu);
+	base_global = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu);
+
+	raw_spin_unlock(&base_global->lock);
+	raw_spin_unlock(&base_local->lock);
+}
+
+/**
+ * timer_lock_remote_bases - lock timer bases of cpu
+ * @cpu:	Remote CPU
+ *
+ * Locks the remote timer bases.
+ */
+void timer_lock_remote_bases(unsigned int cpu)
+	__acquires(timer_bases[BASE_LOCAL]->lock)
+	__acquires(timer_bases[BASE_GLOBAL]->lock)
+{
+	struct timer_base *base_local, *base_global;
+
+	base_local = per_cpu_ptr(&timer_bases[BASE_LOCAL], cpu);
+	base_global = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu);
+
+	lockdep_assert_irqs_disabled();
+
+	raw_spin_lock(&base_local->lock);
+	raw_spin_lock_nested(&base_global->lock, SINGLE_DEPTH_NESTING);
+}
+# endif /* CONFIG_SMP */
+
 static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
 					     bool *idle)
 {