Converting the NOHZ timer placement to a pull-at-expiry-time model requires
separate expiry times for the pinned and the non-pinned (movable) timers.
Therefore struct timer_events is introduced.
No functional change.
Originally-by: Richard Cochran (linutronix GmbH) <richardcochran@gmail.com>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
---
kernel/time/timer.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 40 insertions(+), 5 deletions(-)
@@ -221,6 +221,11 @@ struct timer_base {
static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
+struct timer_events {
+ u64 local;	/* next expiry for pinned (local base) timers; initialized to KTIME_MAX by get_next_timer_interrupt() */
+ u64 global;	/* next expiry for non-pinned, movable (global base) timers; initialized to KTIME_MAX by get_next_timer_interrupt() */
+};
+
#ifdef CONFIG_NO_HZ_COMMON
static DEFINE_STATIC_KEY_FALSE(timers_nohz_active);
@@ -1997,17 +2002,17 @@ static void forward_base_clk(struct timer_base *base, unsigned long nextevt,
*/
u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
{
+ struct timer_events tevt = { .local = KTIME_MAX, .global = KTIME_MAX };
unsigned long nextevt, nextevt_local, nextevt_global;
struct timer_base *base_local, *base_global;
bool local_first, is_idle;
- u64 expires = KTIME_MAX;
/*
* Pretend that there is no timer pending if the cpu is offline.
* Possible pending timers will be migrated later to an active cpu.
*/
if (cpu_is_offline(smp_processor_id()))
- return expires;
+ return tevt.local;
base_local = this_cpu_ptr(&timer_bases[BASE_LOCAL]);
base_global = this_cpu_ptr(&timer_bases[BASE_GLOBAL]);
@@ -2052,16 +2057,46 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
/* We need to mark both bases in sync */
base_local->is_idle = base_global->is_idle = is_idle;
- if (base_local->timers_pending || base_global->timers_pending) {
+ /*
+ * If the bases are not marked idle, i.e. one of the events is at
+ * most one tick away, then the CPU can't go into a NOHZ idle
+ * sleep. Use the earlier event of both and store it in the local
+ * expiry value. The next global event is irrelevant in this case
+ * and can be left as KTIME_MAX. The CPU will wake up on time.
+ */
+ if (!is_idle) {
/* If we missed a tick already, force 0 delta */
if (time_before(nextevt, basej))
nextevt = basej;
- expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
+ tevt.local = basem + (u64)(nextevt - basej) * TICK_NSEC;
+ goto unlock;
}
+
+ /*
+ * If the bases are marked idle, i.e. the next event on both the
+ * local and the global queue is farther away than a tick,
+ * evaluate both bases. No need to check whether one of the bases
+ * has an already expired timer as this is caught by the !is_idle
+ * condition above.
+ */
+ if (base_local->timers_pending)
+ tevt.local = basem + (u64)(nextevt_local - basej) * TICK_NSEC;
+
+ /*
+ * If the local queue expires first, then the global event can be
+ * ignored. The CPU wakes up before that. If the global queue is
+ * empty, nothing to do either.
+ */
+ if (!local_first && base_global->timers_pending)
+ tevt.global = basem + (u64)(nextevt_global - basej) * TICK_NSEC;
+
+unlock:
raw_spin_unlock(&base_global->lock);
raw_spin_unlock(&base_local->lock);
- return cmp_next_hrtimer_event(basem, expires);
+ tevt.local = min_t(u64, tevt.local, tevt.global);
+
+ return cmp_next_hrtimer_event(basem, tevt.local);
}
/**