[v3,17/17] timer: Always queue timers on the local CPU

Message ID 20221025135850.51044-18-anna-maria@linutronix.de
State New
Headers
Series timer: Move from a push remote at enqueue to a pull at expiry model |

Commit Message

Anna-Maria Behnsen Oct. 25, 2022, 1:58 p.m. UTC
  The timer pull model is in place so we can remove the heuristics which try
to guess the best target CPU at enqueue/modification time.

All non pinned timers are queued on the local CPU in the seperate storage
and eventually pulled at expiry time to a remote CPU.

When a timer is added via add_timer_on(), TIMER_PINNED flag is required to
ensure it expires on the specified CPU. Otherwise it will be enqueued in
the global timer base which could be expired by a remote CPU. WARN_ONCE()
is added to prevent misuse.

Originally-by: Richard Cochran (linutronix GmbH) <richardcochran@gmail.com>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
---
 kernel/time/timer.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)
  

Comments

Liu, Yujie Oct. 27, 2022, 7:22 a.m. UTC | #1
Greeting,

FYI, we noticed WARNING:at_kernel/time/timer.c:#add_timer_on due to commit (built with gcc-11):

commit: 718a33930f6d6ac4f4c1fa361d8664590319b656 ("[PATCH v3 17/17] timer: Always queue timers on the local CPU")
url: https://github.com/intel-lab-lkp/linux/commits/Anna-Maria-Behnsen/timer-Move-from-a-push-remote-at-enqueue-to-a-pull-at-expiry-model/20221025-220106
base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git 8be3f96ceddb911539a53d87a66da84a04502366
patch link: https://lore.kernel.org/lkml/20221025135850.51044-18-anna-maria@linutronix.de
patch subject: [PATCH v3 17/17] timer: Always queue timers on the local CPU

in testcase: boot

on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):


[    1.879219][    C0] ------------[ cut here ]------------
[    1.880023][    C0] TIMER_PINNED flag for add_timer_on() is missing: timer=(____ptrval____) function=mix_interrupt_randomness
[ 1.880049][ C0] WARNING: CPU: 0 PID: 0 at kernel/time/timer.c:1220 add_timer_on (kernel/time/timer.c:1220 (discriminator 3)) 
[    1.883421][    C0] Modules linked in:
[    1.883975][    C0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.1.0-rc1-00018-g718a33930f6d #12
[    1.885340][    C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-4 04/01/2014
[ 1.887256][ C0] RIP: 0010:add_timer_on (kernel/time/timer.c:1220 (discriminator 3)) 
[ 1.888353][ C0] Code: ff ff 0f 0b 80 3d 8f aa 33 04 00 0f 85 c4 fd ff ff 4c 89 e2 48 89 ee 48 c7 c7 20 4b ca 83 c6 05 75 aa 33 04 01 e8 0f aa 1a 02 <0f> 0b 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 ea 03 0f b6 04
All code
========
   0:	ff                   	(bad)  
   1:	ff 0f                	decl   (%rdi)
   3:	0b 80 3d 8f aa 33    	or     0x33aa8f3d(%rax),%eax
   9:	04 00                	add    $0x0,%al
   b:	0f 85 c4 fd ff ff    	jne    0xfffffffffffffdd5
  11:	4c 89 e2             	mov    %r12,%rdx
  14:	48 89 ee             	mov    %rbp,%rsi
  17:	48 c7 c7 20 4b ca 83 	mov    $0xffffffff83ca4b20,%rdi
  1e:	c6 05 75 aa 33 04 01 	movb   $0x1,0x433aa75(%rip)        # 0x433aa9a
  25:	e8 0f aa 1a 02       	callq  0x21aaa39
  2a:*	0f 0b                	ud2    		<-- trapping instruction
  2c:	48 b8 00 00 00 00 00 	movabs $0xdffffc0000000000,%rax
  33:	fc ff df 
  36:	4c 89 f2             	mov    %r14,%rdx
  39:	48 c1 ea 03          	shr    $0x3,%rdx
  3d:	0f                   	.byte 0xf
  3e:	b6 04                	mov    $0x4,%dh

Code starting with the faulting instruction
===========================================
   0:	0f 0b                	ud2    
   2:	48 b8 00 00 00 00 00 	movabs $0xdffffc0000000000,%rax
   9:	fc ff df 
   c:	4c 89 f2             	mov    %r14,%rdx
   f:	48 c1 ea 03          	shr    $0x3,%rdx
  13:	0f                   	.byte 0xf
  14:	b6 04                	mov    $0x4,%dh
[    1.891973][    C0] RSP: 0000:ffffc90000007ea0 EFLAGS: 00010082
[    1.893142][    C0] RAX: 0000000000000000 RBX: ffff888100193000 RCX: 0000000000000000
[    1.894745][    C0] RDX: 0000000000000000 RSI: 1ffff92000000f9d RDI: fffff52000000fc6
[    1.896436][    C0] RBP: ffff88839d636ad0 R08: 0000000000000000 R09: ffffc90000007bd7
[    1.897969][    C0] R10: fffff52000000f7a R11: 0000000000000001 R12: ffffffff82524400
[    1.899594][    C0] R13: 1ffff92000000fd6 R14: ffff88839d636af0 R15: 0000000000000000
[    1.900968][    C0] FS:  0000000000000000(0000) GS:ffff88839d600000(0000) knlGS:0000000000000000
[    1.902552][    C0] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    1.903849][    C0] CR2: ffff88843ffff000 CR3: 0000000004a0e000 CR4: 00000000000406b0
[    1.905141][    C0] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    1.906226][    C0] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    1.907263][    C0] Call Trace:
[    1.907776][    C0]  <IRQ>
[ 1.908256][ C0] ? run_timer_softirq (kernel/time/timer.c:1214) 
[ 1.908957][ C0] ? tick_handle_periodic (kernel/time/tick-common.c:120) 
[ 1.909670][ C0] ? timer_interrupt (arch/x86/kernel/time.c:59) 
[ 1.910354][ C0] ? get_stack_info (arch/x86/kernel/time.c:56) 
[ 1.911112][ C0] handle_irq_event (kernel/irq/settings.h:187 kernel/irq/handle.c:197 kernel/irq/handle.c:210) 
[ 1.911849][ C0] handle_level_irq (include/linux/irq.h:348 kernel/irq/chip.c:616 kernel/irq/chip.c:652) 
[ 1.912570][ C0] __common_interrupt (arch/x86/kernel/irq.c:264 (discriminator 22)) 
[ 1.913265][ C0] common_interrupt (arch/x86/kernel/irq.c:240 (discriminator 14)) 
[    1.914215][    C0]  </IRQ>
[    1.914862][    C0]  <TASK>
[ 1.915511][ C0] asm_common_interrupt (arch/x86/include/asm/idtentry.h:640) 
[ 1.916474][ C0] RIP: 0010:_raw_spin_unlock_irqrestore (kernel/locking/spinlock.c:195) 
[ 1.917346][ C0] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 0f 1f 44 00 00 ff 15 5d 55 42 01 f7 c6 00 02 00 00 74 01 fb 65 ff 0d 65 35 99 7c <e9> e0 f1 36 00 66 66 2e 0f 1f 84 00 00 00 00 00 66 66 2e 0f 1f 84
All code
========
   0:	cc                   	int3   
   1:	cc                   	int3   
   2:	cc                   	int3   
   3:	cc                   	int3   
   4:	cc                   	int3   
   5:	cc                   	int3   
   6:	cc                   	int3   
   7:	cc                   	int3   
   8:	cc                   	int3   
   9:	cc                   	int3   
   a:	cc                   	int3   
   b:	cc                   	int3   
   c:	cc                   	int3   
   d:	cc                   	int3   
   e:	cc                   	int3   
   f:	0f 1f 44 00 00       	nopl   0x0(%rax,%rax,1)
  14:	ff 15 5d 55 42 01    	callq  *0x142555d(%rip)        # 0x1425577
  1a:	f7 c6 00 02 00 00    	test   $0x200,%esi
  20:	74 01                	je     0x23
  22:	fb                   	sti    
  23:	65 ff 0d 65 35 99 7c 	decl   %gs:0x7c993565(%rip)        # 0x7c99358f
  2a:*	e9 e0 f1 36 00       	jmpq   0x36f20f		<-- trapping instruction
  2f:	66 66 2e 0f 1f 84 00 	data16 nopw %cs:0x0(%rax,%rax,1)
  36:	00 00 00 00 
  3a:	66                   	data16
  3b:	66                   	data16
  3c:	2e                   	cs
  3d:	0f                   	.byte 0xf
  3e:	1f                   	(bad)  
  3f:	84                   	.byte 0x84

Code starting with the faulting instruction
===========================================
   0:	e9 e0 f1 36 00       	jmpq   0x36f1e5
   5:	66 66 2e 0f 1f 84 00 	data16 nopw %cs:0x0(%rax,%rax,1)
   c:	00 00 00 00 
  10:	66                   	data16
  11:	66                   	data16
  12:	2e                   	cs
  13:	0f                   	.byte 0xf
  14:	1f                   	(bad)  
  15:	84                   	.byte 0x84
[    1.920523][    C0] RSP: 0000:ffffffff84a07e38 EFLAGS: 00000282
[    1.921331][    C0] RAX: 0000000000000000 RBX: ffff888100193078 RCX: 1ffff11020032600
[    1.922600][    C0] RDX: 1ffff11020032613 RSI: 0000000000000246 RDI: ffff8881001930a4
[    1.924162][    C0] RBP: ffff888100193000 R08: 0000000000000000 R09: ffffffff84a07d3b
[    1.925766][    C0] R10: 0000000000000246 R11: 0000000000000001 R12: ffff88810019c23c
[    1.927034][    C0] R13: ffff88810019c200 R14: 0000000000015a00 R15: ffff888100193160
[ 1.928805][ C0] ? _raw_spin_unlock_irqrestore (arch/x86/include/asm/irqflags.h:137 include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194) 
[ 1.930002][ C0] __setup_irq (kernel/irq/internals.h:153 kernel/irq/manage.c:1810) 
[ 1.931099][ C0] ? get_stack_info (arch/x86/kernel/time.c:56) 
[ 1.932213][ C0] ? kasan_set_track (mm/kasan/common.c:52) 
[ 1.933270][ C0] ? get_stack_info (arch/x86/kernel/time.c:56) 
[ 1.934271][ C0] request_threaded_irq (kernel/irq/manage.c:2198) 
[ 1.935332][ C0] hpet_time_init (arch/x86/kernel/time.c:70 arch/x86/kernel/time.c:82) 
[ 1.936297][ C0] x86_late_time_init (arch/x86/kernel/time.c:100) 
[ 1.937322][ C0] start_kernel (init/main.c:1109) 
[ 1.938280][ C0] secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:358) 
[    1.939447][    C0]  </TASK>
[    1.940128][    C0] ---[ end trace 0000000000000000 ]---


If you fix the issue, kindly add following tag
| Reported-by: kernel test robot <yujie.liu@intel.com>
| Link: https://lore.kernel.org/oe-lkp/202210271413.b19a3e56-yujie.liu@intel.com


To reproduce:

        # build kernel
	cd linux
	cp config-6.1.0-rc1-00018-g718a33930f6d .config
	make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules
	make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install
	cd <mod-install-dir>
	find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz


        git clone https://github.com/intel/lkp-tests.git
        cd lkp-tests
        bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email

        # if come across any failure that blocks the test,
        # please remove ~/.lkp and /lkp dir to run from a clean state.
  

Patch

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 67f9cc077ff7..f05e547e0c6c 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -947,17 +947,6 @@  static inline struct timer_base *get_timer_base(u32 tflags)
 	return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
 }
 
-static inline struct timer_base *
-get_target_base(struct timer_base *base, unsigned tflags)
-{
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-	if (static_branch_likely(&timers_migration_enabled) &&
-	    !(tflags & TIMER_PINNED))
-		return get_timer_cpu_base(tflags, get_nohz_timer_target());
-#endif
-	return get_timer_this_cpu_base(tflags);
-}
-
 static inline void forward_timer_base(struct timer_base *base)
 {
 	unsigned long jnow = READ_ONCE(jiffies);
@@ -1093,7 +1082,7 @@  __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
 	if (!ret && (options & MOD_TIMER_PENDING_ONLY))
 		goto out_unlock;
 
-	new_base = get_target_base(base, timer->flags);
+	new_base = get_timer_this_cpu_base(timer->flags);
 
 	if (base != new_base) {
 		/*
@@ -1228,6 +1217,10 @@  void add_timer_on(struct timer_list *timer, int cpu)
 
 	BUG_ON(timer_pending(timer) || !timer->function);
 
+	WARN_ONCE(!(timer->flags & TIMER_PINNED), "TIMER_PINNED flag for "
+		  "add_timer_on() is missing: timer=%p function=%ps",
+		  timer, timer->function);
+
 	new_base = get_timer_cpu_base(timer->flags, cpu);
 
 	/*