[v2] kasan: Fix lockdep report invalid wait context

Message ID 20230327120019.1027640-1-qiang1.zhang@intel.com
State New
Headers
Series [v2] kasan: Fix lockdep report invalid wait context |

Commit Message

Zqiang March 27, 2023, noon UTC
  For kernels built with the following options and booting

CONFIG_SLUB=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_PROVE_LOCKING=y
CONFIG_PROVE_RAW_LOCK_NESTING=y

[    0.523115] [ BUG: Invalid wait context ]
[    0.523315] 6.3.0-rc1-yocto-standard+ #739 Not tainted
[    0.523649] -----------------------------
[    0.523663] swapper/0/0 is trying to lock:
[    0.523663] ffff888035611360 (&c->lock){....}-{3:3}, at: put_cpu_partial+0x2e/0x1e0
[    0.523663] other info that might help us debug this:
[    0.523663] context-{2:2}
[    0.523663] no locks held by swapper/0/0.
[    0.523663] stack backtrace:
[    0.523663] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.3.0-rc1-yocto-standard+ #739
[    0.523663] Call Trace:
[    0.523663]  <IRQ>
[    0.523663]  dump_stack_lvl+0x64/0xb0
[    0.523663]  dump_stack+0x10/0x20
[    0.523663]  __lock_acquire+0x6c4/0x3c10
[    0.523663]  lock_acquire+0x188/0x460
[    0.523663]  put_cpu_partial+0x5a/0x1e0
[    0.523663]  __slab_free+0x39a/0x520
[    0.523663]  ___cache_free+0xa9/0xc0
[    0.523663]  qlist_free_all+0x7a/0x160
[    0.523663]  per_cpu_remove_cache+0x5c/0x70
[    0.523663]  __flush_smp_call_function_queue+0xfc/0x330
[    0.523663]  generic_smp_call_function_single_interrupt+0x13/0x20
[    0.523663]  __sysvec_call_function+0x86/0x2e0
[    0.523663]  sysvec_call_function+0x73/0x90
[    0.523663]  </IRQ>
[    0.523663]  <TASK>
[    0.523663]  asm_sysvec_call_function+0x1b/0x20
[    0.523663] RIP: 0010:default_idle+0x13/0x20
[    0.523663] RSP: 0000:ffffffff83e07dc0 EFLAGS: 00000246
[    0.523663] RAX: 0000000000000000 RBX: ffffffff83e1e200 RCX: ffffffff82a83293
[    0.523663] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8119a6b1
[    0.523663] RBP: ffffffff83e07dc8 R08: 0000000000000001 R09: ffffed1006ac0d66
[    0.523663] R10: ffff888035606b2b R11: ffffed1006ac0d65 R12: 0000000000000000
[    0.523663] R13: ffffffff83e1e200 R14: ffffffff84a7d980 R15: 0000000000000000
[    0.523663]  default_idle_call+0x6c/0xa0
[    0.523663]  do_idle+0x2e1/0x330
[    0.523663]  cpu_startup_entry+0x20/0x30
[    0.523663]  rest_init+0x152/0x240
[    0.523663]  arch_call_rest_init+0x13/0x40
[    0.523663]  start_kernel+0x331/0x470
[    0.523663]  x86_64_start_reservations+0x18/0x40
[    0.523663]  x86_64_start_kernel+0xbb/0x120
[    0.523663]  secondary_startup_64_no_verify+0xe0/0xeb
[    0.523663]  </TASK>

The local_lock_irqsave() is invoked in put_cpu_partial() and happens
in IPI context, due to the CONFIG_PROVE_RAW_LOCK_NESTING=y (the
LD_WAIT_CONFIG not equal to LD_WAIT_SPIN), so acquire local_lock in
IPI context will trigger above calltrace.

This commit therefore move qlist_free_all() from hard-irq context to
task context. 

Signed-off-by: Zqiang <qiang1.zhang@intel.com>
---
 v1->v2:
 Modify the commit information and add Cc.

 mm/kasan/quarantine.c | 34 ++++++++--------------------------
 1 file changed, 8 insertions(+), 26 deletions(-)
  

Comments

Andrew Morton April 18, 2023, 10:08 p.m. UTC | #1
On Mon, 27 Mar 2023 20:00:19 +0800 Zqiang <qiang1.zhang@intel.com> wrote:

> For kernels built with the following options and booting
> 
> CONFIG_SLUB=y
> CONFIG_DEBUG_LOCKDEP=y
> CONFIG_PROVE_LOCKING=y
> CONFIG_PROVE_RAW_LOCK_NESTING=y
> 
> [    0.523115] [ BUG: Invalid wait context ]

Could we please get some reviewer input on this change?

Thanks.
  
Qi Zheng April 19, 2023, 2:52 a.m. UTC | #2
On 2023/3/27 20:00, Zqiang wrote:
> For kernels built with the following options and booting
> 
> CONFIG_SLUB=y
> CONFIG_DEBUG_LOCKDEP=y
> CONFIG_PROVE_LOCKING=y
> CONFIG_PROVE_RAW_LOCK_NESTING=y
> 
> [    0.523115] [ BUG: Invalid wait context ]
> [    0.523315] 6.3.0-rc1-yocto-standard+ #739 Not tainted
> [    0.523649] -----------------------------
> [    0.523663] swapper/0/0 is trying to lock:
> [    0.523663] ffff888035611360 (&c->lock){....}-{3:3}, at: put_cpu_partial+0x2e/0x1e0
> [    0.523663] other info that might help us debug this:
> [    0.523663] context-{2:2}
> [    0.523663] no locks held by swapper/0/0.
> [    0.523663] stack backtrace:
> [    0.523663] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.3.0-rc1-yocto-standard+ #739
> [    0.523663] Call Trace:
> [    0.523663]  <IRQ>
> [    0.523663]  dump_stack_lvl+0x64/0xb0
> [    0.523663]  dump_stack+0x10/0x20
> [    0.523663]  __lock_acquire+0x6c4/0x3c10
> [    0.523663]  lock_acquire+0x188/0x460
> [    0.523663]  put_cpu_partial+0x5a/0x1e0
> [    0.523663]  __slab_free+0x39a/0x520
> [    0.523663]  ___cache_free+0xa9/0xc0
> [    0.523663]  qlist_free_all+0x7a/0x160
> [    0.523663]  per_cpu_remove_cache+0x5c/0x70
> [    0.523663]  __flush_smp_call_function_queue+0xfc/0x330
> [    0.523663]  generic_smp_call_function_single_interrupt+0x13/0x20
> [    0.523663]  __sysvec_call_function+0x86/0x2e0
> [    0.523663]  sysvec_call_function+0x73/0x90
> [    0.523663]  </IRQ>
> [    0.523663]  <TASK>
> [    0.523663]  asm_sysvec_call_function+0x1b/0x20
> [    0.523663] RIP: 0010:default_idle+0x13/0x20
> [    0.523663] RSP: 0000:ffffffff83e07dc0 EFLAGS: 00000246
> [    0.523663] RAX: 0000000000000000 RBX: ffffffff83e1e200 RCX: ffffffff82a83293
> [    0.523663] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8119a6b1
> [    0.523663] RBP: ffffffff83e07dc8 R08: 0000000000000001 R09: ffffed1006ac0d66
> [    0.523663] R10: ffff888035606b2b R11: ffffed1006ac0d65 R12: 0000000000000000
> [    0.523663] R13: ffffffff83e1e200 R14: ffffffff84a7d980 R15: 0000000000000000
> [    0.523663]  default_idle_call+0x6c/0xa0
> [    0.523663]  do_idle+0x2e1/0x330
> [    0.523663]  cpu_startup_entry+0x20/0x30
> [    0.523663]  rest_init+0x152/0x240
> [    0.523663]  arch_call_rest_init+0x13/0x40
> [    0.523663]  start_kernel+0x331/0x470
> [    0.523663]  x86_64_start_reservations+0x18/0x40
> [    0.523663]  x86_64_start_kernel+0xbb/0x120
> [    0.523663]  secondary_startup_64_no_verify+0xe0/0xeb
> [    0.523663]  </TASK>
> 
> The local_lock_irqsave() is invoked in put_cpu_partial() and happens
> in IPI context, due to the CONFIG_PROVE_RAW_LOCK_NESTING=y (the
> LD_WAIT_CONFIG not equal to LD_WAIT_SPIN), so acquire local_lock in
> IPI context will trigger above calltrace.

Just to add another similar case:

Call Trace:
  <IRQ>
  dump_stack_lvl+0x69/0x97
  __lock_acquire+0x4a0/0x1b50
  lock_acquire+0x261/0x2c0
  ? restore_bytes+0x40/0x40
  local_lock_acquire+0x21/0x70
  ? restore_bytes+0x40/0x40
  put_cpu_partial+0x41/0x130
  ? flush_smp_call_function_queue+0x125/0x4d0
  kfree+0x250/0x2c0
  flush_smp_call_function_queue+0x125/0x4d0
  __sysvec_call_function_single+0x3a/0x100
  sysvec_call_function_single+0x4b/0x90
  </IRQ>
  <TASK>
  asm_sysvec_call_function_single+0x16/0x20

So we can't call kfree() and its friends in interrupt context?

Also +Vlastimil Babka.

Thanks,
Qi

> 
> This commit therefore move qlist_free_all() from hard-irq context to
> task context.
> 
> Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> ---
>   v1->v2:
>   Modify the commit information and add Cc.
> 
>   mm/kasan/quarantine.c | 34 ++++++++--------------------------
>   1 file changed, 8 insertions(+), 26 deletions(-)
> 
> diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
> index 75585077eb6d..152dca73f398 100644
> --- a/mm/kasan/quarantine.c
> +++ b/mm/kasan/quarantine.c
> @@ -99,7 +99,6 @@ static unsigned long quarantine_size;
>   static DEFINE_RAW_SPINLOCK(quarantine_lock);
>   DEFINE_STATIC_SRCU(remove_cache_srcu);
>   
> -#ifdef CONFIG_PREEMPT_RT
>   struct cpu_shrink_qlist {
>   	raw_spinlock_t lock;
>   	struct qlist_head qlist;
> @@ -108,7 +107,6 @@ struct cpu_shrink_qlist {
>   static DEFINE_PER_CPU(struct cpu_shrink_qlist, shrink_qlist) = {
>   	.lock = __RAW_SPIN_LOCK_UNLOCKED(shrink_qlist.lock),
>   };
> -#endif
>   
>   /* Maximum size of the global queue. */
>   static unsigned long quarantine_max_size;
> @@ -319,16 +317,6 @@ static void qlist_move_cache(struct qlist_head *from,
>   	}
>   }
>   
> -#ifndef CONFIG_PREEMPT_RT
> -static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
> -{
> -	struct kmem_cache *cache = arg;
> -	struct qlist_head to_free = QLIST_INIT;
> -
> -	qlist_move_cache(q, &to_free, cache);
> -	qlist_free_all(&to_free, cache);
> -}
> -#else
>   static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>   {
>   	struct kmem_cache *cache = arg;
> @@ -340,7 +328,6 @@ static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>   	qlist_move_cache(q, &sq->qlist, cache);
>   	raw_spin_unlock_irqrestore(&sq->lock, flags);
>   }
> -#endif
>   
>   static void per_cpu_remove_cache(void *arg)
>   {
> @@ -362,6 +349,8 @@ void kasan_quarantine_remove_cache(struct kmem_cache *cache)
>   {
>   	unsigned long flags, i;
>   	struct qlist_head to_free = QLIST_INIT;
> +	int cpu;
> +	struct cpu_shrink_qlist *sq;
>   
>   	/*
>   	 * Must be careful to not miss any objects that are being moved from
> @@ -372,20 +361,13 @@ void kasan_quarantine_remove_cache(struct kmem_cache *cache)
>   	 */
>   	on_each_cpu(per_cpu_remove_cache, cache, 1);
>   
> -#ifdef CONFIG_PREEMPT_RT
> -	{
> -		int cpu;
> -		struct cpu_shrink_qlist *sq;
> -
> -		for_each_online_cpu(cpu) {
> -			sq = per_cpu_ptr(&shrink_qlist, cpu);
> -			raw_spin_lock_irqsave(&sq->lock, flags);
> -			qlist_move_cache(&sq->qlist, &to_free, cache);
> -			raw_spin_unlock_irqrestore(&sq->lock, flags);
> -		}
> -		qlist_free_all(&to_free, cache);
> +	for_each_online_cpu(cpu) {
> +		sq = per_cpu_ptr(&shrink_qlist, cpu);
> +		raw_spin_lock_irqsave(&sq->lock, flags);
> +		qlist_move_cache(&sq->qlist, &to_free, cache);
> +		raw_spin_unlock_irqrestore(&sq->lock, flags);
>   	}
> -#endif
> +	qlist_free_all(&to_free, cache);
>   
>   	raw_spin_lock_irqsave(&quarantine_lock, flags);
>   	for (i = 0; i < QUARANTINE_BATCHES; i++) {
  
Marco Elver April 19, 2023, 7:26 a.m. UTC | #3
On Mon, 27 Mar 2023 at 13:48, Zqiang <qiang1.zhang@intel.com> wrote:
>
> For kernels built with the following options and booting
>
> CONFIG_SLUB=y
> CONFIG_DEBUG_LOCKDEP=y
> CONFIG_PROVE_LOCKING=y
> CONFIG_PROVE_RAW_LOCK_NESTING=y
>
> [    0.523115] [ BUG: Invalid wait context ]
> [    0.523315] 6.3.0-rc1-yocto-standard+ #739 Not tainted
> [    0.523649] -----------------------------
> [    0.523663] swapper/0/0 is trying to lock:
> [    0.523663] ffff888035611360 (&c->lock){....}-{3:3}, at: put_cpu_partial+0x2e/0x1e0
> [    0.523663] other info that might help us debug this:
> [    0.523663] context-{2:2}
> [    0.523663] no locks held by swapper/0/0.
> [    0.523663] stack backtrace:
> [    0.523663] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.3.0-rc1-yocto-standard+ #739
> [    0.523663] Call Trace:
> [    0.523663]  <IRQ>
> [    0.523663]  dump_stack_lvl+0x64/0xb0
> [    0.523663]  dump_stack+0x10/0x20
> [    0.523663]  __lock_acquire+0x6c4/0x3c10
> [    0.523663]  lock_acquire+0x188/0x460
> [    0.523663]  put_cpu_partial+0x5a/0x1e0
> [    0.523663]  __slab_free+0x39a/0x520
> [    0.523663]  ___cache_free+0xa9/0xc0
> [    0.523663]  qlist_free_all+0x7a/0x160
> [    0.523663]  per_cpu_remove_cache+0x5c/0x70
> [    0.523663]  __flush_smp_call_function_queue+0xfc/0x330
> [    0.523663]  generic_smp_call_function_single_interrupt+0x13/0x20
> [    0.523663]  __sysvec_call_function+0x86/0x2e0
> [    0.523663]  sysvec_call_function+0x73/0x90
> [    0.523663]  </IRQ>
> [    0.523663]  <TASK>
> [    0.523663]  asm_sysvec_call_function+0x1b/0x20
> [    0.523663] RIP: 0010:default_idle+0x13/0x20
> [    0.523663] RSP: 0000:ffffffff83e07dc0 EFLAGS: 00000246
> [    0.523663] RAX: 0000000000000000 RBX: ffffffff83e1e200 RCX: ffffffff82a83293
> [    0.523663] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8119a6b1
> [    0.523663] RBP: ffffffff83e07dc8 R08: 0000000000000001 R09: ffffed1006ac0d66
> [    0.523663] R10: ffff888035606b2b R11: ffffed1006ac0d65 R12: 0000000000000000
> [    0.523663] R13: ffffffff83e1e200 R14: ffffffff84a7d980 R15: 0000000000000000
> [    0.523663]  default_idle_call+0x6c/0xa0
> [    0.523663]  do_idle+0x2e1/0x330
> [    0.523663]  cpu_startup_entry+0x20/0x30
> [    0.523663]  rest_init+0x152/0x240
> [    0.523663]  arch_call_rest_init+0x13/0x40
> [    0.523663]  start_kernel+0x331/0x470
> [    0.523663]  x86_64_start_reservations+0x18/0x40
> [    0.523663]  x86_64_start_kernel+0xbb/0x120
> [    0.523663]  secondary_startup_64_no_verify+0xe0/0xeb
> [    0.523663]  </TASK>
>
> The local_lock_irqsave() is invoked in put_cpu_partial() and happens
> in IPI context, due to the CONFIG_PROVE_RAW_LOCK_NESTING=y (the
> LD_WAIT_CONFIG not equal to LD_WAIT_SPIN), so acquire local_lock in
> IPI context will trigger above calltrace.
>
> This commit therefore move qlist_free_all() from hard-irq context to
> task context.
>
> Signed-off-by: Zqiang <qiang1.zhang@intel.com>

PROVE_RAW_LOCK_NESTING is for the benefit of RT kernels. So it's
unclear if this is fixing anything on non-RT kernels, besides the
lockdep warning.

I'd be inclined to say that having unified code for RT and non-RT
kernels is better.

Acked-by: Marco Elver <elver@google.com>

+Cc RT folks

> ---
>  v1->v2:
>  Modify the commit information and add Cc.
>
>  mm/kasan/quarantine.c | 34 ++++++++--------------------------
>  1 file changed, 8 insertions(+), 26 deletions(-)
>
> diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
> index 75585077eb6d..152dca73f398 100644
> --- a/mm/kasan/quarantine.c
> +++ b/mm/kasan/quarantine.c
> @@ -99,7 +99,6 @@ static unsigned long quarantine_size;
>  static DEFINE_RAW_SPINLOCK(quarantine_lock);
>  DEFINE_STATIC_SRCU(remove_cache_srcu);
>
> -#ifdef CONFIG_PREEMPT_RT
>  struct cpu_shrink_qlist {
>         raw_spinlock_t lock;
>         struct qlist_head qlist;
> @@ -108,7 +107,6 @@ struct cpu_shrink_qlist {
>  static DEFINE_PER_CPU(struct cpu_shrink_qlist, shrink_qlist) = {
>         .lock = __RAW_SPIN_LOCK_UNLOCKED(shrink_qlist.lock),
>  };
> -#endif
>
>  /* Maximum size of the global queue. */
>  static unsigned long quarantine_max_size;
> @@ -319,16 +317,6 @@ static void qlist_move_cache(struct qlist_head *from,
>         }
>  }
>
> -#ifndef CONFIG_PREEMPT_RT
> -static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
> -{
> -       struct kmem_cache *cache = arg;
> -       struct qlist_head to_free = QLIST_INIT;
> -
> -       qlist_move_cache(q, &to_free, cache);
> -       qlist_free_all(&to_free, cache);
> -}
> -#else
>  static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>  {
>         struct kmem_cache *cache = arg;
> @@ -340,7 +328,6 @@ static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>         qlist_move_cache(q, &sq->qlist, cache);
>         raw_spin_unlock_irqrestore(&sq->lock, flags);
>  }
> -#endif
>
>  static void per_cpu_remove_cache(void *arg)
>  {
> @@ -362,6 +349,8 @@ void kasan_quarantine_remove_cache(struct kmem_cache *cache)
>  {
>         unsigned long flags, i;
>         struct qlist_head to_free = QLIST_INIT;
> +       int cpu;
> +       struct cpu_shrink_qlist *sq;
>
>         /*
>          * Must be careful to not miss any objects that are being moved from
> @@ -372,20 +361,13 @@ void kasan_quarantine_remove_cache(struct kmem_cache *cache)
>          */
>         on_each_cpu(per_cpu_remove_cache, cache, 1);
>
> -#ifdef CONFIG_PREEMPT_RT
> -       {
> -               int cpu;
> -               struct cpu_shrink_qlist *sq;
> -
> -               for_each_online_cpu(cpu) {
> -                       sq = per_cpu_ptr(&shrink_qlist, cpu);
> -                       raw_spin_lock_irqsave(&sq->lock, flags);
> -                       qlist_move_cache(&sq->qlist, &to_free, cache);
> -                       raw_spin_unlock_irqrestore(&sq->lock, flags);
> -               }
> -               qlist_free_all(&to_free, cache);
> +       for_each_online_cpu(cpu) {
> +               sq = per_cpu_ptr(&shrink_qlist, cpu);
> +               raw_spin_lock_irqsave(&sq->lock, flags);
> +               qlist_move_cache(&sq->qlist, &to_free, cache);
> +               raw_spin_unlock_irqrestore(&sq->lock, flags);
>         }
> -#endif
> +       qlist_free_all(&to_free, cache);
>
>         raw_spin_lock_irqsave(&quarantine_lock, flags);
>         for (i = 0; i < QUARANTINE_BATCHES; i++) {
> --
> 2.25.1
>
> --
> You received this message because you are subscribed to the Google Groups "kasan-dev" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20230327120019.1027640-1-qiang1.zhang%40intel.com.
  
Vlastimil Babka April 19, 2023, 7:50 a.m. UTC | #4
On 4/19/23 09:26, Marco Elver wrote:
> On Mon, 27 Mar 2023 at 13:48, Zqiang <qiang1.zhang@intel.com> wrote:
>>
>> For kernels built with the following options and booting
>>
>> CONFIG_SLUB=y
>> CONFIG_DEBUG_LOCKDEP=y
>> CONFIG_PROVE_LOCKING=y
>> CONFIG_PROVE_RAW_LOCK_NESTING=y
>>
>> [    0.523115] [ BUG: Invalid wait context ]
>> [    0.523315] 6.3.0-rc1-yocto-standard+ #739 Not tainted
>> [    0.523649] -----------------------------
>> [    0.523663] swapper/0/0 is trying to lock:
>> [    0.523663] ffff888035611360 (&c->lock){....}-{3:3}, at: put_cpu_partial+0x2e/0x1e0
>> [    0.523663] other info that might help us debug this:
>> [    0.523663] context-{2:2}
>> [    0.523663] no locks held by swapper/0/0.
>> [    0.523663] stack backtrace:
>> [    0.523663] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.3.0-rc1-yocto-standard+ #739
>> [    0.523663] Call Trace:
>> [    0.523663]  <IRQ>
>> [    0.523663]  dump_stack_lvl+0x64/0xb0
>> [    0.523663]  dump_stack+0x10/0x20
>> [    0.523663]  __lock_acquire+0x6c4/0x3c10
>> [    0.523663]  lock_acquire+0x188/0x460
>> [    0.523663]  put_cpu_partial+0x5a/0x1e0
>> [    0.523663]  __slab_free+0x39a/0x520
>> [    0.523663]  ___cache_free+0xa9/0xc0
>> [    0.523663]  qlist_free_all+0x7a/0x160
>> [    0.523663]  per_cpu_remove_cache+0x5c/0x70
>> [    0.523663]  __flush_smp_call_function_queue+0xfc/0x330
>> [    0.523663]  generic_smp_call_function_single_interrupt+0x13/0x20
>> [    0.523663]  __sysvec_call_function+0x86/0x2e0
>> [    0.523663]  sysvec_call_function+0x73/0x90
>> [    0.523663]  </IRQ>
>> [    0.523663]  <TASK>
>> [    0.523663]  asm_sysvec_call_function+0x1b/0x20
>> [    0.523663] RIP: 0010:default_idle+0x13/0x20
>> [    0.523663] RSP: 0000:ffffffff83e07dc0 EFLAGS: 00000246
>> [    0.523663] RAX: 0000000000000000 RBX: ffffffff83e1e200 RCX: ffffffff82a83293
>> [    0.523663] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8119a6b1
>> [    0.523663] RBP: ffffffff83e07dc8 R08: 0000000000000001 R09: ffffed1006ac0d66
>> [    0.523663] R10: ffff888035606b2b R11: ffffed1006ac0d65 R12: 0000000000000000
>> [    0.523663] R13: ffffffff83e1e200 R14: ffffffff84a7d980 R15: 0000000000000000
>> [    0.523663]  default_idle_call+0x6c/0xa0
>> [    0.523663]  do_idle+0x2e1/0x330
>> [    0.523663]  cpu_startup_entry+0x20/0x30
>> [    0.523663]  rest_init+0x152/0x240
>> [    0.523663]  arch_call_rest_init+0x13/0x40
>> [    0.523663]  start_kernel+0x331/0x470
>> [    0.523663]  x86_64_start_reservations+0x18/0x40
>> [    0.523663]  x86_64_start_kernel+0xbb/0x120
>> [    0.523663]  secondary_startup_64_no_verify+0xe0/0xeb
>> [    0.523663]  </TASK>
>>
>> The local_lock_irqsave() is invoked in put_cpu_partial() and happens
>> in IPI context, due to the CONFIG_PROVE_RAW_LOCK_NESTING=y (the
>> LD_WAIT_CONFIG not equal to LD_WAIT_SPIN), so acquire local_lock in
>> IPI context will trigger above calltrace.
>>
>> This commit therefore move qlist_free_all() from hard-irq context to
>> task context.
>>
>> Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> 
> PROVE_RAW_LOCK_NESTING is for the benefit of RT kernels. So it's
> unclear if this is fixing anything on non-RT kernels, besides the
> lockdep warning.

Yes, the problem seems to be that if there's different paths tor RT and !RT
kernels, PROVE_RAW_LOCK_NESTING doesn't know that and will trigger on the
!RT path in the !RT kernel. There's was an annotation proposed for these
cases in the thread linked below, but AFAIK it's not yet finished.

https://lore.kernel.org/all/20230412124735.GE628377@hirez.programming.kicks-ass.net/

> I'd be inclined to say that having unified code for RT and non-RT
> kernels is better.

Agreed it should be better, as long as it's viable.

> Acked-by: Marco Elver <elver@google.com>
> 
> +Cc RT folks
> 
>> ---
>>  v1->v2:
>>  Modify the commit information and add Cc.
>>
>>  mm/kasan/quarantine.c | 34 ++++++++--------------------------
>>  1 file changed, 8 insertions(+), 26 deletions(-)
>>
>> diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
>> index 75585077eb6d..152dca73f398 100644
>> --- a/mm/kasan/quarantine.c
>> +++ b/mm/kasan/quarantine.c
>> @@ -99,7 +99,6 @@ static unsigned long quarantine_size;
>>  static DEFINE_RAW_SPINLOCK(quarantine_lock);
>>  DEFINE_STATIC_SRCU(remove_cache_srcu);
>>
>> -#ifdef CONFIG_PREEMPT_RT
>>  struct cpu_shrink_qlist {
>>         raw_spinlock_t lock;
>>         struct qlist_head qlist;
>> @@ -108,7 +107,6 @@ struct cpu_shrink_qlist {
>>  static DEFINE_PER_CPU(struct cpu_shrink_qlist, shrink_qlist) = {
>>         .lock = __RAW_SPIN_LOCK_UNLOCKED(shrink_qlist.lock),
>>  };
>> -#endif
>>
>>  /* Maximum size of the global queue. */
>>  static unsigned long quarantine_max_size;
>> @@ -319,16 +317,6 @@ static void qlist_move_cache(struct qlist_head *from,
>>         }
>>  }
>>
>> -#ifndef CONFIG_PREEMPT_RT
>> -static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>> -{
>> -       struct kmem_cache *cache = arg;
>> -       struct qlist_head to_free = QLIST_INIT;
>> -
>> -       qlist_move_cache(q, &to_free, cache);
>> -       qlist_free_all(&to_free, cache);
>> -}
>> -#else
>>  static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>>  {
>>         struct kmem_cache *cache = arg;
>> @@ -340,7 +328,6 @@ static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>>         qlist_move_cache(q, &sq->qlist, cache);
>>         raw_spin_unlock_irqrestore(&sq->lock, flags);
>>  }
>> -#endif
>>
>>  static void per_cpu_remove_cache(void *arg)
>>  {
>> @@ -362,6 +349,8 @@ void kasan_quarantine_remove_cache(struct kmem_cache *cache)
>>  {
>>         unsigned long flags, i;
>>         struct qlist_head to_free = QLIST_INIT;
>> +       int cpu;
>> +       struct cpu_shrink_qlist *sq;
>>
>>         /*
>>          * Must be careful to not miss any objects that are being moved from
>> @@ -372,20 +361,13 @@ void kasan_quarantine_remove_cache(struct kmem_cache *cache)
>>          */
>>         on_each_cpu(per_cpu_remove_cache, cache, 1);
>>
>> -#ifdef CONFIG_PREEMPT_RT
>> -       {
>> -               int cpu;
>> -               struct cpu_shrink_qlist *sq;
>> -
>> -               for_each_online_cpu(cpu) {
>> -                       sq = per_cpu_ptr(&shrink_qlist, cpu);
>> -                       raw_spin_lock_irqsave(&sq->lock, flags);
>> -                       qlist_move_cache(&sq->qlist, &to_free, cache);
>> -                       raw_spin_unlock_irqrestore(&sq->lock, flags);
>> -               }
>> -               qlist_free_all(&to_free, cache);
>> +       for_each_online_cpu(cpu) {
>> +               sq = per_cpu_ptr(&shrink_qlist, cpu);
>> +               raw_spin_lock_irqsave(&sq->lock, flags);
>> +               qlist_move_cache(&sq->qlist, &to_free, cache);
>> +               raw_spin_unlock_irqrestore(&sq->lock, flags);
>>         }
>> -#endif
>> +       qlist_free_all(&to_free, cache);
>>
>>         raw_spin_lock_irqsave(&quarantine_lock, flags);
>>         for (i = 0; i < QUARANTINE_BATCHES; i++) {
>> --
>> 2.25.1
>>
>> --
>> You received this message because you are subscribed to the Google Groups "kasan-dev" group.
>> To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@googlegroups.com.
>> To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20230327120019.1027640-1-qiang1.zhang%40intel.com.
>
  
Vlastimil Babka April 19, 2023, 8:03 a.m. UTC | #5
On 4/19/23 04:52, Qi Zheng wrote:
> 
> 
> On 2023/3/27 20:00, Zqiang wrote:
>> For kernels built with the following options and booting
>> 
>> CONFIG_SLUB=y
>> CONFIG_DEBUG_LOCKDEP=y
>> CONFIG_PROVE_LOCKING=y
>> CONFIG_PROVE_RAW_LOCK_NESTING=y
>> 
>> [    0.523115] [ BUG: Invalid wait context ]
>> [    0.523315] 6.3.0-rc1-yocto-standard+ #739 Not tainted
>> [    0.523649] -----------------------------
>> [    0.523663] swapper/0/0 is trying to lock:
>> [    0.523663] ffff888035611360 (&c->lock){....}-{3:3}, at: put_cpu_partial+0x2e/0x1e0
>> [    0.523663] other info that might help us debug this:
>> [    0.523663] context-{2:2}
>> [    0.523663] no locks held by swapper/0/0.
>> [    0.523663] stack backtrace:
>> [    0.523663] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.3.0-rc1-yocto-standard+ #739
>> [    0.523663] Call Trace:
>> [    0.523663]  <IRQ>
>> [    0.523663]  dump_stack_lvl+0x64/0xb0
>> [    0.523663]  dump_stack+0x10/0x20
>> [    0.523663]  __lock_acquire+0x6c4/0x3c10
>> [    0.523663]  lock_acquire+0x188/0x460
>> [    0.523663]  put_cpu_partial+0x5a/0x1e0
>> [    0.523663]  __slab_free+0x39a/0x520
>> [    0.523663]  ___cache_free+0xa9/0xc0
>> [    0.523663]  qlist_free_all+0x7a/0x160
>> [    0.523663]  per_cpu_remove_cache+0x5c/0x70
>> [    0.523663]  __flush_smp_call_function_queue+0xfc/0x330
>> [    0.523663]  generic_smp_call_function_single_interrupt+0x13/0x20
>> [    0.523663]  __sysvec_call_function+0x86/0x2e0
>> [    0.523663]  sysvec_call_function+0x73/0x90
>> [    0.523663]  </IRQ>
>> [    0.523663]  <TASK>
>> [    0.523663]  asm_sysvec_call_function+0x1b/0x20
>> [    0.523663] RIP: 0010:default_idle+0x13/0x20
>> [    0.523663] RSP: 0000:ffffffff83e07dc0 EFLAGS: 00000246
>> [    0.523663] RAX: 0000000000000000 RBX: ffffffff83e1e200 RCX: ffffffff82a83293
>> [    0.523663] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8119a6b1
>> [    0.523663] RBP: ffffffff83e07dc8 R08: 0000000000000001 R09: ffffed1006ac0d66
>> [    0.523663] R10: ffff888035606b2b R11: ffffed1006ac0d65 R12: 0000000000000000
>> [    0.523663] R13: ffffffff83e1e200 R14: ffffffff84a7d980 R15: 0000000000000000
>> [    0.523663]  default_idle_call+0x6c/0xa0
>> [    0.523663]  do_idle+0x2e1/0x330
>> [    0.523663]  cpu_startup_entry+0x20/0x30
>> [    0.523663]  rest_init+0x152/0x240
>> [    0.523663]  arch_call_rest_init+0x13/0x40
>> [    0.523663]  start_kernel+0x331/0x470
>> [    0.523663]  x86_64_start_reservations+0x18/0x40
>> [    0.523663]  x86_64_start_kernel+0xbb/0x120
>> [    0.523663]  secondary_startup_64_no_verify+0xe0/0xeb
>> [    0.523663]  </TASK>
>> 
>> The local_lock_irqsave() is invoked in put_cpu_partial() and happens
>> in IPI context, due to the CONFIG_PROVE_RAW_LOCK_NESTING=y (the
>> LD_WAIT_CONFIG not equal to LD_WAIT_SPIN), so acquire local_lock in
>> IPI context will trigger above calltrace.
> 
> Just to add another similar case:
> 
> Call Trace:
>   <IRQ>
>   dump_stack_lvl+0x69/0x97
>   __lock_acquire+0x4a0/0x1b50
>   lock_acquire+0x261/0x2c0
>   ? restore_bytes+0x40/0x40
>   local_lock_acquire+0x21/0x70
>   ? restore_bytes+0x40/0x40
>   put_cpu_partial+0x41/0x130
>   ? flush_smp_call_function_queue+0x125/0x4d0
>   kfree+0x250/0x2c0
>   flush_smp_call_function_queue+0x125/0x4d0
>   __sysvec_call_function_single+0x3a/0x100
>   sysvec_call_function_single+0x4b/0x90
>   </IRQ>
>   <TASK>
>   asm_sysvec_call_function_single+0x16/0x20
> 
> So we can't call kfree() and its friends in interrupt context?

We can (well not RT "hard IRQ" context AFAIK, but that shouldn't be the case
here), although I don't see from the part that you posted if it's again
CONFIG_PROVE_RAW_LOCK_NESTING clashing with something else (no KASAN in the
trace or I'm missing it?)

> Also +Vlastimil Babka.
> 
> Thanks,
> Qi
> 
>> 
>> This commit therefore move qlist_free_all() from hard-irq context to
>> task context.
>> 
>> Signed-off-by: Zqiang <qiang1.zhang@intel.com>
>> ---
>>   v1->v2:
>>   Modify the commit information and add Cc.
>> 
>>   mm/kasan/quarantine.c | 34 ++++++++--------------------------
>>   1 file changed, 8 insertions(+), 26 deletions(-)
>> 
>> diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
>> index 75585077eb6d..152dca73f398 100644
>> --- a/mm/kasan/quarantine.c
>> +++ b/mm/kasan/quarantine.c
>> @@ -99,7 +99,6 @@ static unsigned long quarantine_size;
>>   static DEFINE_RAW_SPINLOCK(quarantine_lock);
>>   DEFINE_STATIC_SRCU(remove_cache_srcu);
>>   
>> -#ifdef CONFIG_PREEMPT_RT
>>   struct cpu_shrink_qlist {
>>   	raw_spinlock_t lock;
>>   	struct qlist_head qlist;
>> @@ -108,7 +107,6 @@ struct cpu_shrink_qlist {
>>   static DEFINE_PER_CPU(struct cpu_shrink_qlist, shrink_qlist) = {
>>   	.lock = __RAW_SPIN_LOCK_UNLOCKED(shrink_qlist.lock),
>>   };
>> -#endif
>>   
>>   /* Maximum size of the global queue. */
>>   static unsigned long quarantine_max_size;
>> @@ -319,16 +317,6 @@ static void qlist_move_cache(struct qlist_head *from,
>>   	}
>>   }
>>   
>> -#ifndef CONFIG_PREEMPT_RT
>> -static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>> -{
>> -	struct kmem_cache *cache = arg;
>> -	struct qlist_head to_free = QLIST_INIT;
>> -
>> -	qlist_move_cache(q, &to_free, cache);
>> -	qlist_free_all(&to_free, cache);
>> -}
>> -#else
>>   static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>>   {
>>   	struct kmem_cache *cache = arg;
>> @@ -340,7 +328,6 @@ static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>>   	qlist_move_cache(q, &sq->qlist, cache);
>>   	raw_spin_unlock_irqrestore(&sq->lock, flags);
>>   }
>> -#endif
>>   
>>   static void per_cpu_remove_cache(void *arg)
>>   {
>> @@ -362,6 +349,8 @@ void kasan_quarantine_remove_cache(struct kmem_cache *cache)
>>   {
>>   	unsigned long flags, i;
>>   	struct qlist_head to_free = QLIST_INIT;
>> +	int cpu;
>> +	struct cpu_shrink_qlist *sq;
>>   
>>   	/*
>>   	 * Must be careful to not miss any objects that are being moved from
>> @@ -372,20 +361,13 @@ void kasan_quarantine_remove_cache(struct kmem_cache *cache)
>>   	 */
>>   	on_each_cpu(per_cpu_remove_cache, cache, 1);
>>   
>> -#ifdef CONFIG_PREEMPT_RT
>> -	{
>> -		int cpu;
>> -		struct cpu_shrink_qlist *sq;
>> -
>> -		for_each_online_cpu(cpu) {
>> -			sq = per_cpu_ptr(&shrink_qlist, cpu);
>> -			raw_spin_lock_irqsave(&sq->lock, flags);
>> -			qlist_move_cache(&sq->qlist, &to_free, cache);
>> -			raw_spin_unlock_irqrestore(&sq->lock, flags);
>> -		}
>> -		qlist_free_all(&to_free, cache);
>> +	for_each_online_cpu(cpu) {
>> +		sq = per_cpu_ptr(&shrink_qlist, cpu);
>> +		raw_spin_lock_irqsave(&sq->lock, flags);
>> +		qlist_move_cache(&sq->qlist, &to_free, cache);
>> +		raw_spin_unlock_irqrestore(&sq->lock, flags);
>>   	}
>> -#endif
>> +	qlist_free_all(&to_free, cache);
>>   
>>   	raw_spin_lock_irqsave(&quarantine_lock, flags);
>>   	for (i = 0; i < QUARANTINE_BATCHES; i++) {
>
  
Qi Zheng April 19, 2023, 9:20 a.m. UTC | #6
On 2023/4/19 16:03, Vlastimil Babka wrote:
> On 4/19/23 04:52, Qi Zheng wrote:
>>
>>
>> On 2023/3/27 20:00, Zqiang wrote:
>>> For kernels built with the following options and booting
>>>
>>> CONFIG_SLUB=y
>>> CONFIG_DEBUG_LOCKDEP=y
>>> CONFIG_PROVE_LOCKING=y
>>> CONFIG_PROVE_RAW_LOCK_NESTING=y
>>>
>>> [    0.523115] [ BUG: Invalid wait context ]
>>> [    0.523315] 6.3.0-rc1-yocto-standard+ #739 Not tainted
>>> [    0.523649] -----------------------------
>>> [    0.523663] swapper/0/0 is trying to lock:
>>> [    0.523663] ffff888035611360 (&c->lock){....}-{3:3}, at: put_cpu_partial+0x2e/0x1e0
>>> [    0.523663] other info that might help us debug this:
>>> [    0.523663] context-{2:2}
>>> [    0.523663] no locks held by swapper/0/0.
>>> [    0.523663] stack backtrace:
>>> [    0.523663] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.3.0-rc1-yocto-standard+ #739
>>> [    0.523663] Call Trace:
>>> [    0.523663]  <IRQ>
>>> [    0.523663]  dump_stack_lvl+0x64/0xb0
>>> [    0.523663]  dump_stack+0x10/0x20
>>> [    0.523663]  __lock_acquire+0x6c4/0x3c10
>>> [    0.523663]  lock_acquire+0x188/0x460
>>> [    0.523663]  put_cpu_partial+0x5a/0x1e0
>>> [    0.523663]  __slab_free+0x39a/0x520
>>> [    0.523663]  ___cache_free+0xa9/0xc0
>>> [    0.523663]  qlist_free_all+0x7a/0x160
>>> [    0.523663]  per_cpu_remove_cache+0x5c/0x70
>>> [    0.523663]  __flush_smp_call_function_queue+0xfc/0x330
>>> [    0.523663]  generic_smp_call_function_single_interrupt+0x13/0x20
>>> [    0.523663]  __sysvec_call_function+0x86/0x2e0
>>> [    0.523663]  sysvec_call_function+0x73/0x90
>>> [    0.523663]  </IRQ>
>>> [    0.523663]  <TASK>
>>> [    0.523663]  asm_sysvec_call_function+0x1b/0x20
>>> [    0.523663] RIP: 0010:default_idle+0x13/0x20
>>> [    0.523663] RSP: 0000:ffffffff83e07dc0 EFLAGS: 00000246
>>> [    0.523663] RAX: 0000000000000000 RBX: ffffffff83e1e200 RCX: ffffffff82a83293
>>> [    0.523663] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8119a6b1
>>> [    0.523663] RBP: ffffffff83e07dc8 R08: 0000000000000001 R09: ffffed1006ac0d66
>>> [    0.523663] R10: ffff888035606b2b R11: ffffed1006ac0d65 R12: 0000000000000000
>>> [    0.523663] R13: ffffffff83e1e200 R14: ffffffff84a7d980 R15: 0000000000000000
>>> [    0.523663]  default_idle_call+0x6c/0xa0
>>> [    0.523663]  do_idle+0x2e1/0x330
>>> [    0.523663]  cpu_startup_entry+0x20/0x30
>>> [    0.523663]  rest_init+0x152/0x240
>>> [    0.523663]  arch_call_rest_init+0x13/0x40
>>> [    0.523663]  start_kernel+0x331/0x470
>>> [    0.523663]  x86_64_start_reservations+0x18/0x40
>>> [    0.523663]  x86_64_start_kernel+0xbb/0x120
>>> [    0.523663]  secondary_startup_64_no_verify+0xe0/0xeb
>>> [    0.523663]  </TASK>
>>>
>>> The local_lock_irqsave() is invoked in put_cpu_partial() and happens
>>> in IPI context, due to the CONFIG_PROVE_RAW_LOCK_NESTING=y (the
>>> LD_WAIT_CONFIG not equal to LD_WAIT_SPIN), so acquire local_lock in
>>> IPI context will trigger above calltrace.
>>
>> Just to add another similar case:
>>
>> Call Trace:
>>    <IRQ>
>>    dump_stack_lvl+0x69/0x97
>>    __lock_acquire+0x4a0/0x1b50
>>    lock_acquire+0x261/0x2c0
>>    ? restore_bytes+0x40/0x40
>>    local_lock_acquire+0x21/0x70
>>    ? restore_bytes+0x40/0x40
>>    put_cpu_partial+0x41/0x130
>>    ? flush_smp_call_function_queue+0x125/0x4d0
>>    kfree+0x250/0x2c0
>>    flush_smp_call_function_queue+0x125/0x4d0
>>    __sysvec_call_function_single+0x3a/0x100
>>    sysvec_call_function_single+0x4b/0x90
>>    </IRQ>
>>    <TASK>
>>    asm_sysvec_call_function_single+0x16/0x20
>>
>> So we can't call kfree() and its friends in interrupt context?
> 
> We can (well not RT "hard IRQ" context AFAIK, but that shouldn't be the case
> here), although I don't see from the part that you posted if it's again
> CONFIG_PROVE_RAW_LOCK_NESTING clashing with something else (no KASAN in the
> trace or I'm missing it?)

I lost the corresponding vmlinux, but this should be a similar issue, we
should continue to make lockdep recognize the !RT path.

> 
>> Also +Vlastimil Babka.
>>
>> Thanks,
>> Qi
>>
>>>
>>> This commit therefore move qlist_free_all() from hard-irq context to
>>> task context.
>>>
>>> Signed-off-by: Zqiang <qiang1.zhang@intel.com>
>>> ---
>>>    v1->v2:
>>>    Modify the commit information and add Cc.
>>>
>>>    mm/kasan/quarantine.c | 34 ++++++++--------------------------
>>>    1 file changed, 8 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
>>> index 75585077eb6d..152dca73f398 100644
>>> --- a/mm/kasan/quarantine.c
>>> +++ b/mm/kasan/quarantine.c
>>> @@ -99,7 +99,6 @@ static unsigned long quarantine_size;
>>>    static DEFINE_RAW_SPINLOCK(quarantine_lock);
>>>    DEFINE_STATIC_SRCU(remove_cache_srcu);
>>>    
>>> -#ifdef CONFIG_PREEMPT_RT
>>>    struct cpu_shrink_qlist {
>>>    	raw_spinlock_t lock;
>>>    	struct qlist_head qlist;
>>> @@ -108,7 +107,6 @@ struct cpu_shrink_qlist {
>>>    static DEFINE_PER_CPU(struct cpu_shrink_qlist, shrink_qlist) = {
>>>    	.lock = __RAW_SPIN_LOCK_UNLOCKED(shrink_qlist.lock),
>>>    };
>>> -#endif
>>>    
>>>    /* Maximum size of the global queue. */
>>>    static unsigned long quarantine_max_size;
>>> @@ -319,16 +317,6 @@ static void qlist_move_cache(struct qlist_head *from,
>>>    	}
>>>    }
>>>    
>>> -#ifndef CONFIG_PREEMPT_RT
>>> -static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>>> -{
>>> -	struct kmem_cache *cache = arg;
>>> -	struct qlist_head to_free = QLIST_INIT;
>>> -
>>> -	qlist_move_cache(q, &to_free, cache);
>>> -	qlist_free_all(&to_free, cache);
>>> -}
>>> -#else
>>>    static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>>>    {
>>>    	struct kmem_cache *cache = arg;
>>> @@ -340,7 +328,6 @@ static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
>>>    	qlist_move_cache(q, &sq->qlist, cache);
>>>    	raw_spin_unlock_irqrestore(&sq->lock, flags);
>>>    }
>>> -#endif
>>>    
>>>    static void per_cpu_remove_cache(void *arg)
>>>    {
>>> @@ -362,6 +349,8 @@ void kasan_quarantine_remove_cache(struct kmem_cache *cache)
>>>    {
>>>    	unsigned long flags, i;
>>>    	struct qlist_head to_free = QLIST_INIT;
>>> +	int cpu;
>>> +	struct cpu_shrink_qlist *sq;
>>>    
>>>    	/*
>>>    	 * Must be careful to not miss any objects that are being moved from
>>> @@ -372,20 +361,13 @@ void kasan_quarantine_remove_cache(struct kmem_cache *cache)
>>>    	 */
>>>    	on_each_cpu(per_cpu_remove_cache, cache, 1);
>>>    
>>> -#ifdef CONFIG_PREEMPT_RT
>>> -	{
>>> -		int cpu;
>>> -		struct cpu_shrink_qlist *sq;
>>> -
>>> -		for_each_online_cpu(cpu) {
>>> -			sq = per_cpu_ptr(&shrink_qlist, cpu);
>>> -			raw_spin_lock_irqsave(&sq->lock, flags);
>>> -			qlist_move_cache(&sq->qlist, &to_free, cache);
>>> -			raw_spin_unlock_irqrestore(&sq->lock, flags);
>>> -		}
>>> -		qlist_free_all(&to_free, cache);
>>> +	for_each_online_cpu(cpu) {
>>> +		sq = per_cpu_ptr(&shrink_qlist, cpu);
>>> +		raw_spin_lock_irqsave(&sq->lock, flags);
>>> +		qlist_move_cache(&sq->qlist, &to_free, cache);
>>> +		raw_spin_unlock_irqrestore(&sq->lock, flags);
>>>    	}
>>> -#endif
>>> +	qlist_free_all(&to_free, cache);
>>>    
>>>    	raw_spin_lock_irqsave(&quarantine_lock, flags);
>>>    	for (i = 0; i < QUARANTINE_BATCHES; i++) {
>>
> 
>
  
Peter Zijlstra April 25, 2023, 3:03 p.m. UTC | #7
On Wed, Apr 19, 2023 at 09:50:23AM +0200, Vlastimil Babka wrote:
> Yes, the problem seems to be that if there's different paths tor RT and !RT
> kernels, PROVE_RAW_LOCK_NESTING doesn't know that and will trigger on the
> !RT path in the !RT kernel. There's was an annotation proposed for these
> cases in the thread linked below, but AFAIK it's not yet finished.
> 
> https://lore.kernel.org/all/20230412124735.GE628377@hirez.programming.kicks-ass.net/

Oh, thanks for the reminder, I'd completely forgotten about it. I just
replied with a new version... fingers crossed ;-)
  

Patch

diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 75585077eb6d..152dca73f398 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -99,7 +99,6 @@  static unsigned long quarantine_size;
 static DEFINE_RAW_SPINLOCK(quarantine_lock);
 DEFINE_STATIC_SRCU(remove_cache_srcu);
 
-#ifdef CONFIG_PREEMPT_RT
 struct cpu_shrink_qlist {
 	raw_spinlock_t lock;
 	struct qlist_head qlist;
@@ -108,7 +107,6 @@  struct cpu_shrink_qlist {
 static DEFINE_PER_CPU(struct cpu_shrink_qlist, shrink_qlist) = {
 	.lock = __RAW_SPIN_LOCK_UNLOCKED(shrink_qlist.lock),
 };
-#endif
 
 /* Maximum size of the global queue. */
 static unsigned long quarantine_max_size;
@@ -319,16 +317,6 @@  static void qlist_move_cache(struct qlist_head *from,
 	}
 }
 
-#ifndef CONFIG_PREEMPT_RT
-static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
-{
-	struct kmem_cache *cache = arg;
-	struct qlist_head to_free = QLIST_INIT;
-
-	qlist_move_cache(q, &to_free, cache);
-	qlist_free_all(&to_free, cache);
-}
-#else
 static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
 {
 	struct kmem_cache *cache = arg;
@@ -340,7 +328,6 @@  static void __per_cpu_remove_cache(struct qlist_head *q, void *arg)
 	qlist_move_cache(q, &sq->qlist, cache);
 	raw_spin_unlock_irqrestore(&sq->lock, flags);
 }
-#endif
 
 static void per_cpu_remove_cache(void *arg)
 {
@@ -362,6 +349,8 @@  void kasan_quarantine_remove_cache(struct kmem_cache *cache)
 {
 	unsigned long flags, i;
 	struct qlist_head to_free = QLIST_INIT;
+	int cpu;
+	struct cpu_shrink_qlist *sq;
 
 	/*
 	 * Must be careful to not miss any objects that are being moved from
@@ -372,20 +361,13 @@  void kasan_quarantine_remove_cache(struct kmem_cache *cache)
 	 */
 	on_each_cpu(per_cpu_remove_cache, cache, 1);
 
-#ifdef CONFIG_PREEMPT_RT
-	{
-		int cpu;
-		struct cpu_shrink_qlist *sq;
-
-		for_each_online_cpu(cpu) {
-			sq = per_cpu_ptr(&shrink_qlist, cpu);
-			raw_spin_lock_irqsave(&sq->lock, flags);
-			qlist_move_cache(&sq->qlist, &to_free, cache);
-			raw_spin_unlock_irqrestore(&sq->lock, flags);
-		}
-		qlist_free_all(&to_free, cache);
+	for_each_online_cpu(cpu) {
+		sq = per_cpu_ptr(&shrink_qlist, cpu);
+		raw_spin_lock_irqsave(&sq->lock, flags);
+		qlist_move_cache(&sq->qlist, &to_free, cache);
+		raw_spin_unlock_irqrestore(&sq->lock, flags);
 	}
-#endif
+	qlist_free_all(&to_free, cache);
 
 	raw_spin_lock_irqsave(&quarantine_lock, flags);
 	for (i = 0; i < QUARANTINE_BATCHES; i++) {