[1/2] test_xarray: add tests for advanced multi-index use

Message ID 20231104005747.1389762-2-da.gomez@samsung.com
State New
Headers
Series XArray multi-index tests |

Commit Message

Daniel Gomez Nov. 4, 2023, 12:57 a.m. UTC
  From: Luis Chamberlain <mcgrof@kernel.org>

The multi index selftests are great but they don't replicate
how we deal with the page cache exactly, which makes it a bit
hard to follow as the page cache uses the advanced API.

Add tests which use the advanced API, mimicking what we do in the
page cache, while at it, extend the example to do what is needed for
min order support.

Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Tested-by: Daniel Gomez <da.gomez@samsung.com>
---
 lib/test_xarray.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)
  

Comments

kernel test robot Nov. 15, 2023, 3:02 p.m. UTC | #1
Hello,

kernel test robot noticed "WARNING:suspicious_RCU_usage" on:

commit: 68f563aa7e55d45795a0396c36a4d3d485f845a0 ("[PATCH 1/2] test_xarray: add tests for advanced multi-index use")
url: https://github.com/intel-lab-lkp/linux/commits/Daniel-Gomez/test_xarray-add-tests-for-advanced-multi-index-use/20231104-085907
base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git e392ea4d4d00880bf94550151b1ace4f88a4b17a
patch link: https://lore.kernel.org/all/20231104005747.1389762-2-da.gomez@samsung.com/
patch subject: [PATCH 1/2] test_xarray: add tests for advanced multi-index use

in testcase: boot

compiler: gcc-12
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)


+----------------------------------------------------------------------------------+------------+------------+
|                                                                                  | e392ea4d4d | 68f563aa7e |
+----------------------------------------------------------------------------------+------------+------------+
| WARNING:suspicious_RCU_usage                                                     | 0          | 9          |
| include/linux/xarray.h:#suspicious_rcu_dereference_check()usage                  | 0          | 9          |
| include/linux/xarray.h:#suspicious_rcu_dereference_protected()usage              | 0          | 9          |
| INFO:rcu_preempt_detected_stalls_on_CPUs/tasks                                   | 0          | 9          |
+----------------------------------------------------------------------------------+------------+------------+


If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202311152254.610174ff-oliver.sang@intel.com


[  304.121213][    T1] WARNING: suspicious RCU usage
[  304.122111][    T1] 6.6.0-12894-g68f563aa7e55 #1 Tainted: G                 N
[  304.123404][    T1] -----------------------------
[  304.124297][    T1] include/linux/xarray.h:1200 suspicious rcu_dereference_check() usage!
[  304.125787][    T1]
[  304.125787][    T1] other info that might help us debug this:
[  304.125787][    T1]
[  304.127648][    T1]
[  304.127648][    T1] rcu_scheduler_active = 2, debug_locks = 1
[  304.129454][    T1] no locks held by swapper/1.
[  304.130560][    T1]
[  304.130560][    T1] stack backtrace:
[  304.131863][    T1] CPU: 0 PID: 1 Comm: swapper Tainted: G                 N 6.6.0-12894-g68f563aa7e55 #1
[  304.132791][    T1] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[  304.132791][    T1] Call Trace:
[  304.132791][    T1]  <TASK>
[ 304.132791][ T1] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) 
[ 304.132791][ T1] lockdep_rcu_suspicious (include/linux/context_tracking.h:153 kernel/locking/lockdep.c:6712) 
[ 304.132791][ T1] xas_start (include/linux/xarray.h:1200 include/linux/xarray.h:1198 lib/xarray.c:190) 
[ 304.132791][ T1] xas_load (lib/xarray.c:237) 
[ 304.132791][ T1] xas_store (lib/xarray.c:789) 
[ 304.132791][ T1] ? xa_load (include/linux/rcupdate.h:306 include/linux/rcupdate.h:780 lib/xarray.c:1465) 
[ 304.132791][ T1] check_xa_multi_store_adv_delete+0xf0/0x120 
[ 304.132791][ T1] ? check_find_1+0x690/0x690 
[ 304.132791][ T1] check_xa_multi_store_adv+0x17d/0x650 
[ 304.132791][ T1] check_multi_store_advanced+0x36/0x90 
[ 304.132791][ T1] ? check_xas_retry+0xee0/0xee0 
[ 304.132791][ T1] xarray_checks (lib/test_xarray.c:1941) 
[ 304.132791][ T1] do_one_initcall (arch/x86/include/asm/atomic.h:23 include/linux/atomic/atomic-arch-fallback.h:457 include/linux/jump_label.h:260 include/linux/jump_label.h:270 include/trace/events/initcall.h:48 init/main.c:1237) 
[ 304.132791][ T1] ? trace_event_raw_event_initcall_level (init/main.c:1227) 
[ 304.132791][ T1] do_initcalls (init/main.c:1297 init/main.c:1314) 
[ 304.132791][ T1] kernel_init_freeable (init/main.c:1553) 
[ 304.132791][ T1] ? rest_init (init/main.c:1433) 
[ 304.132791][ T1] kernel_init (init/main.c:1443) 
[ 304.132791][ T1] ? rest_init (init/main.c:1433) 
[ 304.132791][ T1] ret_from_fork (arch/x86/kernel/process.c:153) 
[ 304.132791][ T1] ? rest_init (init/main.c:1433) 
[ 304.132791][ T1] ret_from_fork_asm (arch/x86/entry/entry_64.S:250) 
[  304.132791][    T1]  </TASK>
[  304.161541][    T1]
[  304.162053][    T1] =============================
[  304.162982][    T1] WARNING: suspicious RCU usage
[  304.163917][    T1] 6.6.0-12894-g68f563aa7e55 #1 Tainted: G                 N
[  304.165264][    T1] -----------------------------
[  304.166440][    T1] include/linux/xarray.h:1216 suspicious rcu_dereference_check() usage!
[  304.167982][    T1]
[  304.167982][    T1] other info that might help us debug this:
[  304.167982][    T1]
[  304.169978][    T1]
[  304.169978][    T1] rcu_scheduler_active = 2, debug_locks = 1
[  304.171380][    T1] no locks held by swapper/1.
[  304.172271][    T1]
[  304.172271][    T1] stack backtrace:
[  304.173737][    T1] CPU: 0 PID: 1 Comm: swapper Tainted: G                 N 6.6.0-12894-g68f563aa7e55 #1
[  304.175951][    T1] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[  304.177150][    T1] Call Trace:
[  304.177150][    T1]  <TASK>
[ 304.177150][ T1] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) 
[ 304.177150][ T1] lockdep_rcu_suspicious (include/linux/context_tracking.h:153 kernel/locking/lockdep.c:6712) 
[ 304.177150][ T1] xas_descend (include/linux/xarray.h:1216 include/linux/xarray.h:1212 lib/xarray.c:206) 
[ 304.177150][ T1] xas_load (lib/xarray.c:245) 
[ 304.177150][ T1] xas_store (lib/xarray.c:789) 
[ 304.177150][ T1] ? xa_load (include/linux/rcupdate.h:306 include/linux/rcupdate.h:780 lib/xarray.c:1465) 
[ 304.177150][ T1] check_xa_multi_store_adv_delete+0xf0/0x120 
[ 304.177150][ T1] ? check_find_1+0x690/0x690 
[ 304.177150][ T1] check_xa_multi_store_adv+0x17d/0x650 
[ 304.177150][ T1] check_multi_store_advanced+0x36/0x90 
[ 304.177150][ T1] ? check_xas_retry+0xee0/0xee0 
[ 304.177150][ T1] xarray_checks (lib/test_xarray.c:1941) 
[ 304.177150][ T1] do_one_initcall (arch/x86/include/asm/atomic.h:23 include/linux/atomic/atomic-arch-fallback.h:457 include/linux/jump_label.h:260 include/linux/jump_label.h:270 include/trace/events/initcall.h:48 init/main.c:1237) 
[ 304.177150][ T1] ? trace_event_raw_event_initcall_level (init/main.c:1227) 
[ 304.177150][ T1] do_initcalls (init/main.c:1297 init/main.c:1314) 
[ 304.177150][ T1] kernel_init_freeable (init/main.c:1553) 
[ 304.177150][ T1] ? rest_init (init/main.c:1433) 
[ 304.177150][ T1] kernel_init (init/main.c:1443) 
[ 304.177150][ T1] ? rest_init (init/main.c:1433) 
[ 304.177150][ T1] ret_from_fork (arch/x86/kernel/process.c:153) 
[ 304.177150][ T1] ? rest_init (init/main.c:1433) 
[ 304.177150][ T1] ret_from_fork_asm (arch/x86/entry/entry_64.S:250) 
[  304.177150][    T1]  </TASK>
[  304.209019][    T1]
[  304.209562][    T1] =============================
[  304.210489][    T1] WARNING: suspicious RCU usage
[  304.211422][    T1] 6.6.0-12894-g68f563aa7e55 #1 Tainted: G                 N
[  304.212738][    T1] -----------------------------
[  304.213856][    T1] include/linux/xarray.h:1225 suspicious rcu_dereference_protected() usage!
[  304.216097][    T1]
[  304.216097][    T1] other info that might help us debug this:
[  304.216097][    T1]
[  304.218820][    T1]
[  304.218820][    T1] rcu_scheduler_active = 2, debug_locks = 1
[  304.220914][    T1] no locks held by swapper/1.
[  304.222183][    T1]
[  304.222183][    T1] stack backtrace:
[  304.223559][    T1] CPU: 0 PID: 1 Comm: swapper Tainted: G                 N 6.6.0-12894-g68f563aa7e55 #1
[  304.224892][    T1] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[  304.224892][    T1] Call Trace:
[  304.224892][    T1]  <TASK>
[ 304.224892][ T1] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) 
[ 304.224892][ T1] lockdep_rcu_suspicious (include/linux/context_tracking.h:153 kernel/locking/lockdep.c:6712) 
[ 304.224892][ T1] xas_store (include/linux/xarray.h:1225 lib/xarray.c:835) 
[ 304.224892][ T1] check_xa_multi_store_adv_delete+0xf0/0x120 
[ 304.224892][ T1] ? check_find_1+0x690/0x690 
[ 304.224892][ T1] check_xa_multi_store_adv+0x17d/0x650 
[ 304.224892][ T1] check_multi_store_advanced+0x36/0x90 
[ 304.224892][ T1] ? check_xas_retry+0xee0/0xee0 
[ 304.224892][ T1] xarray_checks (lib/test_xarray.c:1941) 
[ 304.224892][ T1] do_one_initcall (arch/x86/include/asm/atomic.h:23 include/linux/atomic/atomic-arch-fallback.h:457 include/linux/jump_label.h:260 include/linux/jump_label.h:270 include/trace/events/initcall.h:48 init/main.c:1237) 
[ 304.224892][ T1] ? trace_event_raw_event_initcall_level (init/main.c:1227) 
[ 304.224892][ T1] do_initcalls (init/main.c:1297 init/main.c:1314) 
[ 304.224892][ T1] kernel_init_freeable (init/main.c:1553) 
[ 304.224892][ T1] ? rest_init (init/main.c:1433) 
[ 304.224892][ T1] kernel_init (init/main.c:1443) 
[ 304.224892][ T1] ? rest_init (init/main.c:1433) 
[ 304.224892][ T1] ret_from_fork (arch/x86/kernel/process.c:153) 
[ 304.224892][ T1] ? rest_init (init/main.c:1433) 
[ 304.224892][ T1] ret_from_fork_asm (arch/x86/entry/entry_64.S:250) 
[  304.224892][    T1]  </TASK>
[  304.254135][    T1]
[  304.254916][    T1] =============================
[  304.256249][    T1] WARNING: suspicious RCU usage
[  304.257611][    T1] 6.6.0-12894-g68f563aa7e55 #1 Tainted: G                 N
[  304.259429][    T1] -----------------------------
[  304.260792][    T1] include/linux/xarray.h:1241 suspicious rcu_dereference_protected() usage!
[  304.263056][    T1]
[  304.263056][    T1] other info that might help us debug this:
[  304.263056][    T1]
[  304.265748][    T1]
[  304.265748][    T1] rcu_scheduler_active = 2, debug_locks = 1
[  304.267891][    T1] no locks held by swapper/1.
[  304.269171][    T1]
[  304.269171][    T1] stack backtrace:
[  304.270787][    T1] CPU: 0 PID: 1 Comm: swapper Tainted: G                 N 6.6.0-12894-g68f563aa7e55 #1
[  304.272727][    T1] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[  304.272727][    T1] Call Trace:
[  304.272727][    T1]  <TASK>
[ 304.272727][ T1] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) 
[ 304.272727][ T1] lockdep_rcu_suspicious (include/linux/context_tracking.h:153 kernel/locking/lockdep.c:6712) 
[ 304.272727][ T1] xas_store (include/linux/xarray.h:1241 lib/xarray.c:492 lib/xarray.c:759 lib/xarray.c:844) 
[ 304.272727][ T1] check_xa_multi_store_adv_delete+0xf0/0x120 
[ 304.272727][ T1] ? check_find_1+0x690/0x690 
[ 304.272727][ T1] check_xa_multi_store_adv+0x3d4/0x650 
[ 304.272727][ T1] check_multi_store_advanced+0x36/0x90 
[ 304.272727][ T1] ? check_xas_retry+0xee0/0xee0 
[ 304.272727][ T1] xarray_checks (lib/test_xarray.c:1941) 
[ 304.272727][ T1] do_one_initcall (arch/x86/include/asm/atomic.h:23 include/linux/atomic/atomic-arch-fallback.h:457 include/linux/jump_label.h:260 include/linux/jump_label.h:270 include/trace/events/initcall.h:48 init/main.c:1237) 
[ 304.272727][ T1] ? trace_event_raw_event_initcall_level (init/main.c:1227) 
[ 304.272727][ T1] do_initcalls (init/main.c:1297 init/main.c:1314) 
[ 304.272727][ T1] kernel_init_freeable (init/main.c:1553) 
[ 304.272727][ T1] ? rest_init (init/main.c:1433) 
[ 304.272727][ T1] kernel_init (init/main.c:1443) 
[ 304.272727][ T1] ? rest_init (init/main.c:1433) 
[ 304.272727][ T1] ret_from_fork (arch/x86/kernel/process.c:153) 
[ 304.272727][ T1] ? rest_init (init/main.c:1433) 
[ 304.272727][ T1] ret_from_fork_asm (arch/x86/entry/entry_64.S:250) 
[  304.272727][    T1]  </TASK>
[  342.420416][    C0] BUG: workqueue lockup - pool cpus=0 node=0 flags=0x0 nice=0 stuck for 86s!
[  342.423303][    C0] Showing busy workqueues and worker pools:
[  342.424864][    C0] workqueue events: flags=0x0
[  342.426128][    C0]   pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=3/256 refcnt=4
[  342.426185][    C0]     pending: psi_avgs_work, kfree_rcu_monitor, stop_one_cpu_nowait_workfn
[  342.426278][    C0] workqueue events_unbound: flags=0x2
[  342.432244][    C0]   pwq 2: cpus=0 flags=0x4 nice=0 active=1/512 refcnt=2
[  342.432286][    C0]     pending: idle_cull_fn
[  342.435502][    C0] workqueue events_power_efficient: flags=0x80
[  342.437095][    C0]   pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=3/256 refcnt=4
[  342.437143][    C0]     pending: do_cache_clean, neigh_managed_work, neigh_periodic_work
[  342.437268][    C0] Showing backtraces of running workers in stalled CPU-bound worker pools:
[  360.200350][    C0] rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
[  360.200360][    C0] rcu: 	(detected by 0, t=26252 jiffies, g=20393, q=113135 ncpus=1)
[  360.200360][    C0] rcu: All QSes seen, last rcu_preempt kthread activity 26252 (4294981469-4294955217), jiffies_till_next_fqs=1, root ->qsmask 0x0
[  360.200360][    C0] rcu: rcu_preempt kthread starved for 26252 jiffies! g20393 f0x2 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0
[  360.200360][    C0] rcu: 	Unless rcu_preempt kthread gets sufficient CPU time, OOM is now expected behavior.
[  360.200360][    C0] rcu: RCU grace-period kthread stack dump:
[  360.200360][    C0] task:rcu_preempt     state:R  running task     stack:0     pid:16    tgid:16    ppid:2      flags:0x00004000
[  360.200360][    C0] Call Trace:
[  360.200360][    C0]  <TASK>
[ 360.200360][ C0] __schedule (kernel/sched/core.c:5376 kernel/sched/core.c:6688) 
[ 360.200360][ C0] ? io_schedule_timeout (kernel/sched/core.c:6569) 
[ 360.200360][ C0] schedule (arch/x86/include/asm/preempt.h:85 kernel/sched/core.c:6764 kernel/sched/core.c:6778) 
[ 360.200360][ C0] schedule_timeout (kernel/time/timer.c:1628 include/linux/timer.h:199 kernel/time/timer.c:2168) 
[ 360.200360][ C0] ? usleep_range_state (kernel/time/timer.c:2129) 
[ 360.200360][ C0] ? enqueue_timer (kernel/time/timer.c:2091) 
[ 360.200360][ C0] ? prepare_to_swait_event (kernel/sched/swait.c:122 (discriminator 6)) 
[ 360.200360][ C0] rcu_gp_fqs_loop (kernel/rcu/tree.c:1626 (discriminator 13)) 
[ 360.200360][ C0] ? start_poll_synchronize_rcu_full (kernel/rcu/tree.c:1596) 
[ 360.200360][ C0] ? rcu_gp_init (kernel/rcu/tree.c:1521 (discriminator 23)) 
[ 360.200360][ C0] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4565) 
[ 360.200360][ C0] rcu_gp_kthread (kernel/rcu/tree.c:1828) 
[ 360.200360][ C0] ? rcu_gp_cleanup (kernel/rcu/tree.c:1800) 
[ 360.200360][ C0] ? __kthread_parkme (kernel/kthread.c:293 (discriminator 3)) 
[ 360.200360][ C0] ? rcu_gp_cleanup (kernel/rcu/tree.c:1800) 
[ 360.200360][ C0] kthread (kernel/kthread.c:388) 
[ 360.200360][ C0] ? kthread_complete_and_exit (kernel/kthread.c:341) 
[ 360.200360][ C0] ret_from_fork (arch/x86/kernel/process.c:153) 
[ 360.200360][ C0] ? kthread_complete_and_exit (kernel/kthread.c:341) 
[ 360.200360][ C0] ret_from_fork_asm (arch/x86/entry/entry_64.S:250) 
[  360.200360][    C0]  </TASK>
[  360.200360][    C0] rcu: Stack dump where RCU GP kthread last ran:
[  360.200360][    C0] CPU: 0 PID: 1 Comm: swapper Tainted: G                 N 6.6.0-12894-g68f563aa7e55 #1
[  360.200360][    C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[ 360.200360][ C0] RIP: 0010:xa_load (lib/xarray.c:1456) 
[ 360.200360][ C0] Code: e8 32 a7 5e fc 48 89 5c 24 28 48 89 6c 24 20 48 c7 44 24 30 00 00 00 00 48 c7 44 24 38 03 00 00 00 48 c7 44 24 40 00 00 00 00 <48> c7 44 24 48 00 00 00 00 48 c7 44 24 50 00 00 00 00 e8 a6 34 4f
All code
========
   0:	e8 32 a7 5e fc       	call   0xfffffffffc5ea737
   5:	48 89 5c 24 28       	mov    %rbx,0x28(%rsp)
   a:	48 89 6c 24 20       	mov    %rbp,0x20(%rsp)
   f:	48 c7 44 24 30 00 00 	movq   $0x0,0x30(%rsp)
  16:	00 00 
  18:	48 c7 44 24 38 03 00 	movq   $0x3,0x38(%rsp)
  1f:	00 00 
  21:	48 c7 44 24 40 00 00 	movq   $0x0,0x40(%rsp)
  28:	00 00 
  2a:*	48 c7 44 24 48 00 00 	movq   $0x0,0x48(%rsp)		<-- trapping instruction
  31:	00 00 
  33:	48 c7 44 24 50 00 00 	movq   $0x0,0x50(%rsp)
  3a:	00 00 
  3c:	e8                   	.byte 0xe8
  3d:	a6                   	cmpsb  %es:(%rdi),%ds:(%rsi)
  3e:	34 4f                	xor    $0x4f,%al

Code starting with the faulting instruction
===========================================
   0:	48 c7 44 24 48 00 00 	movq   $0x0,0x48(%rsp)
   7:	00 00 
   9:	48 c7 44 24 50 00 00 	movq   $0x0,0x50(%rsp)
  10:	00 00 
  12:	e8                   	.byte 0xe8
  13:	a6                   	cmpsb  %es:(%rdi),%ds:(%rsi)
  14:	34 4f                	xor    $0x4f,%al
[  360.200360][    C0] RSP: 0000:ffff888110a9fc90 EFLAGS: 00000246
[  360.200360][    C0] RAX: 0000000000000000 RBX: 00000000020f5ebf RCX: 0000000000000000
[  360.200360][    C0] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff87653c00
[  360.200360][    C0] RBP: ffffffff87653c00 R08: 0000000000000000 R09: 0000000000000000
[  360.200360][    C0] R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff11022153f92
[  360.200360][    C0] R13: 0000000000035ebf R14: 0000000000000012 R15: 0000000002180000
[  360.200360][    C0] FS:  0000000000000000(0000) GS:ffffffff86b45000(0000) knlGS:0000000000000000
[  360.200360][    C0] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  360.200360][    C0] CR2: ffff88843ffff000 CR3: 0000000006b1a000 CR4: 00000000000406b0
[  360.200360][    C0] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  360.200360][    C0] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  360.200360][    C0] Call Trace:
[  360.200360][    C0]  <IRQ>
[ 360.200360][ C0] ? show_regs (arch/x86/kernel/dumpstack.c:479) 
[ 360.200360][ C0] ? rcu_check_gp_kthread_starvation (kernel/rcu/tree_stall.h:553 (discriminator 1)) 
[ 360.200360][ C0] ? print_other_cpu_stall (kernel/rcu/tree_stall.h:143 (discriminator 3) kernel/rcu/tree_stall.h:658 (discriminator 3)) 
[ 360.200360][ C0] ? atomic_notifier_call_chain (include/linux/rcupdate.h:306 include/linux/rcupdate.h:780 kernel/notifier.c:232) 
[ 360.200360][ C0] ? check_cpu_stall+0x361/0x4c0 
[ 360.200360][ C0] ? rcu_sched_clock_irq (kernel/rcu/tree.c:3884 kernel/rcu/tree.c:2254) 
[ 360.200360][ C0] ? rcu_note_context_switch (kernel/rcu/tree.c:2233) 
[ 360.200360][ C0] ? rcu_read_lock_sched_held (include/linux/lockdep.h:288 kernel/rcu/update.c:126 kernel/rcu/update.c:120) 
[ 360.200360][ C0] ? update_process_times (kernel/time/timer.c:2073) 
[ 360.200360][ C0] ? msleep_interruptible (kernel/time/timer.c:2065) 
[ 360.200360][ C0] ? tick_do_update_jiffies64 (kernel/time/tick-sched.c:150) 
[ 360.200360][ C0] ? tick_nohz_highres_handler (kernel/time/tick-sched.c:256 kernel/time/tick-sched.c:1516) 
[ 360.200360][ C0] ? __hrtimer_run_queues+0x54e/0x930 
[ 360.200360][ C0] ? tick_do_update_jiffies64 (kernel/time/tick-sched.c:1503) 
[ 360.200360][ C0] ? hrtimer_init (kernel/time/hrtimer.c:1720) 
[ 360.200360][ C0] ? hrtimer_interrupt (kernel/time/hrtimer.c:1817) 
[ 360.200360][ C0] ? __sysvec_apic_timer_interrupt (arch/x86/include/asm/atomic.h:23 include/linux/atomic/atomic-arch-fallback.h:457 include/linux/jump_label.h:260 include/linux/jump_label.h:270 arch/x86/include/asm/trace/irq_vectors.h:41 arch/x86/kernel/apic/apic.c:1083) 
[ 360.200360][ C0] ? sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1076 (discriminator 14)) 
[  360.200360][    C0]  </IRQ>
[  360.200360][    C0]  <TASK>
[ 360.200360][ C0] ? asm_sysvec_apic_timer_interrupt (arch/x86/include/asm/idtentry.h:645) 
[ 360.200360][ C0] ? xa_load (lib/xarray.c:1456) 
[ 360.200360][ C0] ? xa_clear_mark (lib/xarray.c:1455) 
[ 360.200360][ C0] ? __sanitizer_cov_trace_pc (kernel/kcov.c:203) 
[ 360.200360][ C0] check_xa_multi_store_adv+0x47c/0x650 
[ 360.200360][ C0] check_multi_store_advanced+0x44/0x90 
[ 360.200360][ C0] ? check_xas_retry+0xee0/0xee0 
[ 360.200360][ C0] xarray_checks (lib/test_xarray.c:1941) 
[ 360.200360][ C0] do_one_initcall (arch/x86/include/asm/atomic.h:23 include/linux/atomic/atomic-arch-fallback.h:457 include/linux/jump_label.h:260 include/linux/jump_label.h:270 include/trace/events/initcall.h:48 init/main.c:1237) 
[ 360.200360][ C0] ? trace_event_raw_event_initcall_level (init/main.c:1227) 
[ 360.200360][ C0] do_initcalls (init/main.c:1297 init/main.c:1314) 
[ 360.200360][ C0] kernel_init_freeable (init/main.c:1553) 
[ 360.200360][ C0] ? rest_init (init/main.c:1433) 
[ 360.200360][ C0] kernel_init (init/main.c:1443) 
[ 360.200360][ C0] ? rest_init (init/main.c:1433) 
[ 360.200360][ C0] ret_from_fork (arch/x86/kernel/process.c:153) 
[ 360.200360][ C0] ? rest_init (init/main.c:1433) 
[ 360.200360][ C0] ret_from_fork_asm (arch/x86/entry/entry_64.S:250) 
[  360.200360][    C0]  </TASK>


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20231115/202311152254.610174ff-oliver.sang@intel.com
  
Luis Chamberlain Nov. 17, 2023, 8:54 p.m. UTC | #2
On Wed, Nov 15, 2023 at 11:02:59PM +0800, kernel test robot wrote:
> [  304.121213][    T1] WARNING: suspicious RCU usage
> [  304.122111][    T1] 6.6.0-12894-g68f563aa7e55 #1 Tainted: G                 N
> [  304.123404][    T1] -----------------------------
> [  304.124297][    T1] include/linux/xarray.h:1200 suspicious rcu_dereference_check() usage!
> [  304.125787][    T1]
> [  304.125787][    T1] other info that might help us debug this:
> [  304.125787][    T1]
> [  304.127648][    T1]
> [  304.127648][    T1] rcu_scheduler_active = 2, debug_locks = 1
> [  304.129454][    T1] no locks held by swapper/1.
> [  304.130560][    T1]
> [  304.130560][    T1] stack backtrace:
> [  304.131863][    T1] CPU: 0 PID: 1 Comm: swapper Tainted: G                 N 6.6.0-12894-g68f563aa7e55 #1
> [  304.132791][    T1] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
> [  304.132791][    T1] Call Trace:
> [  304.132791][    T1]  <TASK>
> [ 304.132791][ T1] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) 
> [ 304.132791][ T1] lockdep_rcu_suspicious (include/linux/context_tracking.h:153 kernel/locking/lockdep.c:6712) 
> [ 304.132791][ T1] xas_start (include/linux/xarray.h:1200 include/linux/xarray.h:1198 lib/xarray.c:190) 
> [ 304.132791][ T1] xas_load (lib/xarray.c:237) 
> [ 304.132791][ T1] xas_store (lib/xarray.c:789) 
> [ 304.132791][ T1] ? xa_load (include/linux/rcupdate.h:306 include/linux/rcupdate.h:780 lib/xarray.c:1465) 
> [ 304.132791][ T1] check_xa_multi_store_adv_delete+0xf0/0x120 

Yup... I forgot to lock on deletion. I also decided to polish this up
some more and also address some soft lockups which can happen on low
end test machines when we test for order 20, which has 1<<20 entries
(1,048,576). The patch below fixes all this. I'll spin this series up
and send a v2.

diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 3c19d12c1bf5..ac0e887ccbd6 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -32,6 +32,16 @@ void xa_dump(const struct xarray *xa) { }
 } while (0)
 #endif
 
+/*
+ * Can be used in contexts which busy loop on large number of entries but can
+ * sleep and timing is of no importance to test correctness.
+ */
+#define XA_BUG_ON_RELAX(xa, x) do {				\
+	if ((tests_run % 1000) == 0)				\
+		schedule();					\
+	XA_BUG_ON(xa, x);					\
+} while (0)
+
 static void *xa_mk_index(unsigned long index)
 {
 	return xa_mk_value(index & LONG_MAX);
@@ -728,12 +738,17 @@ static noinline void check_multi_store(struct xarray *xa)
 }
 
 #ifdef CONFIG_XARRAY_MULTI
+/* mimics page cache __filemap_add_folio() */
 static noinline void check_xa_multi_store_adv_add(struct xarray *xa,
 						  unsigned long index,
 						  unsigned int order,
 						  void *p)
 {
 	XA_STATE(xas, xa, index);
+	unsigned int nrpages = 1UL << order;
+
+	/* users are responsible for index alignment to the order when adding */
+	XA_BUG_ON(xa, index & (nrpages - 1));
 
 	xas_set_order(&xas, index, order);
 
@@ -750,23 +765,48 @@ static noinline void check_xa_multi_store_adv_add(struct xarray *xa,
 	XA_BUG_ON(xa, xas_error(&xas));
 }
 
+/* mimics page_cache_delete() */
+static noinline void check_xa_multi_store_adv_del_entry(struct xarray *xa,
+						        unsigned long index,
+							unsigned int order)
+{
+	XA_STATE(xas, xa, index);
+
+	xas_set_order(&xas, index, order);
+	xas_store(&xas, NULL);
+	xas_init_marks(&xas);
+}
+
 static noinline void check_xa_multi_store_adv_delete(struct xarray *xa,
 						     unsigned long index,
 						     unsigned int order)
 {
-	unsigned int nrpages = 1UL << order;
-	unsigned long base = round_down(index, nrpages);
-	XA_STATE(xas, xa, base);
+	xa_lock_irq(xa);
+	check_xa_multi_store_adv_del_entry(xa, index, order);
+	xa_unlock_irq(xa);
+}
 
-	xas_set_order(&xas, base, order);
-	xas_store(&xas, NULL);
-	xas_init_marks(&xas);
+/* mimics page cache filemap_get_entry() */
+static noinline void *test_get_entry(struct xarray *xa, unsigned long index)
+{
+	XA_STATE(xas, xa, index);
+	void *p;
+
+	rcu_read_lock();
+repeat:
+	xas_reset(&xas);
+	p = xas_load(&xas);
+	if (xas_retry(&xas, p))
+		goto repeat;
+	rcu_read_unlock();
+
+	return p;
 }
 
 static unsigned long some_val = 0xdeadbeef;
 static unsigned long some_val_2 = 0xdeaddead;
 
-/* mimics the page cache */
+/* mimics the page cache usage */
 static noinline void check_xa_multi_store_adv(struct xarray *xa,
 					      unsigned long pos,
 					      unsigned int order)
@@ -783,13 +823,13 @@ static noinline void check_xa_multi_store_adv(struct xarray *xa,
 	check_xa_multi_store_adv_add(xa, base, order, &some_val);
 
 	for (i = 0; i < nrpages; i++)
-		XA_BUG_ON(xa, xa_load(xa, base + i) != &some_val);
+		XA_BUG_ON_RELAX(xa, test_get_entry(xa, base + i) != &some_val);
 
-	XA_BUG_ON(xa, xa_load(xa, next_index) != NULL);
+	XA_BUG_ON(xa, test_get_entry(xa, next_index) != NULL);
 
 	/* Use order 0 for the next item */
 	check_xa_multi_store_adv_add(xa, next_index, 0, &some_val_2);
-	XA_BUG_ON(xa, xa_load(xa, next_index) != &some_val_2);
+	XA_BUG_ON(xa, test_get_entry(xa, next_index) != &some_val_2);
 
 	/* Remove the next item */
 	check_xa_multi_store_adv_delete(xa, next_index, 0);
@@ -798,7 +838,8 @@ static noinline void check_xa_multi_store_adv(struct xarray *xa,
 	check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2);
 
 	for (i = 0; i < nrpages; i++)
-		XA_BUG_ON(xa, xa_load(xa, next_index + i) != &some_val_2);
+		XA_BUG_ON_RELAX(xa,
+			test_get_entry(xa, next_index + i) != &some_val_2);
 
 	check_xa_multi_store_adv_delete(xa, next_index, order);
 	check_xa_multi_store_adv_delete(xa, base, order);
@@ -812,13 +853,15 @@ static noinline void check_xa_multi_store_adv(struct xarray *xa,
 	check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2);
 
 	for (i = 0; i < nrpages; i++)
-		XA_BUG_ON(xa, xa_load(xa, base + i) != NULL);
+		XA_BUG_ON_RELAX(xa, test_get_entry(xa, base + i) != NULL);
 
 	for (i = 0; i < nrpages; i++)
-		XA_BUG_ON(xa, xa_load(xa, next_index + i) != &some_val_2);
+		XA_BUG_ON_RELAX(xa,
+			test_get_entry(xa, next_index + i) != &some_val_2);
 
 	for (i = 0; i < nrpages; i++)
-		XA_BUG_ON(xa, xa_load(xa, next_next_index + i) != NULL);
+		XA_BUG_ON_RELAX(xa,
+			test_get_entry(xa, next_next_index + i) != NULL);
 
 	check_xa_multi_store_adv_delete(xa, next_index, order);
 	XA_BUG_ON(xa, !xa_empty(xa));
@@ -828,13 +871,15 @@ static noinline void check_xa_multi_store_adv(struct xarray *xa,
 	check_xa_multi_store_adv_add(xa, next_next_index, order, &some_val_2);
 
 	for (i = 0; i < nrpages; i++)
-		XA_BUG_ON(xa, xa_load(xa, base + i) != NULL);
+		XA_BUG_ON_RELAX(xa,
+			test_get_entry(xa, base + i) != NULL);
 
 	for (i = 0; i < nrpages; i++)
-		XA_BUG_ON(xa, xa_load(xa, next_index + i) != NULL);
+		XA_BUG_ON_RELAX(xa, test_get_entry(xa, next_index + i) != NULL);
 
 	for (i = 0; i < nrpages; i++)
-		XA_BUG_ON(xa, xa_load(xa, next_next_index + i) != &some_val_2);
+		XA_BUG_ON_RELAX(xa,
+			test_get_entry(xa, next_next_index + i) != &some_val_2);
 
 	check_xa_multi_store_adv_delete(xa, next_next_index, order);
 	XA_BUG_ON(xa, !xa_empty(xa));
  
Matthew Wilcox Nov. 17, 2023, 8:58 p.m. UTC | #3
On Fri, Nov 17, 2023 at 12:54:09PM -0800, Luis Chamberlain wrote:
> +/*
> + * Can be used in contexts which busy loop on large number of entries but can
> + * sleep and timing is if no importance to test correctness.
> + */
> +#define XA_BUG_ON_RELAX(xa, x) do {				\
> +	if ((tests_run % 1000) == 0)				\
> +		schedule();					\
> +	XA_BUG_ON(xa, x);					\
> +} while (0)

That is awful.  Please don't do that.  You're mixing two completely
unrelated thing into the same macro, which makes no sense.  Not only
that, it's a macro which refers to something in the containing
environment that isn't a paramter to the macro.
  
Luis Chamberlain Nov. 17, 2023, 9:01 p.m. UTC | #4
On Fri, Nov 17, 2023 at 08:58:05PM +0000, Matthew Wilcox wrote:
> On Fri, Nov 17, 2023 at 12:54:09PM -0800, Luis Chamberlain wrote:
> > +/*
> > + * Can be used in contexts which busy loop on large number of entries but can
> > + * sleep and timing is if no importance to test correctness.
> > + */
> > +#define XA_BUG_ON_RELAX(xa, x) do {				\
> > +	if ((tests_run % 1000) == 0)				\
> > +		schedule();					\
> > +	XA_BUG_ON(xa, x);					\
> > +} while (0)
> 
> That is awful.  Please don't do that.  You're mixing two completely
> unrelated thing into the same macro, which makes no sense.  Not only
> that, it's a macro which refers to something in the containing
> environment that isn't a paramter to the macro.

I figured you'd puke. Would you prefer I just open code the check on the loop
though? I'm sure another alternative is we *not care* about these
overloaded systems running the test. What would you prefer?

  Luis
  
Luis Chamberlain Jan. 26, 2024, 7:12 p.m. UTC | #5
On Fri, Nov 17, 2023 at 01:01:18PM -0800, Luis Chamberlain wrote:
> On Fri, Nov 17, 2023 at 08:58:05PM +0000, Matthew Wilcox wrote:
> > On Fri, Nov 17, 2023 at 12:54:09PM -0800, Luis Chamberlain wrote:
> > > +/*
> > > + * Can be used in contexts which busy loop on large number of entries but can
> > > + * sleep and timing is if no importance to test correctness.
> > > + */
> > > +#define XA_BUG_ON_RELAX(xa, x) do {				\
> > > +	if ((tests_run % 1000) == 0)				\
> > > +		schedule();					\
> > > +	XA_BUG_ON(xa, x);					\
> > > +} while (0)
> > 
> > That is awful.  Please don't do that.  You're mixing two completely
> > unrelated thing into the same macro, which makes no sense.  Not only
> > that, it's a macro which refers to something in the containing
> > environment that isn't a paramter to the macro.
> 
> I figured you'd puke. Would you prefer I just open code the check on the loop
> though? I'm sure another alternative is we *not care* about these
> overloaded systems running the test. What would you prefer?

OK without any particular preferences outlined this is what I have,
splitting the two contexts and making the busy loop fix clearer.

+/* Yield every 1000 iterations of a busy loop; @i is the caller's counter. */
+#define XA_BUSY_LOOP_RELAX(i) do {                             \
+       if ((i % 1000) == 0)                                    \
+               schedule();                                     \
+} while (0)
+
+/*
+ * Can be used in contexts which busy loop on a large number of entries but
+ * can sleep and timing is of no importance to test correctness.
+ */
+#define XA_BUG_ON_RELAX(i, xa, x) do {                         \
+       XA_BUSY_LOOP_RELAX(i);                                  \
+       XA_BUG_ON(xa, x);                                       \
+} while (0)

Thoughts?

  Luis
  
Matthew Wilcox Jan. 26, 2024, 7:26 p.m. UTC | #6
On Fri, Jan 26, 2024 at 11:12:03AM -0800, Luis Chamberlain wrote:
> On Fri, Nov 17, 2023 at 01:01:18PM -0800, Luis Chamberlain wrote:
> > On Fri, Nov 17, 2023 at 08:58:05PM +0000, Matthew Wilcox wrote:
> > > On Fri, Nov 17, 2023 at 12:54:09PM -0800, Luis Chamberlain wrote:
> > > > +/*
> > > > + * Can be used in contexts which busy loop on large number of entries but can
> > > > + * sleep and timing is if no importance to test correctness.
> > > > + */
> > > > +#define XA_BUG_ON_RELAX(xa, x) do {				\
> > > > +	if ((tests_run % 1000) == 0)				\
> > > > +		schedule();					\
> > > > +	XA_BUG_ON(xa, x);					\
> > > > +} while (0)
> > > 
> > > That is awful.  Please don't do that.  You're mixing two completely
> > > unrelated thing into the same macro, which makes no sense.  Not only
> > > that, it's a macro which refers to something in the containing
> > > environment that isn't a paramter to the macro.
> > 
> > I figured you'd puke. Would you prefer I just open code the check on the loop
> > though? I'm sure another alternative is we *not care* about these
> > overloaded systems running the test. What would you prefer?
> 
> OK without any particular preferences outlined this is what I have,
> splitting the two contexts and making the busy loop fix clearer.
> 
> +#define XA_BUSY_LOOP_RELAX(xa, x) do {                         \
> +       if ((i % 1000) == 0)                                    \
> +               schedule();                                     \
> +} while (0)
> +
> +/*
> + * Can be used in contexts which busy loop on large number of entries but can
> + * sleep and timing is if no importance to test correctness.
> + */
> +#define XA_BUG_ON_RELAX(i, xa, x) do {                         \
> +       XA_BUSY_LOOP_RELAX(i);                                  \
> +       XA_BUG_ON(xa, x);                                       \
> +} while (0)

No.  XA_BUG_ON_RELAX is not OK.  Really.

We have a perfectly good system for "relaxing":

        xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
                xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
                if (++tagged % XA_CHECK_SCHED)
                        continue;

                xas_pause(&xas);
                xas_unlock_irq(&xas);
                cond_resched();
                xas_lock_irq(&xas);
        }
  
Luis Chamberlain Jan. 26, 2024, 8:04 p.m. UTC | #7
On Fri, Jan 26, 2024 at 07:26:17PM +0000, Matthew Wilcox wrote:
> On Fri, Jan 26, 2024 at 11:12:03AM -0800, Luis Chamberlain wrote:
> > On Fri, Nov 17, 2023 at 01:01:18PM -0800, Luis Chamberlain wrote:
> > > On Fri, Nov 17, 2023 at 08:58:05PM +0000, Matthew Wilcox wrote:
> > > > On Fri, Nov 17, 2023 at 12:54:09PM -0800, Luis Chamberlain wrote:
> > > > > +/*
> > > > > + * Can be used in contexts which busy loop on large number of entries but can
> > > > > + * sleep and timing is if no importance to test correctness.
> > > > > + */
> > > > > +#define XA_BUG_ON_RELAX(xa, x) do {				\
> > > > > +	if ((tests_run % 1000) == 0)				\
> > > > > +		schedule();					\
> > > > > +	XA_BUG_ON(xa, x);					\
> > > > > +} while (0)
> > > > 
> > > > That is awful.  Please don't do that.  You're mixing two completely
> > > > unrelated thing into the same macro, which makes no sense.  Not only
> > > > that, it's a macro which refers to something in the containing
> > > > environment that isn't a paramter to the macro.
> > > 
> > > I figured you'd puke. Would you prefer I just open code the check on the loop
> > > though? I'm sure another alternative is we *not care* about these
> > > overloaded systems running the test. What would you prefer?
> > 
> > OK without any particular preferences outlined this is what I have,
> > splitting the two contexts and making the busy loop fix clearer.
> > 
> > +#define XA_BUSY_LOOP_RELAX(xa, x) do {                         \
> > +       if ((i % 1000) == 0)                                    \
> > +               schedule();                                     \
> > +} while (0)
> > +
> > +/*
> > + * Can be used in contexts which busy loop on large number of entries but can
> > + * sleep and timing is if no importance to test correctness.
> > + */
> > +#define XA_BUG_ON_RELAX(i, xa, x) do {                         \
> > +       XA_BUSY_LOOP_RELAX(i);                                  \
> > +       XA_BUG_ON(xa, x);                                       \
> > +} while (0)
> 
> No.  XA_BUG_ON_RELAX is not OK.  Really.
> 
> We have a perfectly good system for "relaxing":
> 
>         xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
>                 xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
>                 if (++tagged % XA_CHECK_SCHED)
>                         continue;
> 
>                 xas_pause(&xas);
>                 xas_unlock_irq(&xas);
>                 cond_resched();
>                 xas_lock_irq(&xas);
>         }

And yet we can get a soft lockup with order 20 (1,048,576 entries),
granted busy looping over 1 million entries is insane, but it seems
the existing code may not be enough to avoid the soft lockup. Also
cond_resched() may be eventually removed [0].

Anyway, we can ignore the soft lockup then.

[0] https://lwn.net/Articles/950581/

  Luis
  
Matthew Wilcox Jan. 26, 2024, 8:32 p.m. UTC | #8
On Fri, Jan 26, 2024 at 12:04:44PM -0800, Luis Chamberlain wrote:
> > We have a perfectly good system for "relaxing":
> > 
> >         xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
> >                 xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
> >                 if (++tagged % XA_CHECK_SCHED)
> >                         continue;
> > 
> >                 xas_pause(&xas);
> >                 xas_unlock_irq(&xas);
> >                 cond_resched();
> >                 xas_lock_irq(&xas);
> >         }
> 
> And yet we can get a soft lockup with order 20 (1,048,576 entries),
> granted busy looping over 1 million entries is insane, but it seems it
> the existing code may not be enough to avoid the soft lockup. Also
> cond_resched() may be eventually removed [0].

what?  you're in charge of when you sleep.  you can do this:

unsigned i = 0;
rcu_read_lock();
xas_for_each(...) {
	...
	if (iter++ % XA_CHECK_SCHED)
		continue;
	xas_pause();
	rcu_read_unlock();
	rcu_read_lock();
}
rcu_read_unlock();

and that will get rid of the rcu warnings.  right?
  
Luis Chamberlain Jan. 31, 2024, 9:58 p.m. UTC | #9
On Fri, Jan 26, 2024 at 08:32:28PM +0000, Matthew Wilcox wrote:
> On Fri, Jan 26, 2024 at 12:04:44PM -0800, Luis Chamberlain wrote:
> > > We have a perfectly good system for "relaxing":
> > > 
> > >         xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
> > >                 xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
> > >                 if (++tagged % XA_CHECK_SCHED)
> > >                         continue;
> > > 
> > >                 xas_pause(&xas);
> > >                 xas_unlock_irq(&xas);
> > >                 cond_resched();
> > >                 xas_lock_irq(&xas);
> > >         }
> > 
> > And yet we can get a soft lockup with order 20 (1,048,576 entries),
> > granted busy looping over 1 million entries is insane, but it seems it
> > the existing code may not be enough to avoid the soft lockup. Also
> > cond_resched() may be eventually removed [0].
> 
> what?  you're in charge of when you sleep.  you can do this:
> 
> unsigned i = 0;
> rcu_read_lock();
> xas_for_each(...) {
> 	...
> 	if (iter++ % XA_CHECK_SCHED)
> 		continue;
> 	xas_pause();
> 	rcu_read_unlock();
> 	rcu_read_lock();
> }
> rcu_read_unlock();
> 
> and that will get rid of the rcu warnings.  right?

The RCU warning was due to my getting a try call missing an RCU lock;
I fixed that. The pending issue was a soft lockup that I got on low end systems
testing test_xarray with higher order, but after testing on a 2 vcpu system
with only 2 GiB of RAM I cannot reproduce it, so we can address this later.
I forget the exact type of low end system I tested this on... but anyway
I can't reproduce now. I suspect it may have been similar to the issue
0-day had found long ago and you noted an overloaded system [0]

[0] https://lore.kernel.org/all/20190306120843.GI13380@bombadil.infradead.org/

  Luis
  
Luis Chamberlain Feb. 15, 2024, 2:15 a.m. UTC | #10
On Fri, Jan 26, 2024 at 08:32:28PM +0000, Matthew Wilcox wrote:
> On Fri, Jan 26, 2024 at 12:04:44PM -0800, Luis Chamberlain wrote:
> > > We have a perfectly good system for "relaxing":
> > > 
> > >         xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
> > >                 xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
> > >                 if (++tagged % XA_CHECK_SCHED)
> > >                         continue;
> > > 
> > >                 xas_pause(&xas);
> > >                 xas_unlock_irq(&xas);
> > >                 cond_resched();
> > >                 xas_lock_irq(&xas);
> > >         }
> > 
> > And yet we can get a soft lockup with order 20 (1,048,576 entries),
> > granted busy looping over 1 million entries is insane, but it seems it
> > the existing code may not be enough to avoid the soft lockup. Also
> > cond_resched() may be eventually removed [0].
> 
> what?  you're in charge of when you sleep.  you can do this:
> 
> unsigned i = 0;
> rcu_read_lock();
> xas_for_each(...) {
> 	...
> 	if (iter++ % XA_CHECK_SCHED)
> 		continue;
> 	xas_pause();
> 	rcu_read_unlock();
> 	rcu_read_lock();
> }
> rcu_read_unlock();
> 
> and that will get rid of the rcu warnings.  right?

The RCU splat is long gone on my last iteration, merged now on
linux-next; what's left is just a soft lockup over 22 seconds when you
disable preemption, enable RCU proving, and use 2 vcpus. This
could happen for instance if we loop over test_get_entry() and don't
want to use the xas_for_each() API; in this case we don't, as part of the
selftest is to not trust the xarray API but to test it.

So in the simplest case for instance, this is used:

check_xa_multi_store_adv_add(xa, base, order, &some_val);               
                                                                                
for (i = 0; i < nrpages; i++)                                           
	XA_BUG_ON(xa, test_get_entry(xa, base + i) != &some_val);  

test_get_entry() will do the RCU locking for us. So while I agree that
if you are using the xarray API using xas_for_each*() is best, we want
to not trust the xarray API and prove it. So what do you think about
something like this, as it does fix the soft lockup.

diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index d4e55b4867dc..ac162025cc59 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -781,6 +781,7 @@ static noinline void *test_get_entry(struct xarray *xa, unsigned long index)
 {
 	XA_STATE(xas, xa, index);
 	void *p;
+	static unsigned int i = 0;
 
 	rcu_read_lock();
 repeat:
@@ -790,6 +791,17 @@ static noinline void *test_get_entry(struct xarray *xa, unsigned long index)
 		goto repeat;
 	rcu_read_unlock();
 
+	/*
+	 * This is not part of the page cache, this selftest is pretty
+	 * aggressive and does not want to trust the xarray API but rather
+	 * test it, and for order 20 (4 GiB block size) we can loop over
+	 * more than a million entries which can cause a soft lockup. Page cache
+	 * APIs won't be stupid, proper page cache APIs loop over the proper
+	 * order so when using a larger order we skip shared entries.
+	 */
+	if (++i % XA_CHECK_SCHED == 0)
+		schedule();
+
 	return p;
 }
  

Patch

diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index e77d4856442c..0572a3ec2cf8 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -674,6 +674,139 @@  static noinline void check_multi_store(struct xarray *xa)
 #endif
 }
 
+#ifdef CONFIG_XARRAY_MULTI
+static noinline void check_xa_multi_store_adv_add(struct xarray *xa,
+						  unsigned long index,
+						  unsigned int order,
+						  void *p)
+{
+	XA_STATE(xas, xa, index);
+
+	xas_set_order(&xas, index, order);
+
+	do {
+		xas_lock_irq(&xas);
+
+		xas_store(&xas, p);
+		XA_BUG_ON(xa, xas_error(&xas));
+		XA_BUG_ON(xa, xa_load(xa, index) != p);
+
+		xas_unlock_irq(&xas);
+	} while (xas_nomem(&xas, GFP_KERNEL));
+
+	XA_BUG_ON(xa, xas_error(&xas));
+}
+
+static noinline void check_xa_multi_store_adv_delete(struct xarray *xa,
+						     unsigned long index,
+						     unsigned int order)
+{
+	unsigned int nrpages = 1UL << order;
+	unsigned long base = round_down(index, nrpages);
+	XA_STATE(xas, xa, base);
+
+	xas_set_order(&xas, base, order);
+	xas_store(&xas, NULL);
+	xas_init_marks(&xas);
+}
+
+static unsigned long some_val = 0xdeadbeef;
+static unsigned long some_val_2 = 0xdeaddead;
+
+/* mimics the page cache */
+static noinline void check_xa_multi_store_adv(struct xarray *xa,
+					      unsigned long pos,
+					      unsigned int order)
+{
+	unsigned int nrpages = 1UL << order;
+	unsigned long index, base, next_index, next_next_index;
+	unsigned int i;
+
+	index = pos >> PAGE_SHIFT;
+	base = round_down(index, nrpages);
+	next_index = round_down(base + nrpages, nrpages);
+	next_next_index = round_down(next_index + nrpages, nrpages);
+
+	check_xa_multi_store_adv_add(xa, base, order, &some_val);
+
+	for (i = 0; i < nrpages; i++)
+		XA_BUG_ON(xa, xa_load(xa, base + i) != &some_val);
+
+	XA_BUG_ON(xa, xa_load(xa, next_index) != NULL);
+
+	/* Use order 0 for the next item */
+	check_xa_multi_store_adv_add(xa, next_index, 0, &some_val_2);
+	XA_BUG_ON(xa, xa_load(xa, next_index) != &some_val_2);
+
+	/* Remove the next item */
+	check_xa_multi_store_adv_delete(xa, next_index, 0);
+
+	/* Now use order for a new pointer */
+	check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2);
+
+	for (i = 0; i < nrpages; i++)
+		XA_BUG_ON(xa, xa_load(xa, next_index + i) != &some_val_2);
+
+	check_xa_multi_store_adv_delete(xa, next_index, order);
+	check_xa_multi_store_adv_delete(xa, base, order);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	/* starting fresh again */
+
+	/* let's test some holes now */
+
+	/* hole at base and next_next */
+	check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2);
+
+	for (i = 0; i < nrpages; i++)
+		XA_BUG_ON(xa, xa_load(xa, base + i) != NULL);
+
+	for (i = 0; i < nrpages; i++)
+		XA_BUG_ON(xa, xa_load(xa, next_index + i) != &some_val_2);
+
+	for (i = 0; i < nrpages; i++)
+		XA_BUG_ON(xa, xa_load(xa, next_next_index + i) != NULL);
+
+	check_xa_multi_store_adv_delete(xa, next_index, order);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	/* hole at base and next */
+
+	check_xa_multi_store_adv_add(xa, next_next_index, order, &some_val_2);
+
+	for (i = 0; i < nrpages; i++)
+		XA_BUG_ON(xa, xa_load(xa, base + i) != NULL);
+
+	for (i = 0; i < nrpages; i++)
+		XA_BUG_ON(xa, xa_load(xa, next_index + i) != NULL);
+
+	for (i = 0; i < nrpages; i++)
+		XA_BUG_ON(xa, xa_load(xa, next_next_index + i) != &some_val_2);
+
+	check_xa_multi_store_adv_delete(xa, next_next_index, order);
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+#endif
+
+static noinline void check_multi_store_advanced(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
+	unsigned long end = ULONG_MAX/2;
+	unsigned long pos, i;
+
+	/*
+	 * About 117 million tests below.
+	 */
+	for (pos = 7; pos < end; pos = (pos * pos) + 564) {
+		for (i = 0; i < max_order; i++) {
+			check_xa_multi_store_adv(xa, pos, i);
+			check_xa_multi_store_adv(xa, pos + 157, i);
+		}
+	}
+#endif
+}
+
 static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base)
 {
 	int i;
@@ -1804,6 +1937,7 @@  static int xarray_checks(void)
 	check_reserve(&array);
 	check_reserve(&xa0);
 	check_multi_store(&array);
+	check_multi_store_advanced(&array);
 	check_get_order(&array);
 	check_xa_alloc();
 	check_find(&array);