nfc: llcp: Fix race in handling llcp_devices

Message ID 20221129094436.3975668-1-bobo.shaobowang@huawei.com
State New
Headers
Series nfc: llcp: Fix race in handling llcp_devices |

Commit Message

Wangshaobo (bobo) Nov. 29, 2022, 9:44 a.m. UTC
  There are multiple paths that operate on the llcp_devices list without protection:

         CPU0                        CPU1

nfc_unregister_device()        nfc_register_device()
 nfc_llcp_unregister_device()    nfc_llcp_register_device() //no lock
    ...                            list_add(local->list, llcp_devices)
    local_release()
      list_del(local->list)

        CPU2
...
 nfc_llcp_find_local()
   list_for_each_entry(,&llcp_devices,)

So a race condition is reached if two of the three occur simultaneously,
as in the following crash report. Although there is no reproduction script
in syzbot currently, our artificially constructed use cases can also
reproduce it:

list_del corruption. prev->next should be ffff888060ce7000, but was ffff88802a0ad000. (prev=ffffffff8e536240)
------------[ cut here ]------------
kernel BUG at lib/list_debug.c:59!
invalid opcode: 0000 [#1] PREEMPT SMP KASAN
CPU: 0 PID: 16622 Comm: syz-executor.5 Not tainted 6.1.0-rc6-next-20221125-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022
RIP: 0010:__list_del_entry_valid.cold+0x12/0x72 lib/list_debug.c:59
Code: f0 ff 0f 0b 48 89 f1 48 c7 c7 60 96 a6 8a 4c 89 e6 e8 4b 29 f0 ff 0f 0b 4c 89 e1 48 89 ee 48 c7 c7 c0 98 a6 8a e8 37 29 f0 ff <0f> 0b 48 89 ee 48 c7 c7 a0 97 a6 8a e8 26 29 f0 ff 0f 0b 4c 89 e2
RSP: 0018:ffffc900151afd58 EFLAGS: 00010282
RAX: 000000000000006d RBX: 0000000000000001 RCX: 0000000000000000
RDX: ffff88801e7eba80 RSI: ffffffff8166001c RDI: fffff52002a35f9d
RBP: ffff888060ce7000 R08: 000000000000006d R09: 0000000000000000
R10: 0000000080000000 R11: 0000000000000000 R12: ffffffff8e536240
R13: ffff88801f3f3000 R14: ffff888060ce1000 R15: ffff888079d855f0
FS:  0000555556f57400(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f095d5ad988 CR3: 000000002155a000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <TASK>
 __list_del_entry include/linux/list.h:134 [inline]
 list_del include/linux/list.h:148 [inline]
 local_release net/nfc/llcp_core.c:171 [inline]
 kref_put include/linux/kref.h:65 [inline]
 nfc_llcp_local_put net/nfc/llcp_core.c:181 [inline]
 nfc_llcp_local_put net/nfc/llcp_core.c:176 [inline]
 nfc_llcp_unregister_device+0xb8/0x260 net/nfc/llcp_core.c:1619
 nfc_unregister_device+0x196/0x330 net/nfc/core.c:1179
 virtual_ncidev_close+0x52/0xb0 drivers/nfc/virtual_ncidev.c:163
 __fput+0x27c/0xa90 fs/file_table.c:320
 task_work_run+0x16f/0x270 kernel/task_work.c:179
 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
 exit_to_user_mode_prepare+0x23c/0x250 kernel/entry/common.c:203
 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
 syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296
 do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

This patch adds a dedicated mutex, llcp_devices_list_lock, to ensure
that the llcp_devices list is handled safely.

Fixes: 30cc4587659e ("NFC: Move LLCP code to the NFC top level diirectory")
Reported-by: syzbot+81232c4a81a886e2b580@syzkaller.appspotmail.com
Signed-off-by: Wang ShaoBo <bobo.shaobowang@huawei.com>
---
 net/nfc/llcp_core.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)
  

Comments

Paolo Abeni Dec. 1, 2022, 11:25 a.m. UTC | #1
On Tue, 2022-11-29 at 17:44 +0800, Wang ShaoBo wrote:
> There are multiple path operate llcp_devices list without protection:
> 
>          CPU0                        CPU1
> 
> nfc_unregister_device()        nfc_register_device()
>  nfc_llcp_unregister_device()    nfc_llcp_register_device() //no lock
>     ...                            list_add(local->list, llcp_devices)
>     local_release()
>       list_del(local->list)
> 
>         CPU2
> ...
>  nfc_llcp_find_local()
>    list_for_each_entry(,&llcp_devices,)
> 
> So reach race condition if two of the three occur simultaneously like
> following crash report, although there is no reproduction script in
> syzbot currently, our artificially constructed use cases can also
> reproduce it:
> 
> list_del corruption. prev->next should be ffff888060ce7000, but was ffff88802a0ad000. (prev=ffffffff8e536240)
> ------------[ cut here ]------------
> kernel BUG at lib/list_debug.c:59!
> invalid opcode: 0000 [#1] PREEMPT SMP KASAN
> CPU: 0 PID: 16622 Comm: syz-executor.5 Not tainted 6.1.0-rc6-next-20221125-syzkaller #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022
> RIP: 0010:__list_del_entry_valid.cold+0x12/0x72 lib/list_debug.c:59
> Code: f0 ff 0f 0b 48 89 f1 48 c7 c7 60 96 a6 8a 4c 89 e6 e8 4b 29 f0 ff 0f 0b 4c 89 e1 48 89 ee 48 c7 c7 c0 98 a6 8a e8 37 29 f0 ff <0f> 0b 48 89 ee 48 c7 c7 a0 97 a6 8a e8 26 29 f0 ff 0f 0b 4c 89 e2
> RSP: 0018:ffffc900151afd58 EFLAGS: 00010282
> RAX: 000000000000006d RBX: 0000000000000001 RCX: 0000000000000000
> RDX: ffff88801e7eba80 RSI: ffffffff8166001c RDI: fffff52002a35f9d
> RBP: ffff888060ce7000 R08: 000000000000006d R09: 0000000000000000
> R10: 0000000080000000 R11: 0000000000000000 R12: ffffffff8e536240
> R13: ffff88801f3f3000 R14: ffff888060ce1000 R15: ffff888079d855f0
> FS:  0000555556f57400(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007f095d5ad988 CR3: 000000002155a000 CR4: 00000000003506f0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Call Trace:
>  <TASK>
>  __list_del_entry include/linux/list.h:134 [inline]
>  list_del include/linux/list.h:148 [inline]
>  local_release net/nfc/llcp_core.c:171 [inline]
>  kref_put include/linux/kref.h:65 [inline]
>  nfc_llcp_local_put net/nfc/llcp_core.c:181 [inline]
>  nfc_llcp_local_put net/nfc/llcp_core.c:176 [inline]
>  nfc_llcp_unregister_device+0xb8/0x260 net/nfc/llcp_core.c:1619
>  nfc_unregister_device+0x196/0x330 net/nfc/core.c:1179
>  virtual_ncidev_close+0x52/0xb0 drivers/nfc/virtual_ncidev.c:163
>  __fput+0x27c/0xa90 fs/file_table.c:320
>  task_work_run+0x16f/0x270 kernel/task_work.c:179
>  resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
>  exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
>  exit_to_user_mode_prepare+0x23c/0x250 kernel/entry/common.c:203
>  __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
>  syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296
>  do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86
>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
> 
> This patch add specific mutex lock llcp_devices_list_lock to ensure
> handling llcp_devices list safety.

Why a mutex instead of a spinlock? All the critical sections are very
small (both code and time-wise), while the list of callers reaching
that code is quite large, making it hard to check that each of them is
really in process context.

Please switch to a spinlock instead.

Cheers,

Paolo
  
Dongliang Mu Feb. 8, 2023, 9:01 a.m. UTC | #2
On Tue, Nov 29, 2022 at 5:46 PM 'Wang ShaoBo' via syzkaller-bugs
<syzkaller-bugs@googlegroups.com> wrote:
>
> There are multiple path operate llcp_devices list without protection:
>
>          CPU0                        CPU1
>
> nfc_unregister_device()        nfc_register_device()
>  nfc_llcp_unregister_device()    nfc_llcp_register_device() //no lock
>     ...                            list_add(local->list, llcp_devices)
>     local_release()
>       list_del(local->list)
>
>         CPU2
> ...
>  nfc_llcp_find_local()
>    list_for_each_entry(,&llcp_devices,)
>
> So reach race condition if two of the three occur simultaneously like
> following crash report, although there is no reproduction script in
> syzbot currently, our artificially constructed use cases can also
> reproduce it:
>
> list_del corruption. prev->next should be ffff888060ce7000, but was ffff88802a0ad000. (prev=ffffffff8e536240)
> ------------[ cut here ]------------
> kernel BUG at lib/list_debug.c:59!
> invalid opcode: 0000 [#1] PREEMPT SMP KASAN
> CPU: 0 PID: 16622 Comm: syz-executor.5 Not tainted 6.1.0-rc6-next-20221125-syzkaller #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022
> RIP: 0010:__list_del_entry_valid.cold+0x12/0x72 lib/list_debug.c:59
> Code: f0 ff 0f 0b 48 89 f1 48 c7 c7 60 96 a6 8a 4c 89 e6 e8 4b 29 f0 ff 0f 0b 4c 89 e1 48 89 ee 48 c7 c7 c0 98 a6 8a e8 37 29 f0 ff <0f> 0b 48 89 ee 48 c7 c7 a0 97 a6 8a e8 26 29 f0 ff 0f 0b 4c 89 e2
> RSP: 0018:ffffc900151afd58 EFLAGS: 00010282
> RAX: 000000000000006d RBX: 0000000000000001 RCX: 0000000000000000
> RDX: ffff88801e7eba80 RSI: ffffffff8166001c RDI: fffff52002a35f9d
> RBP: ffff888060ce7000 R08: 000000000000006d R09: 0000000000000000
> R10: 0000000080000000 R11: 0000000000000000 R12: ffffffff8e536240
> R13: ffff88801f3f3000 R14: ffff888060ce1000 R15: ffff888079d855f0
> FS:  0000555556f57400(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007f095d5ad988 CR3: 000000002155a000 CR4: 00000000003506f0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Call Trace:
>  <TASK>
>  __list_del_entry include/linux/list.h:134 [inline]
>  list_del include/linux/list.h:148 [inline]
>  local_release net/nfc/llcp_core.c:171 [inline]
>  kref_put include/linux/kref.h:65 [inline]
>  nfc_llcp_local_put net/nfc/llcp_core.c:181 [inline]
>  nfc_llcp_local_put net/nfc/llcp_core.c:176 [inline]
>  nfc_llcp_unregister_device+0xb8/0x260 net/nfc/llcp_core.c:1619
>  nfc_unregister_device+0x196/0x330 net/nfc/core.c:1179
>  virtual_ncidev_close+0x52/0xb0 drivers/nfc/virtual_ncidev.c:163
>  __fput+0x27c/0xa90 fs/file_table.c:320
>  task_work_run+0x16f/0x270 kernel/task_work.c:179
>  resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
>  exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
>  exit_to_user_mode_prepare+0x23c/0x250 kernel/entry/common.c:203
>  __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
>  syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296
>  do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86
>  entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> This patch add specific mutex lock llcp_devices_list_lock to ensure
> handling llcp_devices list safety.
>
> Fixes: 30cc4587659e ("NFC: Move LLCP code to the NFC top level diirectory")
> Reported-by: syzbot+81232c4a81a886e2b580@syzkaller.appspotmail.com

There is another syzbot bug report [1] caused by the same bug. It
has a syz reproducer. A patch test has been deployed on the
syzbot dashboard.

Besides, I have a doubt about this patch. This patch adds a mutex lock
to make list operations (add, delete, find, etc.) exclusive. If the
following thread interleaving occurs, will this patch work?

CPU0                                         CPU2
nfc_llcp_unregister_device        nfc_llcp_unregister_device
    ......                                             nfc_llcp_find_local
    local_release                              ......
        list_del                                    ......
                                                       local_cleanup
                                                       nfc_llcp_local_put

If nfc_llcp_find_local executes before the list deletion, it will still
lead to a double free and a UAF. Please correct me if I have made any
mistakes. Thanks in advance.

[1] https://syzkaller.appspot.com/bug?id=41d9ed9b6dcd7b7c5611ed5eb64835b1a554e998

> Signed-off-by: Wang ShaoBo <bobo.shaobowang@huawei.com>
> ---
>  net/nfc/llcp_core.c | 11 ++++++++++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
> index 3364caabef8b..7deaecd9d3cd 100644
> --- a/net/nfc/llcp_core.c
> +++ b/net/nfc/llcp_core.c
> @@ -17,6 +17,7 @@
>  static u8 llcp_magic[3] = {0x46, 0x66, 0x6d};
>
>  static LIST_HEAD(llcp_devices);
> +static DEFINE_MUTEX(llcp_devices_list_lock);
>
>  static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb);
>
> @@ -168,7 +169,9 @@ static void local_release(struct kref *ref)
>
>         local = container_of(ref, struct nfc_llcp_local, ref);
>
> +       mutex_lock(&llcp_devices_list_lock);
>         list_del(&local->list);
> +       mutex_unlock(&llcp_devices_list_lock);
>         local_cleanup(local);
>         kfree(local);
>  }
> @@ -282,9 +285,13 @@ struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
>  {
>         struct nfc_llcp_local *local;
>
> +       mutex_lock(&llcp_devices_list_lock);
>         list_for_each_entry(local, &llcp_devices, list)
> -               if (local->dev == dev)
> +               if (local->dev == dev) {
> +                       mutex_unlock(&llcp_devices_list_lock);
>                         return local;
> +               }
> +       mutex_unlock(&llcp_devices_list_lock);
>
>         pr_debug("No device found\n");
>
> @@ -1600,7 +1607,9 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
>         timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0);
>         INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
>
> +       mutex_lock(&llcp_devices_list_lock);
>         list_add(&local->list, &llcp_devices);
> +       mutex_unlock(&llcp_devices_list_lock);
>
>         return 0;
>  }
> --
> 2.25.1
>
> --
> You received this message because you are subscribed to the Google Groups "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller-bugs+unsubscribe@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/syzkaller-bugs/20221129094436.3975668-1-bobo.shaobowang%40huawei.com.
  

Patch

diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 3364caabef8b..7deaecd9d3cd 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -17,6 +17,7 @@ 
 static u8 llcp_magic[3] = {0x46, 0x66, 0x6d};
 
 static LIST_HEAD(llcp_devices);
+static DEFINE_MUTEX(llcp_devices_list_lock);
 
 static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb);
 
@@ -168,7 +169,9 @@  static void local_release(struct kref *ref)
 
 	local = container_of(ref, struct nfc_llcp_local, ref);
 
+	mutex_lock(&llcp_devices_list_lock);
 	list_del(&local->list);
+	mutex_unlock(&llcp_devices_list_lock);
 	local_cleanup(local);
 	kfree(local);
 }
@@ -282,9 +285,13 @@  struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
 {
 	struct nfc_llcp_local *local;
 
+	mutex_lock(&llcp_devices_list_lock);
 	list_for_each_entry(local, &llcp_devices, list)
-		if (local->dev == dev)
+		if (local->dev == dev) {
+			mutex_unlock(&llcp_devices_list_lock);
 			return local;
+		}
+	mutex_unlock(&llcp_devices_list_lock);
 
 	pr_debug("No device found\n");
 
@@ -1600,7 +1607,9 @@  int nfc_llcp_register_device(struct nfc_dev *ndev)
 	timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0);
 	INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
 
+	mutex_lock(&llcp_devices_list_lock);
 	list_add(&local->list, &llcp_devices);
+	mutex_unlock(&llcp_devices_list_lock);
 
 	return 0;
 }