[v3,3/3] md: fix stopping sync thread

Message ID 20231129043127.2245901-4-yukuai1@huaweicloud.com
State New
Headers
Series md: fix stopping sync thread |

Commit Message

Yu Kuai Nov. 29, 2023, 4:31 a.m. UTC
  From: Yu Kuai <yukuai3@huawei.com>

Currently sync thread is stopped from multiple contex:
 - idle_sync_thread
 - frozen_sync_thread
 - __md_stop_writes
 - md_set_readonly
 - do_md_stop

And there are some problems:
1) sync_work is flushed while reconfig_mutex is grabbed, this can
   deadlock because the work function will grab reconfig_mutex as well.
2) md_reap_sync_thread() can't be called directly while md_do_sync() is
   not finished yet, for example, commit 130443d60b1b ("md: refactor
   idle/frozen_sync_thread() to fix deadlock").
3) If MD_RECOVERY_RUNNING is not set, there is no need to stop
   sync_thread at all because sync_thread must not be registered.

Factor out a helper prepare_to_stop_sync_thread(), so that above contex
will behave the same. Fix 1) by flushing sync_work after reconfig_mutex
is released, before waiting for sync_thread to be done; Fix 2) bt
letting daemon thread to unregister sync_thread; Fix 3) by always
checking MD_RECOVERY_RUNNING first.

Fixes: db5e653d7c9f ("md: delay choosing sync action to md_start_sync()")
Acked-by: Xiao Ni <xni@redhat.com>

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/md/md.c | 96 +++++++++++++++++++++++--------------------------
 1 file changed, 45 insertions(+), 51 deletions(-)
  

Comments

Song Liu Dec. 1, 2023, 10:08 p.m. UTC | #1
On Tue, Nov 28, 2023 at 8:32 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@huawei.com>
>
> Currently sync thread is stopped from multiple contex:
>  - idle_sync_thread
>  - frozen_sync_thread
>  - __md_stop_writes
>  - md_set_readonly
>  - do_md_stop
>
> And there are some problems:
> 1) sync_work is flushed while reconfig_mutex is grabbed, this can
>    deadlock because the work function will grab reconfig_mutex as well.
> 2) md_reap_sync_thread() can't be called directly while md_do_sync() is
>    not finished yet, for example, commit 130443d60b1b ("md: refactor
>    idle/frozen_sync_thread() to fix deadlock").
> 3) If MD_RECOVERY_RUNNING is not set, there is no need to stop
>    sync_thread at all because sync_thread must not be registered.
>
> Factor out a helper prepare_to_stop_sync_thread(), so that above contex
> will behave the same. Fix 1) by flushing sync_work after reconfig_mutex
> is released, before waiting for sync_thread to be done; Fix 2) bt
> letting daemon thread to unregister sync_thread; Fix 3) by always
> checking MD_RECOVERY_RUNNING first.
>
> Fixes: db5e653d7c9f ("md: delay choosing sync action to md_start_sync()")
> Acked-by: Xiao Ni <xni@redhat.com>
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  drivers/md/md.c | 96 +++++++++++++++++++++++--------------------------
>  1 file changed, 45 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 2d8e45a1af23..05902e36db66 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -4840,26 +4840,9 @@ action_show(struct mddev *mddev, char *page)
>         return sprintf(page, "%s\n", type);
>  }
>
> -static void stop_sync_thread(struct mddev *mddev)
> +static void prepare_to_stop_sync_thread(struct mddev *mddev)
> +       __releases(&mddev->reconfig_mutex)
>  {
> -       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> -               return;
> -
> -       if (mddev_lock(mddev))
> -               return;
> -
> -       /*
> -        * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
> -        * held.
> -        */
> -       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> -               mddev_unlock(mddev);
> -               return;
> -       }
> -
> -       if (work_pending(&mddev->sync_work))
> -               flush_workqueue(md_misc_wq);
> -
>         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>         /*
>          * Thread might be blocked waiting for metadata update which will now
> @@ -4868,6 +4851,8 @@ static void stop_sync_thread(struct mddev *mddev)
>         md_wakeup_thread_directly(mddev->sync_thread);
>
>         mddev_unlock(mddev);
> +       if (work_pending(&mddev->sync_work))
> +               flush_work(&mddev->sync_work);
>  }
>
>  static void idle_sync_thread(struct mddev *mddev)
> @@ -4876,10 +4861,20 @@ static void idle_sync_thread(struct mddev *mddev)
>
>         mutex_lock(&mddev->sync_mutex);
>         clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -       stop_sync_thread(mddev);
>
> -       wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
> +       if (mddev_lock(mddev)) {
> +               mutex_unlock(&mddev->sync_mutex);
> +               return;
> +       }
> +
> +       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> +               prepare_to_stop_sync_thread(mddev);
> +               wait_event(resync_wait,
> +                       sync_seq != atomic_read(&mddev->sync_seq) ||
>                         !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> +       } else {
> +               mddev_unlock(mddev);
> +       }

We have a few patterns like this:

        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
                prepare_to_stop_sync_thread(mddev);
                wait_event(resync_wait,
                        sync_seq != atomic_read(&mddev->sync_seq) ||
                        !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
        } else {
                mddev_unlock(mddev);
        }

This is pretty confusing. Maybe try something like (untested)

static void stop_sync_thread(struct mddev *mddev,
                             bool do_unlock, bool check_sync_seq)
{
        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
                if (do_unlock)
                        mddev_unlock(mddev);
                return;
        }
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
        /*
         * Thread might be blocked waiting for metadata update which will now
         * never happen
         */
        md_wakeup_thread_directly(mddev->sync_thread);

        mddev_unlock(mddev);
        if (work_pending(&mddev->sync_work))
                flush_work(&mddev->sync_work);
        wait_event(resync_wait,
                   !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
                   (check_sync_seq && sync_seq !=
atomic_read(&mddev->sync_seq)));
        if (!do_unlock)
                mddev_lock_nointr(mddev);
}

Thanks,
Song
  
Yu Kuai Dec. 2, 2023, 7:50 a.m. UTC | #2
Hi,

在 2023/12/02 6:08, Song Liu 写道:
> On Tue, Nov 28, 2023 at 8:32 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Currently sync thread is stopped from multiple contex:
>>   - idle_sync_thread
>>   - frozen_sync_thread
>>   - __md_stop_writes
>>   - md_set_readonly
>>   - do_md_stop
>>
>> And there are some problems:
>> 1) sync_work is flushed while reconfig_mutex is grabbed, this can
>>     deadlock because the work function will grab reconfig_mutex as well.
>> 2) md_reap_sync_thread() can't be called directly while md_do_sync() is
>>     not finished yet, for example, commit 130443d60b1b ("md: refactor
>>     idle/frozen_sync_thread() to fix deadlock").
>> 3) If MD_RECOVERY_RUNNING is not set, there is no need to stop
>>     sync_thread at all because sync_thread must not be registered.
>>
>> Factor out a helper prepare_to_stop_sync_thread(), so that above contex
>> will behave the same. Fix 1) by flushing sync_work after reconfig_mutex
>> is released, before waiting for sync_thread to be done; Fix 2) bt
>> letting daemon thread to unregister sync_thread; Fix 3) by always
>> checking MD_RECOVERY_RUNNING first.
>>
>> Fixes: db5e653d7c9f ("md: delay choosing sync action to md_start_sync()")
>> Acked-by: Xiao Ni <xni@redhat.com>
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   drivers/md/md.c | 96 +++++++++++++++++++++++--------------------------
>>   1 file changed, 45 insertions(+), 51 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 2d8e45a1af23..05902e36db66 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -4840,26 +4840,9 @@ action_show(struct mddev *mddev, char *page)
>>          return sprintf(page, "%s\n", type);
>>   }
>>
>> -static void stop_sync_thread(struct mddev *mddev)
>> +static void prepare_to_stop_sync_thread(struct mddev *mddev)
>> +       __releases(&mddev->reconfig_mutex)
>>   {
>> -       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
>> -               return;
>> -
>> -       if (mddev_lock(mddev))
>> -               return;
>> -
>> -       /*
>> -        * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
>> -        * held.
>> -        */
>> -       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>> -               mddev_unlock(mddev);
>> -               return;
>> -       }
>> -
>> -       if (work_pending(&mddev->sync_work))
>> -               flush_workqueue(md_misc_wq);
>> -
>>          set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>>          /*
>>           * Thread might be blocked waiting for metadata update which will now
>> @@ -4868,6 +4851,8 @@ static void stop_sync_thread(struct mddev *mddev)
>>          md_wakeup_thread_directly(mddev->sync_thread);
>>
>>          mddev_unlock(mddev);
>> +       if (work_pending(&mddev->sync_work))
>> +               flush_work(&mddev->sync_work);
>>   }
>>
>>   static void idle_sync_thread(struct mddev *mddev)
>> @@ -4876,10 +4861,20 @@ static void idle_sync_thread(struct mddev *mddev)
>>
>>          mutex_lock(&mddev->sync_mutex);
>>          clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> -       stop_sync_thread(mddev);
>>
>> -       wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
>> +       if (mddev_lock(mddev)) {
>> +               mutex_unlock(&mddev->sync_mutex);
>> +               return;
>> +       }
>> +
>> +       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>> +               prepare_to_stop_sync_thread(mddev);
>> +               wait_event(resync_wait,
>> +                       sync_seq != atomic_read(&mddev->sync_seq) ||
>>                          !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
>> +       } else {
>> +               mddev_unlock(mddev);
>> +       }
> 
> We have a few patterns like this:
> 
>          if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>                  prepare_to_stop_sync_thread(mddev);
>                  wait_event(resync_wait,
>                          sync_seq != atomic_read(&mddev->sync_seq) ||
>                          !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
>          } else {
>                  mddev_unlock(mddev);
>          }
> 
> This is pretty confusing. Maybe try something like (untested)
> 
> static void stop_sync_thread(struct mddev *mddev,
>                               bool do_unlock, bool check_sync_seq)
Yes, if we can accept use parameter to distinguish different use case.

Thanks,
Kuai

> {
>          if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>                  if (do_unlock)
>                          mddev_unlock(mddev);
>                  return;
>          }
>          set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>          /*
>           * Thread might be blocked waiting for metadata update which will now
>           * never happen
>           */
>          md_wakeup_thread_directly(mddev->sync_thread);
> 
>          mddev_unlock(mddev);
>          if (work_pending(&mddev->sync_work))
>                  flush_work(&mddev->sync_work);
>          wait_event(resync_wait,
>                     !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
>                     (check_sync_seq && sync_seq !=
> atomic_read(&mddev->sync_seq)));
>          if (!do_unlock)
>                  mddev_lock_nointr(mddev);
> }
> 
> Thanks,
> Song
> 
> .
>
  

Patch

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2d8e45a1af23..05902e36db66 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4840,26 +4840,9 @@  action_show(struct mddev *mddev, char *page)
 	return sprintf(page, "%s\n", type);
 }
 
-static void stop_sync_thread(struct mddev *mddev)
+static void prepare_to_stop_sync_thread(struct mddev *mddev)
+	__releases(&mddev->reconfig_mutex)
 {
-	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-		return;
-
-	if (mddev_lock(mddev))
-		return;
-
-	/*
-	 * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
-	 * held.
-	 */
-	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
-		mddev_unlock(mddev);
-		return;
-	}
-
-	if (work_pending(&mddev->sync_work))
-		flush_workqueue(md_misc_wq);
-
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	/*
 	 * Thread might be blocked waiting for metadata update which will now
@@ -4868,6 +4851,8 @@  static void stop_sync_thread(struct mddev *mddev)
 	md_wakeup_thread_directly(mddev->sync_thread);
 
 	mddev_unlock(mddev);
+	if (work_pending(&mddev->sync_work))
+		flush_work(&mddev->sync_work);
 }
 
 static void idle_sync_thread(struct mddev *mddev)
@@ -4876,10 +4861,20 @@  static void idle_sync_thread(struct mddev *mddev)
 
 	mutex_lock(&mddev->sync_mutex);
 	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-	stop_sync_thread(mddev);
 
-	wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
+	if (mddev_lock(mddev)) {
+		mutex_unlock(&mddev->sync_mutex);
+		return;
+	}
+
+	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+		prepare_to_stop_sync_thread(mddev);
+		wait_event(resync_wait,
+			sync_seq != atomic_read(&mddev->sync_seq) ||
 			!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+	} else {
+		mddev_unlock(mddev);
+	}
 
 	mutex_unlock(&mddev->sync_mutex);
 }
@@ -4888,10 +4883,19 @@  static void frozen_sync_thread(struct mddev *mddev)
 {
 	mutex_lock(&mddev->sync_mutex);
 	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-	stop_sync_thread(mddev);
 
-	wait_event(resync_wait, mddev->sync_thread == NULL &&
+	if (mddev_lock(mddev)) {
+		mutex_unlock(&mddev->sync_mutex);
+		return;
+	}
+
+	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+		prepare_to_stop_sync_thread(mddev);
+		wait_event(resync_wait,
 			!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+	} else {
+		mddev_unlock(mddev);
+	}
 
 	mutex_unlock(&mddev->sync_mutex);
 }
@@ -6265,11 +6269,11 @@  static void md_clean(struct mddev *mddev)
 static void __md_stop_writes(struct mddev *mddev)
 {
 	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-	if (work_pending(&mddev->sync_work))
-		flush_workqueue(md_misc_wq);
-	if (mddev->sync_thread) {
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-		md_reap_sync_thread(mddev);
+	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+		prepare_to_stop_sync_thread(mddev);
+		wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+						  &mddev->recovery));
+		mddev_lock_nointr(mddev);
 	}
 
 	del_timer_sync(&mddev->safemode_timer);
@@ -6363,18 +6367,15 @@  static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
 	}
-	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 
-	/*
-	 * Thread might be blocked waiting for metadata update which will now
-	 * never happen
-	 */
-	md_wakeup_thread_directly(mddev->sync_thread);
+	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+		prepare_to_stop_sync_thread(mddev);
+		wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+						  &mddev->recovery));
+	} else {
+		mddev_unlock(mddev);
+	}
 
-	mddev_unlock(mddev);
-	wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
-					  &mddev->recovery));
 	wait_event(mddev->sb_wait,
 		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
 	mddev_lock_nointr(mddev);
@@ -6428,20 +6429,13 @@  static int do_md_stop(struct mddev *mddev, int mode,
 		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
 	}
-	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 
-	/*
-	 * Thread might be blocked waiting for metadata update which will now
-	 * never happen
-	 */
-	md_wakeup_thread_directly(mddev->sync_thread);
-
-	mddev_unlock(mddev);
-	wait_event(resync_wait, (mddev->sync_thread == NULL &&
-				 !test_bit(MD_RECOVERY_RUNNING,
-					   &mddev->recovery)));
-	mddev_lock_nointr(mddev);
+	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+		prepare_to_stop_sync_thread(mddev);
+		wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+						  &mddev->recovery));
+		mddev_lock_nointr(mddev);
+	}
 
 	mutex_lock(&mddev->open_mutex);
 	if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||