[-next,6/8] md: factor out a helper to stop sync_thread

Message ID 20231110172834.3939490-7-yukuai1@huaweicloud.com
State New
Series md: bugfix and cleanup for sync_thread

Commit Message

Yu Kuai Nov. 10, 2023, 5:28 p.m. UTC
  From: Yu Kuai <yukuai3@huawei.com>

stop_sync_thread(), md_set_readonly() and do_md_stop() are trying to
stop sync_thread() the same way, hence factor out a helper to make code
cleaner, and also prepare to use the new helper to fix problems later.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Yu Kuai <yukuai1@huaweicloud.com>
---
 drivers/md/md.c | 129 ++++++++++++++++++++++++++----------------------
 1 file changed, 69 insertions(+), 60 deletions(-)
  

Comments

Xiao Ni Nov. 14, 2023, 7:37 a.m. UTC | #1
Hi all

It's good to put the common code into one function. Before that, I
want to check one thing: does idle_sync_thread need to stop the sync
thread at all? The sync thread can start again immediately after it
is stopped by echo idle > sync_action, so stopping it in
idle_sync_thread looks pointless. If we don't need to stop the sync
thread in idle_sync_thread, there is no need to introduce
mddev->sync_seq; we only need to clear MD_RECOVERY_FROZEN in
idle_sync_thread. That would make this patch simpler. Something like
this:

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3484a0fc4d2a..34245c4c71b8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -716,7 +716,6 @@ int mddev_init(struct mddev *mddev)
        timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
        atomic_set(&mddev->active, 1);
        atomic_set(&mddev->openers, 0);
-       atomic_set(&mddev->sync_seq, 0);
        spin_lock_init(&mddev->lock);
        atomic_set(&mddev->flush_pending, 0);
        init_waitqueue_head(&mddev->sb_wait);
@@ -4848,38 +4847,33 @@ action_show(struct mddev *mddev, char *page)
        return sprintf(page, "%s\n", type);
 }

-static int stop_sync_thread(struct mddev *mddev)
+static void stop_sync_thread(struct mddev *mddev)
 {
        int ret = 0;

-       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-               return 0;

-       ret = mddev_lock(mddev);
-       if (ret)
-               return ret;
+       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+               return;

-       /*
-        * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
-        * held.
-        */
-       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
-               mddev_unlock(mddev);
-               return 0;
+       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+               did_freeze = 1;
+               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+               md_wakeup_thread(mddev->thread);
        }

-       if (work_pending(&mddev->sync_work))
-               flush_workqueue(md_misc_wq);
-
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+
        /*
         * Thread might be blocked waiting for metadata update which will now
         * never happen
         */
        md_wakeup_thread_directly(mddev->sync_thread);

-       mddev_unlock(mddev);
-       return 0;
+        mddev_unlock(mddev);
+       wait_event(resync_wait, (mddev->sync_thread == NULL &&
+                                       !test_bit(MD_RECOVERY_RUNNING,
+                                       &mddev->recovery)));
+        mddev_lock_nointr(mddev);
 }

 static int idle_sync_thread(struct mddev *mddev)
@@ -4891,8 +4885,14 @@ static int idle_sync_thread(struct mddev *mddev)
        if (ret)
                return ret;

-       mddev_lock(mddev);
-       test_and_clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+       ret = mddev_lock(mddev);
+       if (ret) {
+               mutex_unlock(&mddev->sync_mutex);
+               return ret;
+       }
+
+       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
        mddev_unlock(mddev);
        mutex_unlock(&mddev->sync_mutex);
        return ret;
@@ -4906,17 +4906,15 @@ static int frozen_sync_thread(struct mddev *mddev)
        if (ret)
                return ret;

-       flag = test_and_set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-       ret = stop_sync_thread(mddev);
-       if (ret)
-               goto out;
+       ret = mddev_lock(mddev);
+       if (ret) {
+               mutex_unlock(&mddev->sync_mutex);
+               return ret;
+       }

-       ret = wait_event_interruptible(resync_wait,
-                       mddev->sync_thread == NULL &&
-                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
-out:
-       if (ret && !flag)
-               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+       stop_sync_thread(mddev);
+
+       mddev_unlock(mddev);
        mutex_unlock(&mddev->sync_mutex);
        return ret;
 }
@@ -6388,22 +6386,9 @@ static int md_set_readonly(struct mddev *mddev,
struct block_device *bdev)
        if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
                return -EBUSY;

-       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
-               did_freeze = 1;
-               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-       }
-       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
-       /*
-        * Thread might be blocked waiting for metadata update which will now
-        * never happen
-        */
-       md_wakeup_thread_directly(mddev->sync_thread);
+       stop_sync_thread(mddev);

        mddev_unlock(mddev);
-       wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
-                                         &mddev->recovery));
        wait_event(mddev->sb_wait,
                   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
        mddev_lock_nointr(mddev);
@@ -6448,25 +6433,8 @@ static int do_md_stop(struct mddev *mddev, int mode,
        struct md_rdev *rdev;
        int did_freeze = 0;

-       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
-               did_freeze = 1;
-               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-       }
-       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
-       /*
-        * Thread might be blocked waiting for metadata update which will now
-        * never happen
-        */
-       md_wakeup_thread_directly(mddev->sync_thread);
-
-       mddev_unlock(mddev);
-       wait_event(resync_wait, (mddev->sync_thread == NULL &&
-                                !test_bit(MD_RECOVERY_RUNNING,
-                                          &mddev->recovery)));
-       mddev_lock_nointr(mddev);
-
+       stop_sync_thread(mddev);
+
        mutex_lock(&mddev->open_mutex);
        if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
            mddev->sysfs_active ||
@@ -9622,7 +9590,6 @@ void md_reap_sync_thread(struct mddev *mddev)

        /* resync has finished, collect result */
        md_unregister_thread(mddev, &mddev->sync_thread);
-       atomic_inc(&mddev->sync_seq);

        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
            !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&

Best Regards
Xiao

On Fri, Nov 10, 2023 at 5:34 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@huawei.com>
>
> stop_sync_thread(), md_set_readonly() and do_md_stop() are trying to
> stop sync_thread() the same way, hence factor out a helper to make code
> cleaner, and also prepare to use the new helper to fix problems later.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> Signed-off-by: Yu Kuai <yukuai1@huaweicloud.com>
> ---
>  drivers/md/md.c | 129 ++++++++++++++++++++++++++----------------------
>  1 file changed, 69 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index c0f2bdafe46a..7252fae0c989 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -4848,29 +4848,46 @@ action_show(struct mddev *mddev, char *page)
>         return sprintf(page, "%s\n", type);
>  }
>
> -static int stop_sync_thread(struct mddev *mddev)
> +static bool sync_thread_stopped(struct mddev *mddev, int *seq_ptr)
>  {
> -       int ret = 0;
> +       if (seq_ptr && *seq_ptr != atomic_read(&mddev->sync_seq))
> +               return true;
>
> -       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> -               return 0;
> +       return (!mddev->sync_thread &&
> +               !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> +}
>
> -       ret = mddev_lock(mddev);
> -       if (ret)
> -               return ret;
> +/*
> + * stop_sync_thread() - stop running sync_thread.
> + * @mddev: the array that sync_thread belongs to.
> + * @freeze: set true to prevent new sync_thread to start.
> + * @interruptible: if set true, then user can interrupt while waiting for
> + * sync_thread to be done.
> + *
> + * Noted that this function must be called with 'reconfig_mutex' grabbed, and
> + * fter this function return, 'reconfig_mtuex' will be released.
> + */
> +static int stop_sync_thread(struct mddev *mddev, bool freeze,
> +                           bool interruptible)
> +       __releases(&mddev->reconfig_mutex)
> +{
> +       int *seq_ptr = NULL;
> +       int sync_seq;
> +       int ret = 0;
> +
> +       if (freeze) {
> +               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       } else {
> +               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +               sync_seq = atomic_read(&mddev->sync_seq);
> +               seq_ptr = &sync_seq;
> +       }
>
> -       /*
> -        * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
> -        * held.
> -        */
>         if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>                 mddev_unlock(mddev);
>                 return 0;
>         }
>
> -       if (work_pending(&mddev->sync_work))
> -               flush_workqueue(md_misc_wq);
> -
>         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>         /*
>          * Thread might be blocked waiting for metadata update which will now
> @@ -4879,53 +4896,58 @@ static int stop_sync_thread(struct mddev *mddev)
>         md_wakeup_thread_directly(mddev->sync_thread);
>
>         mddev_unlock(mddev);
> -       return 0;
> +       if (work_pending(&mddev->sync_work))
> +               flush_work(&mddev->sync_work);
> +
> +       if (interruptible)
> +               ret = wait_event_interruptible(resync_wait,
> +                                       sync_thread_stopped(mddev, seq_ptr));
> +       else
> +               wait_event(resync_wait, sync_thread_stopped(mddev, seq_ptr));
> +
> +       return ret;
>  }
>
>  static int idle_sync_thread(struct mddev *mddev)
>  {
>         int ret;
> -       int sync_seq = atomic_read(&mddev->sync_seq);
>         bool flag;
>
>         ret = mutex_lock_interruptible(&mddev->sync_mutex);
>         if (ret)
>                 return ret;
>
> -       flag = test_and_clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -       ret = stop_sync_thread(mddev);
> +       flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       ret = mddev_lock(mddev);
>         if (ret)
> -               goto out;
> +               goto unlock;
>
> -       ret = wait_event_interruptible(resync_wait,
> -                       sync_seq != atomic_read(&mddev->sync_seq) ||
> -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> -out:
> +       ret = stop_sync_thread(mddev, false, true);
>         if (ret && flag)
>                 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +unlock:
>         mutex_unlock(&mddev->sync_mutex);
>         return ret;
>  }
>
>  static int frozen_sync_thread(struct mddev *mddev)
>  {
> -       int ret = mutex_lock_interruptible(&mddev->sync_mutex);
> +       int ret;
>         bool flag;
>
> +       ret = mutex_lock_interruptible(&mddev->sync_mutex);
>         if (ret)
>                 return ret;
>
> -       flag = test_and_set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -       ret = stop_sync_thread(mddev);
> +       flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       ret = mddev_lock(mddev);
>         if (ret)
> -               goto out;
> +               goto unlock;
>
> -       ret = wait_event_interruptible(resync_wait,
> -                       mddev->sync_thread == NULL &&
> -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> -out:
> +       ret = stop_sync_thread(mddev, true, true);
>         if (ret && !flag)
>                 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +unlock:
>         mutex_unlock(&mddev->sync_mutex);
>         return ret;
>  }
> @@ -6397,22 +6419,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>         if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
>                 return -EBUSY;
>
> -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
>                 did_freeze = 1;
> -               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -       }
> -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>
> -       /*
> -        * Thread might be blocked waiting for metadata update which will now
> -        * never happen
> -        */
> -       md_wakeup_thread_directly(mddev->sync_thread);
> -
> -       mddev_unlock(mddev);
> -       wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
> -                                         &mddev->recovery));
> +       stop_sync_thread(mddev, true, false);
>         wait_event(mddev->sb_wait,
>                    !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
>         mddev_lock_nointr(mddev);
> @@ -6421,6 +6431,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>         if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
>             mddev->sync_thread ||
>             test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> +               /*
> +                * This could happen if user change array state through
> +                * ioctl/sysfs while reconfig_mutex is released.
> +                */
>                 pr_warn("md: %s still in use.\n",mdname(mddev));
>                 err = -EBUSY;
>                 goto out;
> @@ -6457,30 +6471,25 @@ static int do_md_stop(struct mddev *mddev, int mode,
>         struct md_rdev *rdev;
>         int did_freeze = 0;
>
> -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
>                 did_freeze = 1;
> +
> +       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> +               stop_sync_thread(mddev, true, false);
> +               mddev_lock_nointr(mddev);
> +       } else {
>                 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>         }
> -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> -
> -       /*
> -        * Thread might be blocked waiting for metadata update which will now
> -        * never happen
> -        */
> -       md_wakeup_thread_directly(mddev->sync_thread);
> -
> -       mddev_unlock(mddev);
> -       wait_event(resync_wait, (mddev->sync_thread == NULL &&
> -                                !test_bit(MD_RECOVERY_RUNNING,
> -                                          &mddev->recovery)));
> -       mddev_lock_nointr(mddev);
>
>         mutex_lock(&mddev->open_mutex);
>         if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
>             mddev->sysfs_active ||
>             mddev->sync_thread ||
>             test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> +               /*
> +                * This could happen if user change array state through
> +                * ioctl/sysfs while reconfig_mutex is released.
> +                */
>                 pr_warn("md: %s still in use.\n",mdname(mddev));
>                 mutex_unlock(&mddev->open_mutex);
>                 if (did_freeze) {
> --
> 2.39.2
>
  
Yu Kuai Nov. 14, 2023, 9:34 a.m. UTC | #2
Hi,

On 2023/11/14 15:37, Xiao Ni wrote:
> Hi all
> 
> It's good to put the common code into one function. Before that, I
> want to check one thing: does idle_sync_thread need to stop the sync
> thread at all? The sync thread can start again immediately after it
> is stopped by echo idle > sync_action, so stopping it in
> idle_sync_thread looks pointless. If we don't need to stop the sync
> thread in idle_sync_thread, there is no need to introduce
> mddev->sync_seq; we only need to clear MD_RECOVERY_FROZEN in
> idle_sync_thread. That would make this patch simpler. Something like
> this:

1) commit 7eec314d7512 ("[PATCH] md: improve 'scan_mode' and rename it
to 'sync_action'") first introduced echo idle > sync_action, and it
made sure to stop the current sync_thread.

2) commit 8e8e2518fcec ("md: Close race when setting 'action' to
'idle'.") added mddev_unlock after stopping sync_thread; that's why
sync_thread will always restart after echo idle > sync_action.

So this is actually a regression: echo idle > sync_action doesn't work
anymore, but it looks like nobody has cared about it all these years.
I'm fine with either removing the echo idle behaviour or trying to fix
this regression.

Song, please let me know what you think.

Thanks,
Kuai

> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 3484a0fc4d2a..34245c4c71b8 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -716,7 +716,6 @@ int mddev_init(struct mddev *mddev)
>          timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
>          atomic_set(&mddev->active, 1);
>          atomic_set(&mddev->openers, 0);
> -       atomic_set(&mddev->sync_seq, 0);
>          spin_lock_init(&mddev->lock);
>          atomic_set(&mddev->flush_pending, 0);
>          init_waitqueue_head(&mddev->sb_wait);
> @@ -4848,38 +4847,33 @@ action_show(struct mddev *mddev, char *page)
>          return sprintf(page, "%s\n", type);
>   }
> 
> -static int stop_sync_thread(struct mddev *mddev)
> +static void stop_sync_thread(struct mddev *mddev)
>   {
>          int ret = 0;
> 
> -       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> -               return 0;
> 
> -       ret = mddev_lock(mddev);
> -       if (ret)
> -               return ret;
> +       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> +               return;
> 
> -       /*
> -        * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
> -        * held.
> -        */
> -       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> -               mddev_unlock(mddev);
> -               return 0;
> +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> +               did_freeze = 1;
> +               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +               md_wakeup_thread(mddev->thread);
>          }
> 
> -       if (work_pending(&mddev->sync_work))
> -               flush_workqueue(md_misc_wq);
> -
>          set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> +
>          /*
>           * Thread might be blocked waiting for metadata update which will now
>           * never happen
>           */
>          md_wakeup_thread_directly(mddev->sync_thread);
> 
> -       mddev_unlock(mddev);
> -       return 0;
> +        mddev_unlock(mddev);
> +       wait_event(resync_wait, (mddev->sync_thread == NULL &&
> +                                       !test_bit(MD_RECOVERY_RUNNING,
> +                                       &mddev->recovery)));
> +        mddev_lock_nointr(mddev);
>   }
> 
>   static int idle_sync_thread(struct mddev *mddev)
> @@ -4891,8 +4885,14 @@ static int idle_sync_thread(struct mddev *mddev)
>          if (ret)
>                  return ret;
> 
> -       mddev_lock(mddev);
> -       test_and_clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       ret = mddev_lock(mddev);
> +       if (ret) {
> +               mutex_unlock(&mddev->sync_mutex);
> +               return ret;
> +       }
> +
> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +
>          mddev_unlock(mddev);
>          mutex_unlock(&mddev->sync_mutex);
>          return ret;
> @@ -4906,17 +4906,15 @@ static int frozen_sync_thread(struct mddev *mddev)
>          if (ret)
>                  return ret;
> 
> -       flag = test_and_set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -       ret = stop_sync_thread(mddev);
> -       if (ret)
> -               goto out;
> +       ret = mddev_lock(mddev);
> +       if (ret) {
> +               mutex_unlock(&mddev->sync_mutex);
> +               return ret;
> +       }
> 
> -       ret = wait_event_interruptible(resync_wait,
> -                       mddev->sync_thread == NULL &&
> -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> -out:
> -       if (ret && !flag)
> -               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       stop_sync_thread(mddev);
> +
> +       mddev_unlock(mddev);
>          mutex_unlock(&mddev->sync_mutex);
>          return ret;
>   }
> @@ -6388,22 +6386,9 @@ static int md_set_readonly(struct mddev *mddev,
> struct block_device *bdev)
>          if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
>                  return -EBUSY;
> 
> -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> -               did_freeze = 1;
> -               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -       }
> -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> -
> -       /*
> -        * Thread might be blocked waiting for metadata update which will now
> -        * never happen
> -        */
> -       md_wakeup_thread_directly(mddev->sync_thread);
> +       stop_sync_thread(mddev);
> 
>          mddev_unlock(mddev);
> -       wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
> -                                         &mddev->recovery));
>          wait_event(mddev->sb_wait,
>                     !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
>          mddev_lock_nointr(mddev);
> @@ -6448,25 +6433,8 @@ static int do_md_stop(struct mddev *mddev, int mode,
>          struct md_rdev *rdev;
>          int did_freeze = 0;
> 
> -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> -               did_freeze = 1;
> -               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -       }
> -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> -
> -       /*
> -        * Thread might be blocked waiting for metadata update which will now
> -        * never happen
> -        */
> -       md_wakeup_thread_directly(mddev->sync_thread);
> -
> -       mddev_unlock(mddev);
> -       wait_event(resync_wait, (mddev->sync_thread == NULL &&
> -                                !test_bit(MD_RECOVERY_RUNNING,
> -                                          &mddev->recovery)));
> -       mddev_lock_nointr(mddev);
> -
> +       stop_sync_thread(mddev);
> +
>          mutex_lock(&mddev->open_mutex);
>          if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
>              mddev->sysfs_active ||
> @@ -9622,7 +9590,6 @@ void md_reap_sync_thread(struct mddev *mddev)
> 
>          /* resync has finished, collect result */
>          md_unregister_thread(mddev, &mddev->sync_thread);
> -       atomic_inc(&mddev->sync_seq);
> 
>          if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
>              !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
> 
> Best Regards
> Xiao
> 
> On Fri, Nov 10, 2023 at 5:34 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> stop_sync_thread(), md_set_readonly() and do_md_stop() are trying to
>> stop sync_thread() the same way, hence factor out a helper to make code
>> cleaner, and also prepare to use the new helper to fix problems later.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> Signed-off-by: Yu Kuai <yukuai1@huaweicloud.com>
>> ---
>>   drivers/md/md.c | 129 ++++++++++++++++++++++++++----------------------
>>   1 file changed, 69 insertions(+), 60 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index c0f2bdafe46a..7252fae0c989 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -4848,29 +4848,46 @@ action_show(struct mddev *mddev, char *page)
>>          return sprintf(page, "%s\n", type);
>>   }
>>
>> -static int stop_sync_thread(struct mddev *mddev)
>> +static bool sync_thread_stopped(struct mddev *mddev, int *seq_ptr)
>>   {
>> -       int ret = 0;
>> +       if (seq_ptr && *seq_ptr != atomic_read(&mddev->sync_seq))
>> +               return true;
>>
>> -       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
>> -               return 0;
>> +       return (!mddev->sync_thread &&
>> +               !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
>> +}
>>
>> -       ret = mddev_lock(mddev);
>> -       if (ret)
>> -               return ret;
>> +/*
>> + * stop_sync_thread() - stop running sync_thread.
>> + * @mddev: the array that sync_thread belongs to.
>> + * @freeze: set true to prevent new sync_thread to start.
>> + * @interruptible: if set true, then user can interrupt while waiting for
>> + * sync_thread to be done.
>> + *
>> + * Noted that this function must be called with 'reconfig_mutex' grabbed, and
>> + * fter this function return, 'reconfig_mtuex' will be released.
>> + */
>> +static int stop_sync_thread(struct mddev *mddev, bool freeze,
>> +                           bool interruptible)
>> +       __releases(&mddev->reconfig_mutex)
>> +{
>> +       int *seq_ptr = NULL;
>> +       int sync_seq;
>> +       int ret = 0;
>> +
>> +       if (freeze) {
>> +               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +       } else {
>> +               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +               sync_seq = atomic_read(&mddev->sync_seq);
>> +               seq_ptr = &sync_seq;
>> +       }
>>
>> -       /*
>> -        * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
>> -        * held.
>> -        */
>>          if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>>                  mddev_unlock(mddev);
>>                  return 0;
>>          }
>>
>> -       if (work_pending(&mddev->sync_work))
>> -               flush_workqueue(md_misc_wq);
>> -
>>          set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>>          /*
>>           * Thread might be blocked waiting for metadata update which will now
>> @@ -4879,53 +4896,58 @@ static int stop_sync_thread(struct mddev *mddev)
>>          md_wakeup_thread_directly(mddev->sync_thread);
>>
>>          mddev_unlock(mddev);
>> -       return 0;
>> +       if (work_pending(&mddev->sync_work))
>> +               flush_work(&mddev->sync_work);
>> +
>> +       if (interruptible)
>> +               ret = wait_event_interruptible(resync_wait,
>> +                                       sync_thread_stopped(mddev, seq_ptr));
>> +       else
>> +               wait_event(resync_wait, sync_thread_stopped(mddev, seq_ptr));
>> +
>> +       return ret;
>>   }
>>
>>   static int idle_sync_thread(struct mddev *mddev)
>>   {
>>          int ret;
>> -       int sync_seq = atomic_read(&mddev->sync_seq);
>>          bool flag;
>>
>>          ret = mutex_lock_interruptible(&mddev->sync_mutex);
>>          if (ret)
>>                  return ret;
>>
>> -       flag = test_and_clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> -       ret = stop_sync_thread(mddev);
>> +       flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +       ret = mddev_lock(mddev);
>>          if (ret)
>> -               goto out;
>> +               goto unlock;
>>
>> -       ret = wait_event_interruptible(resync_wait,
>> -                       sync_seq != atomic_read(&mddev->sync_seq) ||
>> -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
>> -out:
>> +       ret = stop_sync_thread(mddev, false, true);
>>          if (ret && flag)
>>                  set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +unlock:
>>          mutex_unlock(&mddev->sync_mutex);
>>          return ret;
>>   }
>>
>>   static int frozen_sync_thread(struct mddev *mddev)
>>   {
>> -       int ret = mutex_lock_interruptible(&mddev->sync_mutex);
>> +       int ret;
>>          bool flag;
>>
>> +       ret = mutex_lock_interruptible(&mddev->sync_mutex);
>>          if (ret)
>>                  return ret;
>>
>> -       flag = test_and_set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> -       ret = stop_sync_thread(mddev);
>> +       flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +       ret = mddev_lock(mddev);
>>          if (ret)
>> -               goto out;
>> +               goto unlock;
>>
>> -       ret = wait_event_interruptible(resync_wait,
>> -                       mddev->sync_thread == NULL &&
>> -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
>> -out:
>> +       ret = stop_sync_thread(mddev, true, true);
>>          if (ret && !flag)
>>                  clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +unlock:
>>          mutex_unlock(&mddev->sync_mutex);
>>          return ret;
>>   }
>> @@ -6397,22 +6419,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>>          if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
>>                  return -EBUSY;
>>
>> -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
>> +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
>>                  did_freeze = 1;
>> -               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> -       }
>> -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
>> -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>>
>> -       /*
>> -        * Thread might be blocked waiting for metadata update which will now
>> -        * never happen
>> -        */
>> -       md_wakeup_thread_directly(mddev->sync_thread);
>> -
>> -       mddev_unlock(mddev);
>> -       wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
>> -                                         &mddev->recovery));
>> +       stop_sync_thread(mddev, true, false);
>>          wait_event(mddev->sb_wait,
>>                     !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
>>          mddev_lock_nointr(mddev);
>> @@ -6421,6 +6431,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>>          if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
>>              mddev->sync_thread ||
>>              test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>> +               /*
>> +                * This could happen if user change array state through
>> +                * ioctl/sysfs while reconfig_mutex is released.
>> +                */
>>                  pr_warn("md: %s still in use.\n",mdname(mddev));
>>                  err = -EBUSY;
>>                  goto out;
>> @@ -6457,30 +6471,25 @@ static int do_md_stop(struct mddev *mddev, int mode,
>>          struct md_rdev *rdev;
>>          int did_freeze = 0;
>>
>> -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
>> +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
>>                  did_freeze = 1;
>> +
>> +       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>> +               stop_sync_thread(mddev, true, false);
>> +               mddev_lock_nointr(mddev);
>> +       } else {
>>                  set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>          }
>> -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
>> -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>> -
>> -       /*
>> -        * Thread might be blocked waiting for metadata update which will now
>> -        * never happen
>> -        */
>> -       md_wakeup_thread_directly(mddev->sync_thread);
>> -
>> -       mddev_unlock(mddev);
>> -       wait_event(resync_wait, (mddev->sync_thread == NULL &&
>> -                                !test_bit(MD_RECOVERY_RUNNING,
>> -                                          &mddev->recovery)));
>> -       mddev_lock_nointr(mddev);
>>
>>          mutex_lock(&mddev->open_mutex);
>>          if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
>>              mddev->sysfs_active ||
>>              mddev->sync_thread ||
>>              test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>> +               /*
>> +                * This could happen if user change array state through
>> +                * ioctl/sysfs while reconfig_mutex is released.
>> +                */
>>                  pr_warn("md: %s still in use.\n",mdname(mddev));
>>                  mutex_unlock(&mddev->open_mutex);
>>                  if (did_freeze) {
>> --
>> 2.39.2
>>
> 
> .
>
  
Xiao Ni Nov. 21, 2023, 6:02 a.m. UTC | #3
On Fri, Nov 10, 2023 at 5:34 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@huawei.com>
>
> stop_sync_thread(), md_set_readonly() and do_md_stop() are trying to
> stop sync_thread() the same way, hence factor out a helper to make code
> cleaner, and also prepare to use the new helper to fix problems later.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> Signed-off-by: Yu Kuai <yukuai1@huaweicloud.com>
> ---
>  drivers/md/md.c | 129 ++++++++++++++++++++++++++----------------------
>  1 file changed, 69 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index c0f2bdafe46a..7252fae0c989 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -4848,29 +4848,46 @@ action_show(struct mddev *mddev, char *page)
>         return sprintf(page, "%s\n", type);
>  }
>
> -static int stop_sync_thread(struct mddev *mddev)
> +static bool sync_thread_stopped(struct mddev *mddev, int *seq_ptr)
>  {
> -       int ret = 0;
> +       if (seq_ptr && *seq_ptr != atomic_read(&mddev->sync_seq))
> +               return true;
>
> -       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> -               return 0;
> +       return (!mddev->sync_thread &&
> +               !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> +}
>
> -       ret = mddev_lock(mddev);
> -       if (ret)
> -               return ret;
> +/*
> + * stop_sync_thread() - stop running sync_thread.
> + * @mddev: the array that sync_thread belongs to.
> + * @freeze: set true to prevent new sync_thread to start.
> + * @interruptible: if set true, then user can interrupt while waiting for
> + * sync_thread to be done.
> + *
> + * Noted that this function must be called with 'reconfig_mutex' grabbed, and
> + * fter this function return, 'reconfig_mtuex' will be released.
> + */
> +static int stop_sync_thread(struct mddev *mddev, bool freeze,
> +                           bool interruptible)
> +       __releases(&mddev->reconfig_mutex)
> +{
> +       int *seq_ptr = NULL;
> +       int sync_seq;
> +       int ret = 0;
> +
> +       if (freeze) {
> +               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       } else {
> +               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +               sync_seq = atomic_read(&mddev->sync_seq);
> +               seq_ptr = &sync_seq;
> +       }
>
> -       /*
> -        * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
> -        * held.
> -        */
>         if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>                 mddev_unlock(mddev);
>                 return 0;
>         }
Hi Kuai

It does the unlock inside this function. To me that's not good: the
caller takes the lock, so the caller should release it too.
>
> -       if (work_pending(&mddev->sync_work))
> -               flush_workqueue(md_misc_wq);
> -
>         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>         /*
>          * Thread might be blocked waiting for metadata update which will now
> @@ -4879,53 +4896,58 @@ static int stop_sync_thread(struct mddev *mddev)
>         md_wakeup_thread_directly(mddev->sync_thread);
>
>         mddev_unlock(mddev);

Same with above point.

> -       return 0;
> +       if (work_pending(&mddev->sync_work))
> +               flush_work(&mddev->sync_work);
> +
> +       if (interruptible)
> +               ret = wait_event_interruptible(resync_wait,
> +                                       sync_thread_stopped(mddev, seq_ptr));
> +       else
> +               wait_event(resync_wait, sync_thread_stopped(mddev, seq_ptr));
> +

It looks like the three paths (md_set_readonly, do_md_stop and
stop_sync_thread) need to wait for different events. We could move
this waiting code out of the helper and make the helper itself more
generic.
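
Putting the two comments above together, a minimal user-space sketch of
the calling convention I have in mind could look like the code below.
The pthread mutex/condvar only stand in for reconfig_mutex/resync_wait,
and names like do_md_stop_like_caller are made up for illustration;
this is not md code, just the shape of the locking.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t reconfig_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t resync_wait = PTHREAD_COND_INITIALIZER;
static bool stop_requested;
static bool sync_running;

/* Stand-in for the running sync thread (md_do_sync plus reaping). */
static void *sync_thread_fn(void *arg)
{
        pthread_mutex_lock(&reconfig_mutex);
        while (!stop_requested)                 /* "resync loop" */
                pthread_cond_wait(&resync_wait, &reconfig_mutex);
        sync_running = false;                   /* thread is reaped */
        pthread_cond_broadcast(&resync_wait);
        pthread_mutex_unlock(&reconfig_mutex);
        return arg;
}

/* Called and returns with reconfig_mutex held: no hidden unlock. */
static void stop_sync_thread(void)
{
        if (!sync_running)
                return;
        stop_requested = true;                  /* ~ MD_RECOVERY_INTR */
        pthread_cond_broadcast(&resync_wait);   /* ~ md_wakeup_thread_directly() */
}

/* Each caller keeps its own lock/unlock pair and its own wait. */
static void do_md_stop_like_caller(void)
{
        pthread_mutex_lock(&reconfig_mutex);
        stop_sync_thread();
        while (sync_running)                    /* role-specific wait stays here */
                pthread_cond_wait(&resync_wait, &reconfig_mutex);
        pthread_mutex_unlock(&reconfig_mutex);
}

int main(void)
{
        pthread_t t;

        sync_running = true;
        pthread_create(&t, NULL, sync_thread_fn, NULL);
        do_md_stop_like_caller();
        pthread_join(t, NULL);
        printf("sync thread stopped, locking stayed balanced\n");
        return 0;
}

With that shape, md_set_readonly(), do_md_stop() and the sysfs
idle/frozen handlers would each keep their own wait next to their own
lock handling instead of passing flags into the helper.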

Best Regards
Xiao


> +       return ret;
>  }
>
>  static int idle_sync_thread(struct mddev *mddev)
>  {
>         int ret;
> -       int sync_seq = atomic_read(&mddev->sync_seq);
>         bool flag;
>
>         ret = mutex_lock_interruptible(&mddev->sync_mutex);
>         if (ret)
>                 return ret;
>
> -       flag = test_and_clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -       ret = stop_sync_thread(mddev);
> +       flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       ret = mddev_lock(mddev);
>         if (ret)
> -               goto out;
> +               goto unlock;
>
> -       ret = wait_event_interruptible(resync_wait,
> -                       sync_seq != atomic_read(&mddev->sync_seq) ||
> -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> -out:
> +       ret = stop_sync_thread(mddev, false, true);
>         if (ret && flag)
>                 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +unlock:
>         mutex_unlock(&mddev->sync_mutex);
>         return ret;
>  }
>
>  static int frozen_sync_thread(struct mddev *mddev)
>  {
> -       int ret = mutex_lock_interruptible(&mddev->sync_mutex);
> +       int ret;
>         bool flag;
>
> +       ret = mutex_lock_interruptible(&mddev->sync_mutex);
>         if (ret)
>                 return ret;
>
> -       flag = test_and_set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -       ret = stop_sync_thread(mddev);
> +       flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       ret = mddev_lock(mddev);
>         if (ret)
> -               goto out;
> +               goto unlock;
>
> -       ret = wait_event_interruptible(resync_wait,
> -                       mddev->sync_thread == NULL &&
> -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> -out:
> +       ret = stop_sync_thread(mddev, true, true);
>         if (ret && !flag)
>                 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +unlock:
>         mutex_unlock(&mddev->sync_mutex);
>         return ret;
>  }
> @@ -6397,22 +6419,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>         if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
>                 return -EBUSY;
>
> -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
>                 did_freeze = 1;
> -               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -       }
> -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>
> -       /*
> -        * Thread might be blocked waiting for metadata update which will now
> -        * never happen
> -        */
> -       md_wakeup_thread_directly(mddev->sync_thread);
> -
> -       mddev_unlock(mddev);
> -       wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
> -                                         &mddev->recovery));
> +       stop_sync_thread(mddev, true, false);
>         wait_event(mddev->sb_wait,
>                    !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
>         mddev_lock_nointr(mddev);
> @@ -6421,6 +6431,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>         if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
>             mddev->sync_thread ||
>             test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> +               /*
> +                * This could happen if user change array state through
> +                * ioctl/sysfs while reconfig_mutex is released.
> +                */
>                 pr_warn("md: %s still in use.\n",mdname(mddev));
>                 err = -EBUSY;
>                 goto out;
> @@ -6457,30 +6471,25 @@ static int do_md_stop(struct mddev *mddev, int mode,
>         struct md_rdev *rdev;
>         int did_freeze = 0;
>
> -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
>                 did_freeze = 1;
> +
> +       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> +               stop_sync_thread(mddev, true, false);
> +               mddev_lock_nointr(mddev);
> +       } else {
>                 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>         }
> -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> -
> -       /*
> -        * Thread might be blocked waiting for metadata update which will now
> -        * never happen
> -        */
> -       md_wakeup_thread_directly(mddev->sync_thread);
> -
> -       mddev_unlock(mddev);
> -       wait_event(resync_wait, (mddev->sync_thread == NULL &&
> -                                !test_bit(MD_RECOVERY_RUNNING,
> -                                          &mddev->recovery)));
> -       mddev_lock_nointr(mddev);
>
>         mutex_lock(&mddev->open_mutex);
>         if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
>             mddev->sysfs_active ||
>             mddev->sync_thread ||
>             test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> +               /*
> +                * This could happen if user change array state through
> +                * ioctl/sysfs while reconfig_mutex is released.
> +                */
>                 pr_warn("md: %s still in use.\n",mdname(mddev));
>                 mutex_unlock(&mddev->open_mutex);
>                 if (did_freeze) {
> --
> 2.39.2
>
  
Xiao Ni Nov. 21, 2023, 6:34 a.m. UTC | #4
On Tue, Nov 21, 2023 at 2:02 PM Xiao Ni <xni@redhat.com> wrote:
>
> On Fri, Nov 10, 2023 at 5:34 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
> >
> > From: Yu Kuai <yukuai3@huawei.com>
> >
> > stop_sync_thread(), md_set_readonly() and do_md_stop() are trying to
> > stop sync_thread() the same way, hence factor out a helper to make code
> > cleaner, and also prepare to use the new helper to fix problems later.
> >
> > Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> > Signed-off-by: Yu Kuai <yukuai1@huaweicloud.com>
> > ---
> >  drivers/md/md.c | 129 ++++++++++++++++++++++++++----------------------
> >  1 file changed, 69 insertions(+), 60 deletions(-)
> >
> > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > index c0f2bdafe46a..7252fae0c989 100644
> > --- a/drivers/md/md.c
> > +++ b/drivers/md/md.c
> > @@ -4848,29 +4848,46 @@ action_show(struct mddev *mddev, char *page)
> >         return sprintf(page, "%s\n", type);
> >  }
> >
> > -static int stop_sync_thread(struct mddev *mddev)
> > +static bool sync_thread_stopped(struct mddev *mddev, int *seq_ptr)
> >  {
> > -       int ret = 0;
> > +       if (seq_ptr && *seq_ptr != atomic_read(&mddev->sync_seq))
> > +               return true;
> >
> > -       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> > -               return 0;
> > +       return (!mddev->sync_thread &&
> > +               !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> > +}
> >
> > -       ret = mddev_lock(mddev);
> > -       if (ret)
> > -               return ret;
> > +/*
> > + * stop_sync_thread() - stop running sync_thread.
> > + * @mddev: the array that sync_thread belongs to.
> > + * @freeze: set true to prevent new sync_thread to start.
> > + * @interruptible: if set true, then user can interrupt while waiting for
> > + * sync_thread to be done.
> > + *
> > + * Noted that this function must be called with 'reconfig_mutex' grabbed, and
> > + * fter this function return, 'reconfig_mtuex' will be released.
> > + */
> > +static int stop_sync_thread(struct mddev *mddev, bool freeze,
> > +                           bool interruptible)
> > +       __releases(&mddev->reconfig_mutex)
> > +{
> > +       int *seq_ptr = NULL;
> > +       int sync_seq;
> > +       int ret = 0;
> > +
> > +       if (freeze) {
> > +               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > +       } else {
> > +               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > +               sync_seq = atomic_read(&mddev->sync_seq);
> > +               seq_ptr = &sync_seq;
> > +       }
> >
> > -       /*
> > -        * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
> > -        * held.
> > -        */
> >         if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> >                 mddev_unlock(mddev);
> >                 return 0;
> >         }
> Hi Kuai
>
> It does the unlock inside this function. To me that's not good: the
> caller takes the lock, so the caller should release it too.
> >
> > -       if (work_pending(&mddev->sync_work))
> > -               flush_workqueue(md_misc_wq);
> > -
> >         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> >         /*
> >          * Thread might be blocked waiting for metadata update which will now
> > @@ -4879,53 +4896,58 @@ static int stop_sync_thread(struct mddev *mddev)
> >         md_wakeup_thread_directly(mddev->sync_thread);
> >
> >         mddev_unlock(mddev);
>
> Same with above point.
>
> > -       return 0;
> > +       if (work_pending(&mddev->sync_work))
> > +               flush_work(&mddev->sync_work);
> > +
> > +       if (interruptible)
> > +               ret = wait_event_interruptible(resync_wait,
> > +                                       sync_thread_stopped(mddev, seq_ptr));
> > +       else
> > +               wait_event(resync_wait, sync_thread_stopped(mddev, seq_ptr));
> > +
>
> It looks like the three paths (md_set_readonly, do_md_stop and
> stop_sync_thread) need to wait for different events. We could move
> this waiting code out of the helper and make the helper itself more
> generic.

Or take the lock again before returning from this function, and leave the wait here?
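
For example, a variant along these lines, reusing the globals from the
user-space sketch earlier in the thread (again just illustrative
pthread code, not the md implementation):

/*
 * Variant of stop_sync_thread() from the sketch above (same globals):
 * the wait stays inside the helper, but the lock is dropped and
 * re-taken around it, so the function returns with the lock held and
 * callers keep a balanced lock/unlock pair.
 */
static void stop_sync_thread_and_wait(void)
{
        if (!sync_running)
                return;
        stop_requested = true;
        pthread_cond_broadcast(&resync_wait);

        /*
         * pthread_cond_wait() releases and re-acquires reconfig_mutex
         * internally -- the user-space analogue of mddev_unlock() +
         * wait_event(resync_wait, ...) + mddev_lock_nointr().
         */
        while (sync_running)
                pthread_cond_wait(&resync_wait, &reconfig_mutex);
}

/* The caller stays symmetric: lock, call, unlock. */
static void md_set_readonly_like_caller(void)
{
        pthread_mutex_lock(&reconfig_mutex);
        stop_sync_thread_and_wait();
        pthread_mutex_unlock(&reconfig_mutex);
}

The caller then stays symmetric (lock, call, unlock), while the helper
hides the unlock/wait/relock dance.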

Regards
Xiao


>
> Best Regards
> Xiao
>
>
> > +       return ret;
> >  }
> >
> >  static int idle_sync_thread(struct mddev *mddev)
> >  {
> >         int ret;
> > -       int sync_seq = atomic_read(&mddev->sync_seq);
> >         bool flag;
> >
> >         ret = mutex_lock_interruptible(&mddev->sync_mutex);
> >         if (ret)
> >                 return ret;
> >
> > -       flag = test_and_clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > -       ret = stop_sync_thread(mddev);
> > +       flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > +       ret = mddev_lock(mddev);
> >         if (ret)
> > -               goto out;
> > +               goto unlock;
> >
> > -       ret = wait_event_interruptible(resync_wait,
> > -                       sync_seq != atomic_read(&mddev->sync_seq) ||
> > -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> > -out:
> > +       ret = stop_sync_thread(mddev, false, true);
> >         if (ret && flag)
> >                 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > +unlock:
> >         mutex_unlock(&mddev->sync_mutex);
> >         return ret;
> >  }
> >
> >  static int frozen_sync_thread(struct mddev *mddev)
> >  {
> > -       int ret = mutex_lock_interruptible(&mddev->sync_mutex);
> > +       int ret;
> >         bool flag;
> >
> > +       ret = mutex_lock_interruptible(&mddev->sync_mutex);
> >         if (ret)
> >                 return ret;
> >
> > -       flag = test_and_set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > -       ret = stop_sync_thread(mddev);
> > +       flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > +       ret = mddev_lock(mddev);
> >         if (ret)
> > -               goto out;
> > +               goto unlock;
> >
> > -       ret = wait_event_interruptible(resync_wait,
> > -                       mddev->sync_thread == NULL &&
> > -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> > -out:
> > +       ret = stop_sync_thread(mddev, true, true);
> >         if (ret && !flag)
> >                 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > +unlock:
> >         mutex_unlock(&mddev->sync_mutex);
> >         return ret;
> >  }
> > @@ -6397,22 +6419,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
> >         if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
> >                 return -EBUSY;
> >
> > -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> > +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
> >                 did_freeze = 1;
> > -               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > -       }
> > -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> > -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> >
> > -       /*
> > -        * Thread might be blocked waiting for metadata update which will now
> > -        * never happen
> > -        */
> > -       md_wakeup_thread_directly(mddev->sync_thread);
> > -
> > -       mddev_unlock(mddev);
> > -       wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
> > -                                         &mddev->recovery));
> > +       stop_sync_thread(mddev, true, false);
> >         wait_event(mddev->sb_wait,
> >                    !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
> >         mddev_lock_nointr(mddev);
> > @@ -6421,6 +6431,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
> >         if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
> >             mddev->sync_thread ||
> >             test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> > +               /*
> > +                * This could happen if user change array state through
> > +                * ioctl/sysfs while reconfig_mutex is released.
> > +                */
> >                 pr_warn("md: %s still in use.\n",mdname(mddev));
> >                 err = -EBUSY;
> >                 goto out;
> > @@ -6457,30 +6471,25 @@ static int do_md_stop(struct mddev *mddev, int mode,
> >         struct md_rdev *rdev;
> >         int did_freeze = 0;
> >
> > -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> > +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
> >                 did_freeze = 1;
> > +
> > +       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> > +               stop_sync_thread(mddev, true, false);
> > +               mddev_lock_nointr(mddev);
> > +       } else {
> >                 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >         }
> > -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> > -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> > -
> > -       /*
> > -        * Thread might be blocked waiting for metadata update which will now
> > -        * never happen
> > -        */
> > -       md_wakeup_thread_directly(mddev->sync_thread);
> > -
> > -       mddev_unlock(mddev);
> > -       wait_event(resync_wait, (mddev->sync_thread == NULL &&
> > -                                !test_bit(MD_RECOVERY_RUNNING,
> > -                                          &mddev->recovery)));
> > -       mddev_lock_nointr(mddev);
> >
> >         mutex_lock(&mddev->open_mutex);
> >         if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
> >             mddev->sysfs_active ||
> >             mddev->sync_thread ||
> >             test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> > +               /*
> > +                * This could happen if user change array state through
> > +                * ioctl/sysfs while reconfig_mutex is released.
> > +                */
> >                 pr_warn("md: %s still in use.\n",mdname(mddev));
> >                 mutex_unlock(&mddev->open_mutex);
> >                 if (did_freeze) {
> > --
> > 2.39.2
> >
  
Yu Kuai Nov. 21, 2023, 1:01 p.m. UTC | #5
Hi,

On 2023/11/21 14:34, Xiao Ni wrote:
> On Tue, Nov 21, 2023 at 2:02 PM Xiao Ni <xni@redhat.com> wrote:
>>
>> On Fri, Nov 10, 2023 at 5:34 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>>
>>> From: Yu Kuai <yukuai3@huawei.com>
>>>
>>> stop_sync_thread(), md_set_readonly() and do_md_stop() are trying to
>>> stop sync_thread() the same way, hence factor out a helper to make code
>>> cleaner, and also prepare to use the new helper to fix problems later.
>>>
>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>> Signed-off-by: Yu Kuai <yukuai1@huaweicloud.com>
>>> ---
>>>   drivers/md/md.c | 129 ++++++++++++++++++++++++++----------------------
>>>   1 file changed, 69 insertions(+), 60 deletions(-)
>>>
>>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>>> index c0f2bdafe46a..7252fae0c989 100644
>>> --- a/drivers/md/md.c
>>> +++ b/drivers/md/md.c
>>> @@ -4848,29 +4848,46 @@ action_show(struct mddev *mddev, char *page)
>>>          return sprintf(page, "%s\n", type);
>>>   }
>>>
>>> -static int stop_sync_thread(struct mddev *mddev)
>>> +static bool sync_thread_stopped(struct mddev *mddev, int *seq_ptr)
>>>   {
>>> -       int ret = 0;
>>> +       if (seq_ptr && *seq_ptr != atomic_read(&mddev->sync_seq))
>>> +               return true;
>>>
>>> -       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
>>> -               return 0;
>>> +       return (!mddev->sync_thread &&
>>> +               !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
>>> +}
>>>
>>> -       ret = mddev_lock(mddev);
>>> -       if (ret)
>>> -               return ret;
>>> +/*
>>> + * stop_sync_thread() - stop running sync_thread.
>>> + * @mddev: the array that sync_thread belongs to.
>>> + * @freeze: set true to prevent new sync_thread to start.
>>> + * @interruptible: if set true, then user can interrupt while waiting for
>>> + * sync_thread to be done.
>>> + *
>>> + * Noted that this function must be called with 'reconfig_mutex' grabbed, and
>>> + * fter this function return, 'reconfig_mtuex' will be released.
>>> + */
>>> +static int stop_sync_thread(struct mddev *mddev, bool freeze,
>>> +                           bool interruptible)
>>> +       __releases(&mddev->reconfig_mutex)
>>> +{
>>> +       int *seq_ptr = NULL;
>>> +       int sync_seq;
>>> +       int ret = 0;
>>> +
>>> +       if (freeze) {
>>> +               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> +       } else {
>>> +               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> +               sync_seq = atomic_read(&mddev->sync_seq);
>>> +               seq_ptr = &sync_seq;
>>> +       }
>>>
>>> -       /*
>>> -        * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
>>> -        * held.
>>> -        */
>>>          if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>>>                  mddev_unlock(mddev);
>>>                  return 0;
>>>          }
>> Hi Kuai
>>
>> It does the unlock inside this function. To me that's not good: the
>> caller takes the lock, so the caller should release it too.
>>>
>>> -       if (work_pending(&mddev->sync_work))
>>> -               flush_workqueue(md_misc_wq);
>>> -
>>>          set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>>>          /*
>>>           * Thread might be blocked waiting for metadata update which will now
>>> @@ -4879,53 +4896,58 @@ static int stop_sync_thread(struct mddev *mddev)
>>>          md_wakeup_thread_directly(mddev->sync_thread);
>>>
>>>          mddev_unlock(mddev);
>>
>> Same with above point.
>>
>>> -       return 0;
>>> +       if (work_pending(&mddev->sync_work))
>>> +               flush_work(&mddev->sync_work);
>>> +
>>> +       if (interruptible)
>>> +               ret = wait_event_interruptible(resync_wait,
>>> +                                       sync_thread_stopped(mddev, seq_ptr));
>>> +       else
>>> +               wait_event(resync_wait, sync_thread_stopped(mddev, seq_ptr));
>>> +
>>
>> It looks like the three paths (md_set_readonly, do_md_stop and
>> stop_sync_thread) need to wait for different events. We could move
>> this waiting code out of the helper and make the helper itself more
>> generic.
> 
> Or take the lock again before returning from this function, and leave the wait here?
> 

I tried that, but it made the code complex. 😞

I guess I'll need to drop this version and restart...

Thanks,
Kuai

> Regards
> Xiao
> 
> 
>>
>> Best Regards
>> Xiao
>>
>>
>>> +       return ret;
>>>   }
>>>
>>>   static int idle_sync_thread(struct mddev *mddev)
>>>   {
>>>          int ret;
>>> -       int sync_seq = atomic_read(&mddev->sync_seq);
>>>          bool flag;
>>>
>>>          ret = mutex_lock_interruptible(&mddev->sync_mutex);
>>>          if (ret)
>>>                  return ret;
>>>
>>> -       flag = test_and_clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> -       ret = stop_sync_thread(mddev);
>>> +       flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> +       ret = mddev_lock(mddev);
>>>          if (ret)
>>> -               goto out;
>>> +               goto unlock;
>>>
>>> -       ret = wait_event_interruptible(resync_wait,
>>> -                       sync_seq != atomic_read(&mddev->sync_seq) ||
>>> -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
>>> -out:
>>> +       ret = stop_sync_thread(mddev, false, true);
>>>          if (ret && flag)
>>>                  set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> +unlock:
>>>          mutex_unlock(&mddev->sync_mutex);
>>>          return ret;
>>>   }
>>>
>>>   static int frozen_sync_thread(struct mddev *mddev)
>>>   {
>>> -       int ret = mutex_lock_interruptible(&mddev->sync_mutex);
>>> +       int ret;
>>>          bool flag;
>>>
>>> +       ret = mutex_lock_interruptible(&mddev->sync_mutex);
>>>          if (ret)
>>>                  return ret;
>>>
>>> -       flag = test_and_set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> -       ret = stop_sync_thread(mddev);
>>> +       flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> +       ret = mddev_lock(mddev);
>>>          if (ret)
>>> -               goto out;
>>> +               goto unlock;
>>>
>>> -       ret = wait_event_interruptible(resync_wait,
>>> -                       mddev->sync_thread == NULL &&
>>> -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
>>> -out:
>>> +       ret = stop_sync_thread(mddev, true, true);
>>>          if (ret && !flag)
>>>                  clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> +unlock:
>>>          mutex_unlock(&mddev->sync_mutex);
>>>          return ret;
>>>   }
>>> @@ -6397,22 +6419,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>>>          if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
>>>                  return -EBUSY;
>>>
>>> -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
>>> +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
>>>                  did_freeze = 1;
>>> -               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> -       }
>>> -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
>>> -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>>>
>>> -       /*
>>> -        * Thread might be blocked waiting for metadata update which will now
>>> -        * never happen
>>> -        */
>>> -       md_wakeup_thread_directly(mddev->sync_thread);
>>> -
>>> -       mddev_unlock(mddev);
>>> -       wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
>>> -                                         &mddev->recovery));
>>> +       stop_sync_thread(mddev, true, false);
>>>          wait_event(mddev->sb_wait,
>>>                     !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
>>>          mddev_lock_nointr(mddev);
>>> @@ -6421,6 +6431,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>>>          if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
>>>              mddev->sync_thread ||
>>>              test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>>> +               /*
>>> +                * This could happen if the user changes the array state
>>> +                * through ioctl/sysfs while reconfig_mutex is released.
>>> +                */
>>>                  pr_warn("md: %s still in use.\n",mdname(mddev));
>>>                  err = -EBUSY;
>>>                  goto out;
>>> @@ -6457,30 +6471,25 @@ static int do_md_stop(struct mddev *mddev, int mode,
>>>          struct md_rdev *rdev;
>>>          int did_freeze = 0;
>>>
>>> -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
>>> +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
>>>                  did_freeze = 1;
>>> +
>>> +       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>>> +               stop_sync_thread(mddev, true, false);
>>> +               mddev_lock_nointr(mddev);
>>> +       } else {
>>>                  set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>>          }
>>> -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
>>> -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>>> -
>>> -       /*
>>> -        * Thread might be blocked waiting for metadata update which will now
>>> -        * never happen
>>> -        */
>>> -       md_wakeup_thread_directly(mddev->sync_thread);
>>> -
>>> -       mddev_unlock(mddev);
>>> -       wait_event(resync_wait, (mddev->sync_thread == NULL &&
>>> -                                !test_bit(MD_RECOVERY_RUNNING,
>>> -                                          &mddev->recovery)));
>>> -       mddev_lock_nointr(mddev);
>>>
>>>          mutex_lock(&mddev->open_mutex);
>>>          if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
>>>              mddev->sysfs_active ||
>>>              mddev->sync_thread ||
>>>              test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>>> +               /*
>>> +                * This could happen if the user changes the array state
>>> +                * through ioctl/sysfs while reconfig_mutex is released.
>>> +                */
>>>                  pr_warn("md: %s still in use.\n",mdname(mddev));
>>>                  mutex_unlock(&mddev->open_mutex);
>>>                  if (did_freeze) {
>>> --
>>> 2.39.2
>>>
> 
> 
> .
>
  
Yu Kuai Nov. 24, 2023, 1:59 a.m. UTC | #6
Hi,

On 2023/11/21 21:01, Yu Kuai wrote:
>>>
>>> It looks like the three roles (md_set_readonly, do_md_stop and
>>> stop_sync_thread) need to wait for different events. We can move this
>>> code out of the helper function and make the helper more generic.
>>
>> Or take the lock again before returning from this function, and leave the wait here?

How about the following patch?

 drivers/md/md.c | 89 +++++++++++++++++++++++++++++++++++------------------------------------------------------
  1 file changed, 35 insertions(+), 54 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 78f32a2e434c..fe46b67f6e87 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4868,35 +4868,18 @@ action_show(struct mddev *mddev, char *page)
         return sprintf(page, "%s\n", action_names[get_sync_action(mddev)]);
  }

-static int stop_sync_thread(struct mddev *mddev)
+static void prepare_to_stop_sync_thread(struct mddev *mddev)
+       __releases(&mddev->reconfig_mutex)
  {
-       int err = mddev_lock(mddev);
-
-       if (err)
-               return err;
-
-       /*
-        * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
-        * held.
-        */
-       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
-               mddev_unlock(mddev);
-               return 0;
-       }
-
-       if (work_pending(&mddev->sync_work))
-               flush_workqueue(md_misc_wq);
-
         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
         /*
-        * Thread might be blocked waiting for metadata update which will now
-        * never happen
+        * Thread might be blocked waiting for metadata update which
+        * will now never happen.
          */
         md_wakeup_thread_directly(mddev->sync_thread);
-
         mddev_unlock(mddev);
-
-       return 0;
+       if (work_pending(&mddev->sync_work))
+               flush_work(&mddev->sync_work);
  }

  static int idle_sync_thread(struct mddev *mddev)
@@ -4905,13 +4888,17 @@ static int idle_sync_thread(struct mddev *mddev)
         int err;

         clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-       err = stop_sync_thread(mddev);
+       err = mddev_lock(mddev);
         if (err)
                 return err;

-       err = wait_event_interruptible(resync_wait,
+       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+               prepare_to_stop_sync_thread(mddev);
+               err = wait_event_interruptible(resync_wait,
                         sync_seq != atomic_read(&mddev->sync_seq) ||
                         !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+       }
+
         return err;
  }

@@ -4920,12 +4907,15 @@ static int frozen_sync_thread(struct mddev *mddev)
         int err;

         set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-       err = stop_sync_thread(mddev);
+       err = mddev_lock(mddev);
         if (err)
                 return err;

-       err = wait_event_interruptible(resync_wait,
+       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+               prepare_to_stop_sync_thread(mddev);
+               err = wait_event_interruptible(resync_wait,
                         !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+       }

         return err;
  }
@@ -6350,11 +6340,11 @@ static void md_clean(struct mddev *mddev)
  static void __md_stop_writes(struct mddev *mddev)
  {
         set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-       if (work_pending(&mddev->sync_work))
-               flush_workqueue(md_misc_wq);
-       if (mddev->sync_thread) {
-               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-               md_reap_sync_thread(mddev);
+       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+               prepare_to_stop_sync_thread(mddev);
+               wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+                                                 &mddev->recovery));
+               mddev_lock_nointr(mddev);
         }

         del_timer_sync(&mddev->safemode_timer);
@@ -6447,18 +6437,15 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
                 did_freeze = 1;
                 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
         }
-       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-               set_bit(MD_RECOVERY_INTR, &mddev->recovery);

-       /*
-        * Thread might be blocked waiting for metadata update which 
will now
-        * never happen
-        */
-       md_wakeup_thread_directly(mddev->sync_thread);
+       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+               prepare_to_stop_sync_thread(mddev);
+               wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+                                                 &mddev->recovery));
+       } else {
+               mddev_unlock(mddev);
+       }

-       mddev_unlock(mddev);
-       wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
-                                         &mddev->recovery));
         wait_event(mddev->sb_wait,
                    !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
         mddev_lock_nointr(mddev);
@@ -6509,19 +6496,13 @@ static int do_md_stop(struct mddev *mddev, int mode,
                 did_freeze = 1;
                 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
         }
-       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
-       /*
-        * Thread might be blocked waiting for metadata update which will now
-        * never happen
-        */
-       md_wakeup_thread_directly(mddev->sync_thread);

-       mddev_unlock(mddev);
-       wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
-                                         &mddev->recovery));
-       mddev_lock_nointr(mddev);
+       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+               prepare_to_stop_sync_thread(mddev);
+               wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+                                                 &mddev->recovery));
+               mddev_lock_nointr(mddev);
+       }

         mutex_lock(&mddev->open_mutex);
         if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
  
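
Just to spell out the resulting calling convention (a sketch, not part of the
patch itself): the caller holds 'reconfig_mutex', the helper drops it, and
each caller then waits for whatever condition it cares about and re-takes the
lock only if it still needs it:

	/* sketch: caller holds reconfig_mutex at this point */
	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
		prepare_to_stop_sync_thread(mddev);	/* releases reconfig_mutex */
		wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
						  &mddev->recovery));
		mddev_lock_nointr(mddev);	/* only if the lock is needed afterwards */
	}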

Patch

diff --git a/drivers/md/md.c b/drivers/md/md.c
index c0f2bdafe46a..7252fae0c989 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4848,29 +4848,46 @@  action_show(struct mddev *mddev, char *page)
 	return sprintf(page, "%s\n", type);
 }
 
-static int stop_sync_thread(struct mddev *mddev)
+static bool sync_thread_stopped(struct mddev *mddev, int *seq_ptr)
 {
-	int ret = 0;
+	if (seq_ptr && *seq_ptr != atomic_read(&mddev->sync_seq))
+		return true;
 
-	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-		return 0;
+	return (!mddev->sync_thread &&
+		!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+}
 
-	ret = mddev_lock(mddev);
-	if (ret)
-		return ret;
+/*
+ * stop_sync_thread() - stop running sync_thread.
+ * @mddev: the array that sync_thread belongs to.
+ * @freeze: set true to prevent a new sync_thread from starting.
+ * @interruptible: if set true, the wait for sync_thread to finish can be
+ * interrupted by the user.
+ *
+ * Note that this function must be called with 'reconfig_mutex' grabbed, and
+ * after this function returns, 'reconfig_mutex' will be released.
+ */
+static int stop_sync_thread(struct mddev *mddev, bool freeze,
+			    bool interruptible)
+	__releases(&mddev->reconfig_mutex)
+{
+	int *seq_ptr = NULL;
+	int sync_seq;
+	int ret = 0;
+
+	if (freeze) {
+		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+	} else {
+		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+		sync_seq = atomic_read(&mddev->sync_seq);
+		seq_ptr = &sync_seq;
+	}
 
-	/*
-	 * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
-	 * held.
-	 */
 	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
 		mddev_unlock(mddev);
 		return 0;
 	}
 
-	if (work_pending(&mddev->sync_work))
-		flush_workqueue(md_misc_wq);
-
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	/*
 	 * Thread might be blocked waiting for metadata update which will now
@@ -4879,53 +4896,58 @@  static int stop_sync_thread(struct mddev *mddev)
 	md_wakeup_thread_directly(mddev->sync_thread);
 
 	mddev_unlock(mddev);
-	return 0;
+	if (work_pending(&mddev->sync_work))
+		flush_work(&mddev->sync_work);
+
+	if (interruptible)
+		ret = wait_event_interruptible(resync_wait,
+					sync_thread_stopped(mddev, seq_ptr));
+	else
+		wait_event(resync_wait, sync_thread_stopped(mddev, seq_ptr));
+
+	return ret;
 }
 
 static int idle_sync_thread(struct mddev *mddev)
 {
 	int ret;
-	int sync_seq = atomic_read(&mddev->sync_seq);
 	bool flag;
 
 	ret = mutex_lock_interruptible(&mddev->sync_mutex);
 	if (ret)
 		return ret;
 
-	flag = test_and_clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-	ret = stop_sync_thread(mddev);
+	flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+	ret = mddev_lock(mddev);
 	if (ret)
-		goto out;
+		goto unlock;
 
-	ret = wait_event_interruptible(resync_wait,
-			sync_seq != atomic_read(&mddev->sync_seq) ||
-			!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
-out:
+	ret = stop_sync_thread(mddev, false, true);
 	if (ret && flag)
 		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+unlock:
 	mutex_unlock(&mddev->sync_mutex);
 	return ret;
 }
 
 static int frozen_sync_thread(struct mddev *mddev)
 {
-	int ret = mutex_lock_interruptible(&mddev->sync_mutex);
+	int ret;
 	bool flag;
 
+	ret = mutex_lock_interruptible(&mddev->sync_mutex);
 	if (ret)
 		return ret;
 
-	flag = test_and_set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-	ret = stop_sync_thread(mddev);
+	flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+	ret = mddev_lock(mddev);
 	if (ret)
-		goto out;
+		goto unlock;
 
-	ret = wait_event_interruptible(resync_wait,
-			mddev->sync_thread == NULL &&
-			!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
-out:
+	ret = stop_sync_thread(mddev, true, true);
 	if (ret && !flag)
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+unlock:
 	mutex_unlock(&mddev->sync_mutex);
 	return ret;
 }
@@ -6397,22 +6419,10 @@  static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 	if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
 		return -EBUSY;
 
-	if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+	if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
 		did_freeze = 1;
-		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-	}
-	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 
-	/*
-	 * Thread might be blocked waiting for metadata update which will now
-	 * never happen
-	 */
-	md_wakeup_thread_directly(mddev->sync_thread);
-
-	mddev_unlock(mddev);
-	wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
-					  &mddev->recovery));
+	stop_sync_thread(mddev, true, false);
 	wait_event(mddev->sb_wait,
 		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
 	mddev_lock_nointr(mddev);
@@ -6421,6 +6431,10 @@  static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 	if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
 	    mddev->sync_thread ||
 	    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+		/*
+		 * This could happen if the user changes the array state
+		 * through ioctl/sysfs while reconfig_mutex is released.
+		 */
 		pr_warn("md: %s still in use.\n",mdname(mddev));
 		err = -EBUSY;
 		goto out;
@@ -6457,30 +6471,25 @@  static int do_md_stop(struct mddev *mddev, int mode,
 	struct md_rdev *rdev;
 	int did_freeze = 0;
 
-	if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+	if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
 		did_freeze = 1;
+
+	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+		stop_sync_thread(mddev, true, false);
+		mddev_lock_nointr(mddev);
+	} else {
 		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 	}
-	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
-	/*
-	 * Thread might be blocked waiting for metadata update which will now
-	 * never happen
-	 */
-	md_wakeup_thread_directly(mddev->sync_thread);
-
-	mddev_unlock(mddev);
-	wait_event(resync_wait, (mddev->sync_thread == NULL &&
-				 !test_bit(MD_RECOVERY_RUNNING,
-					   &mddev->recovery)));
-	mddev_lock_nointr(mddev);
 
 	mutex_lock(&mddev->open_mutex);
 	if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
 	    mddev->sysfs_active ||
 	    mddev->sync_thread ||
 	    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+		/*
+		 * This could happen if the user changes the array state
+		 * through ioctl/sysfs while reconfig_mutex is released.
+		 */
 		pr_warn("md: %s still in use.\n",mdname(mddev));
 		mutex_unlock(&mddev->open_mutex);
 		if (did_freeze) {