[v3,2/3] md: don't leave 'MD_RECOVERY_FROZEN' in error path of md_set_readonly()

Message ID 20231129043127.2245901-3-yukuai1@huaweicloud.com
State New
Headers
Series md: fix stopping sync thread |

Commit Message

Yu Kuai Nov. 29, 2023, 4:31 a.m. UTC
  From: Yu Kuai <yukuai3@huawei.com>

If md_set_readonly() fails, the array can still be read-write while
'MD_RECOVERY_FROZEN' remains set, which leaves the array in an
abnormal state where sync or recovery can no longer continue.
Hence, make sure the flag is cleared when md_set_readonly() returns.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Acked-by: Xiao Ni <xni@redhat.com>
---
 drivers/md/md.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)
  

Comments

Song Liu Dec. 1, 2023, 8:53 p.m. UTC | #1
On Tue, Nov 28, 2023 at 8:32 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@huawei.com>
>
> If md_set_readonly() failed, the array could still be read-write, however
> 'MD_RECOVERY_FROZEN' could still be set, which leave the array in an
> abnormal state that sync or recovery can't continue anymore.
> Hence make sure the flag is cleared after md_set_readonly() returns.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> Acked-by: Xiao Ni <xni@redhat.com>

Since we are shipping this via the md-fixes branch, we need a Fixes tag.

> ---
>  drivers/md/md.c | 24 +++++++++++++-----------
>  1 file changed, 13 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 5640a948086b..2d8e45a1af23 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -6355,6 +6355,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>         int err = 0;
>         int did_freeze = 0;
>
> +       if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
> +               return -EBUSY;
> +
>         if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
>                 did_freeze = 1;
>                 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> @@ -6369,8 +6372,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>          */
>         md_wakeup_thread_directly(mddev->sync_thread);
>
> -       if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
> -               return -EBUSY;
>         mddev_unlock(mddev);
>         wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
>                                           &mddev->recovery));
> @@ -6383,29 +6384,30 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>             mddev->sync_thread ||
>             test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>                 pr_warn("md: %s still in use.\n",mdname(mddev));
> -               if (did_freeze) {
> -                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -                       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> -                       md_wakeup_thread(mddev->thread);
> -               }

This change (moving the did_freeze handling, etc.) is not explained in the
commit log. Is it just a refactor?

Thanks,
Song


>                 err = -EBUSY;
>                 goto out;
>         }
> +
>         if (mddev->pers) {
>                 __md_stop_writes(mddev);
>
> -               err  = -ENXIO;
> -               if (mddev->ro == MD_RDONLY)
> +               if (mddev->ro == MD_RDONLY) {
> +                       err  = -ENXIO;
>                         goto out;
> +               }
> +
>                 mddev->ro = MD_RDONLY;
>                 set_disk_ro(mddev->gendisk, 1);
> +       }
> +
> +out:
> +       if ((mddev->pers && !err) || did_freeze) {
>                 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>                 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
>                 md_wakeup_thread(mddev->thread);
>                 sysfs_notify_dirent_safe(mddev->sysfs_state);
> -               err = 0;
>         }
> -out:
> +
>         mutex_unlock(&mddev->open_mutex);
>         return err;
>  }
> --
> 2.39.2
>
  
Yu Kuai Dec. 2, 2023, 7:41 a.m. UTC | #2
Hi,

在 2023/12/02 4:53, Song Liu 写道:
> On Tue, Nov 28, 2023 at 8:32 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> If md_set_readonly() failed, the array could still be read-write, however
>> 'MD_RECOVERY_FROZEN' could still be set, which leave the array in an
>> abnormal state that sync or recovery can't continue anymore.
>> Hence make sure the flag is cleared after md_set_readonly() returns.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> Acked-by: Xiao Ni <xni@redhat.com>
> 
> Since we are shipping this via the md-fixes branch, we need a Fixes tag.

Okay, I'll add the following Fixes tag:

Fixes: 88724bfa68be ("md: wait for pending superblock updates before switching to read-only")
> 
>> ---
>>   drivers/md/md.c | 24 +++++++++++++-----------
>>   1 file changed, 13 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 5640a948086b..2d8e45a1af23 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -6355,6 +6355,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>>          int err = 0;
>>          int did_freeze = 0;
>>
>> +       if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
>> +               return -EBUSY;
>> +
>>          if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
>>                  did_freeze = 1;
>>                  set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> @@ -6369,8 +6372,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>>           */
>>          md_wakeup_thread_directly(mddev->sync_thread);
>>
>> -       if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
>> -               return -EBUSY;
>>          mddev_unlock(mddev);
>>          wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
>>                                            &mddev->recovery));
>> @@ -6383,29 +6384,30 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
>>              mddev->sync_thread ||
>>              test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
>>                  pr_warn("md: %s still in use.\n",mdname(mddev));
>> -               if (did_freeze) {
>> -                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> -                       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
>> -                       md_wakeup_thread(mddev->thread);
>> -               }
> 
> This change (move did_freeze, etc.) is not explained in the commit log.
> Is it just refactor?

It is a refactor, but it is also part of "make sure the flag is cleared
after md_set_readonly() returns", because MD_RECOVERY_FROZEN will now be
cleared under the following condition:

if ((mddev->pers && !err) || did_freeze)

Which means the flag is cleared:
  - if setting the array read-only succeeds, or
  - if something goes wrong and did_freeze is set, which is exactly what
    this patch tries to do.

Thanks,
Kuai

> 
> Thanks,
> Song
> 
> 
>>                  err = -EBUSY;
>>                  goto out;
>>          }
>> +
>>          if (mddev->pers) {
>>                  __md_stop_writes(mddev);
>>
>> -               err  = -ENXIO;
>> -               if (mddev->ro == MD_RDONLY)
>> +               if (mddev->ro == MD_RDONLY) {
>> +                       err  = -ENXIO;
>>                          goto out;
>> +               }
>> +
>>                  mddev->ro = MD_RDONLY;
>>                  set_disk_ro(mddev->gendisk, 1);
>> +       }
>> +
>> +out:
>> +       if ((mddev->pers && !err) || did_freeze) {
>>                  clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>                  set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
>>                  md_wakeup_thread(mddev->thread);
>>                  sysfs_notify_dirent_safe(mddev->sysfs_state);
>> -               err = 0;
>>          }
>> -out:
>> +
>>          mutex_unlock(&mddev->open_mutex);
>>          return err;
>>   }
>> --
>> 2.39.2
>>
> .
>
  

Patch

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5640a948086b..2d8e45a1af23 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6355,6 +6355,9 @@  static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 	int err = 0;
 	int did_freeze = 0;
 
+	if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
+		return -EBUSY;
+
 	if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
 		did_freeze = 1;
 		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -6369,8 +6372,6 @@  static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 	 */
 	md_wakeup_thread_directly(mddev->sync_thread);
 
-	if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
-		return -EBUSY;
 	mddev_unlock(mddev);
 	wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
 					  &mddev->recovery));
@@ -6383,29 +6384,30 @@  static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 	    mddev->sync_thread ||
 	    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
 		pr_warn("md: %s still in use.\n",mdname(mddev));
-		if (did_freeze) {
-			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-			md_wakeup_thread(mddev->thread);
-		}
 		err = -EBUSY;
 		goto out;
 	}
+
 	if (mddev->pers) {
 		__md_stop_writes(mddev);
 
-		err  = -ENXIO;
-		if (mddev->ro == MD_RDONLY)
+		if (mddev->ro == MD_RDONLY) {
+			err  = -ENXIO;
 			goto out;
+		}
+
 		mddev->ro = MD_RDONLY;
 		set_disk_ro(mddev->gendisk, 1);
+	}
+
+out:
+	if ((mddev->pers && !err) || did_freeze) {
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
-		err = 0;
 	}
-out:
+
 	mutex_unlock(&mddev->open_mutex);
 	return err;
 }