diff mbox series

[v3,3/4] md/raid10: fix incorrect done of recovery

Message ID	20230527072218.2365857-4-linan666@huaweicloud.com
State	New
Headers	Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) client-ip=2620:137:e000::1:20; From: linan666@huaweicloud.com To: song@kernel.org, bingjingc@synology.com, allenpeng@synology.com, shli@fb.com, alexwu@synology.com, neilb@suse.de Cc: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org, linan122@huawei.com, yukuai3@huawei.com, yi.zhang@huawei.com, houtao1@huawei.com, yangerkun@huawei.com Subject: [PATCH v3 3/4] md/raid10: fix incorrect done of recovery Date: Sat, 27 May 2023 15:22:17 +0800 Message-Id: <20230527072218.2365857-4-linan666@huaweicloud.com> In-Reply-To: <20230527072218.2365857-1-linan666@huaweicloud.com> References: <20230527072218.2365857-1-linan666@huaweicloud.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	raid10 bugfix \| [v3,0/4] raid10 bugfix [v3,1/4] md/raid10: fix null-ptr-deref of mreplace in raid10_sync_request [v3,2/4] md/raid10: improve code of mrdev in raid10_sync_request [v3,3/4] md/raid10: fix incorrect done of recovery [v3,4/4] md/raid10: fix io loss while replacement replace rdev

Commit Message

Li Nan May 27, 2023, 7:22 a.m. UTC

  From: Li Nan <linan122@huawei.com>

When recovery encounters bad blocks, raid10_sync_request() jumps to giveup
and increments chunks_skipped, and it returns max_sector once
chunks_skipped >= geo.raid_disks. At that point recovery has failed and the
data is inconsistent, but the user is told that recovery is done, which is wrong.

Fix it by setting the mirror's recovery_disabled, so that the spare device will
not be added to this mirror. The same issue also exists for resync; it will be
fixed in the future.

Signed-off-by: Li Nan <linan122@huawei.com>
---
 drivers/md/raid10.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

Comments

Song Liu May 30, 2023, 9:55 p.m. UTC | #1

On Sat, May 27, 2023 at 12:24 AM <linan666@huaweicloud.com> wrote:
>
> From: Li Nan <linan122@huawei.com>
>
> Recovery will go to giveup and let chunks_skipped++ in
> raid10_sync_request() if there are some bad_blocks, and it will return
> max_sector when chunks_skipped >= geo.raid_disks. Now, recovery fail and
> data is inconsistent but user think recovery is done, it is wrong.
>
> Fix it by set mirror's recovery_disabled, spare device will not  be added
> to here. The same issue alos exists on resync, it will be fixd in future.
>
> Signed-off-by: Li Nan <linan122@huawei.com>

I applied 1/4 and 2/4 of the set to md-next.

For 3/4 and 4/4, please improve the commit log (rephrase confusing statements,
fix typo's, etc.). Please also add a mdadm test for 3/4.

Thanks,
Song

> ---
>  drivers/md/raid10.c | 18 +++++++++++++++++-
>  1 file changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index d93d8cb2b620..3ba1516ea160 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -3303,6 +3303,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>         int chunks_skipped = 0;
>         sector_t chunk_mask = conf->geo.chunk_mask;
>         int page_idx = 0;
> +       int error_disk = -1;
>
>         /*
>          * Allow skipping a full rebuild for incremental assembly
> @@ -3386,7 +3387,20 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>                 return reshape_request(mddev, sector_nr, skipped);
>
>         if (chunks_skipped >= conf->geo.raid_disks) {
> -               /* if there has been nothing to do on any drive,
> +               pr_err("md/raid10:%s: %s fail\n", mdname(mddev),
> +                       test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?  "resync" : "recovery");
> +               if (error_disk >= 0 &&
> +                   !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
> +                       /*
> +                        * recovery fail, set mirrors.recovory_disabled,
> +                        * device shouldn't be added to there.
> +                        */
> +                       conf->mirrors[error_disk].recovery_disabled =
> +                                               mddev->recovery_disabled;
> +                       return 0;
> +               }
> +               /*
> +                * if there has been nothing to do on any drive,
>                  * then there is nothing to do at all..
>                  */
>                 *skipped = 1;
> @@ -3638,6 +3652,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>                                                        mdname(mddev));
>                                         mirror->recovery_disabled
>                                                 = mddev->recovery_disabled;
> +                               } else {
> +                                       error_disk = i;
>                                 }
>                                 put_buf(r10_bio);
>                                 if (rb2)
> --
> 2.31.1
>

Li Nan May 31, 2023, 9:31 a.m. UTC | #2

在 2023/5/31 5:55, Song Liu 写道:
> On Sat, May 27, 2023 at 12:24 AM <linan666@huaweicloud.com> wrote:
>>
>> From: Li Nan <linan122@huawei.com>
>>
>> Recovery will go to giveup and let chunks_skipped++ in
>> raid10_sync_request() if there are some bad_blocks, and it will return
>> max_sector when chunks_skipped >= geo.raid_disks. Now, recovery fail and
>> data is inconsistent but user think recovery is done, it is wrong.
>>
>> Fix it by set mirror's recovery_disabled, spare device will not  be added
>> to here. The same issue alos exists on resync, it will be fixd in future.
>>
>> Signed-off-by: Li Nan <linan122@huawei.com>
> 
> I applied 1/4 and 2/4 of the set to md-next.
> 
> For 3/4 and 4/4, please improve the commit log (rephrase confusing statements,
> fix typo's, etc.). Please also add a mdadm test for 3/4.
> 
> Thanks,
> Song
> 

I will add a test later. Thanks for review.

diff mbox series

Patch

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d93d8cb2b620..3ba1516ea160 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3303,6 +3303,7 @@  static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 	int chunks_skipped = 0;
 	sector_t chunk_mask = conf->geo.chunk_mask;
 	int page_idx = 0;
+	int error_disk = -1;
 
 	/*
 	 * Allow skipping a full rebuild for incremental assembly
@@ -3386,7 +3387,20 @@  static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 		return reshape_request(mddev, sector_nr, skipped);
 
 	if (chunks_skipped >= conf->geo.raid_disks) {
-		/* if there has been nothing to do on any drive,
+		pr_err("md/raid10:%s: %s fail\n", mdname(mddev),
+			test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?  "resync" : "recovery");
+		if (error_disk >= 0 &&
+		    !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+			/*
+			 * recovery fail, set mirrors.recovory_disabled,
+			 * device shouldn't be added to there.
+			 */
+			conf->mirrors[error_disk].recovery_disabled =
+						mddev->recovery_disabled;
+			return 0;
+		}
+		/*
+		 * if there has been nothing to do on any drive,
 		 * then there is nothing to do at all..
 		 */
 		*skipped = 1;
@@ -3638,6 +3652,8 @@  static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 						       mdname(mddev));
 					mirror->recovery_disabled
 						= mddev->recovery_disabled;
+				} else {
+					error_disk = i;
 				}
 				put_buf(r10_bio);
 				if (rb2)