[md-6.9,05/10] md/raid1-10: factor out a new helper raid1_should_read_first()

Message ID 20240222075806.1816400-6-yukuai1@huaweicloud.com
State New
Headers
Series md/raid1: refactor read_balance() and some minor fix |

Commit Message

Yu Kuai Feb. 22, 2024, 7:58 a.m. UTC
  From: Yu Kuai <yukuai3@huawei.com>

If resync is in progress, read_balance() should find the first usable
disk, otherwise, data could be inconsistent after resync is done. raid1
and raid10 implement the same checking, hence factor out the checking
to make code cleaner.

Noted that raid1 is using 'mddev->recovery_cp', which is updated after
all resync IO is done, while raid10 is using 'conf->next_resync', which
is inaccurate because raid10 update it before submitting resync IO.
Fortunately, raid10 read IO can't concurrent with resync IO, hence there
is no problem. And this patch also switch raid10 to use
'mddev->recovery_cp'.

Co-developed-by: Paul Luse <paul.e.luse@linux.intel.com>
Signed-off-by: Paul Luse <paul.e.luse@linux.intel.com>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/md/raid1-10.c | 20 ++++++++++++++++++++
 drivers/md/raid1.c    | 15 ++-------------
 drivers/md/raid10.c   | 13 ++-----------
 3 files changed, 24 insertions(+), 24 deletions(-)
  

Comments

Xiao Ni Feb. 26, 2024, 1:46 p.m. UTC | #1
On Thu, Feb 22, 2024 at 4:05 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@huawei.com>
>
> If resync is in progress, read_balance() should find the first usable
> disk, otherwise, data could be inconsistent after resync is done. raid1
> and raid10 implement the same checking, hence factor out the checking
> to make code cleaner.
>
> Noted that raid1 is using 'mddev->recovery_cp', which is updated after
> all resync IO is done, while raid10 is using 'conf->next_resync', which
> is inaccurate because raid10 update it before submitting resync IO.
> Fortunately, raid10 read IO can't concurrent with resync IO, hence there
> is no problem. And this patch also switch raid10 to use
> 'mddev->recovery_cp'.
>
> Co-developed-by: Paul Luse <paul.e.luse@linux.intel.com>
> Signed-off-by: Paul Luse <paul.e.luse@linux.intel.com>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  drivers/md/raid1-10.c | 20 ++++++++++++++++++++
>  drivers/md/raid1.c    | 15 ++-------------
>  drivers/md/raid10.c   | 13 ++-----------
>  3 files changed, 24 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
> index 9bc0f0022a6c..2ea1710a3b70 100644
> --- a/drivers/md/raid1-10.c
> +++ b/drivers/md/raid1-10.c
> @@ -276,3 +276,23 @@ static inline int raid1_check_read_range(struct md_rdev *rdev,
>         *len = first_bad + bad_sectors - this_sector;
>         return 0;
>  }
> +
> +/*
> + * Check if read should choose the first rdev.
> + *
> + * Balance on the whole device if no resync is going on (recovery is ok) or
> + * below the resync window. Otherwise, take the first readable disk.
> + */
> +static inline bool raid1_should_read_first(struct mddev *mddev,
> +                                          sector_t this_sector, int len)
> +{
> +       if ((mddev->recovery_cp < this_sector + len))
> +               return true;
> +
> +       if (mddev_is_clustered(mddev) &&
> +           md_cluster_ops->area_resyncing(mddev, READ, this_sector,
> +                                          this_sector + len))
> +               return true;
> +
> +       return false;
> +}
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index d0bc67e6d068..8089c569e84f 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -605,11 +605,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
>         struct md_rdev *rdev;
>         int choose_first;
>
> -       /*
> -        * Check if we can balance. We can balance on the whole
> -        * device if no resync is going on, or below the resync window.
> -        * We take the first readable disk when above the resync window.
> -        */
>   retry:
>         sectors = r1_bio->sectors;
>         best_disk = -1;
> @@ -618,16 +613,10 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
>         best_pending_disk = -1;
>         min_pending = UINT_MAX;
>         best_good_sectors = 0;
> +       choose_first = raid1_should_read_first(conf->mddev, this_sector,
> +                                              sectors);
>         clear_bit(R1BIO_FailFast, &r1_bio->state);
>
> -       if ((conf->mddev->recovery_cp < this_sector + sectors) ||
> -           (mddev_is_clustered(conf->mddev) &&
> -           md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
> -                   this_sector + sectors)))
> -               choose_first = 1;
> -       else
> -               choose_first = 0;
> -
>         for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
>                 sector_t dist;
>                 sector_t first_bad;
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 1f6693e40e12..22bcc3ce11d3 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -747,17 +747,8 @@ static struct md_rdev *read_balance(struct r10conf *conf,
>         best_good_sectors = 0;
>         do_balance = 1;
>         clear_bit(R10BIO_FailFast, &r10_bio->state);
> -       /*
> -        * Check if we can balance. We can balance on the whole
> -        * device if no resync is going on (recovery is ok), or below
> -        * the resync window. We take the first readable disk when
> -        * above the resync window.
> -        */
> -       if ((conf->mddev->recovery_cp < MaxSector
> -            && (this_sector + sectors >= conf->next_resync)) ||
> -           (mddev_is_clustered(conf->mddev) &&
> -            md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
> -                                           this_sector + sectors)))
> +
> +       if (raid1_should_read_first(conf->mddev, this_sector, sectors))
>                 do_balance = 0;
>
>         for (slot = 0; slot < conf->copies ; slot++) {
> --
> 2.39.2
>
>

This patch looks good to me.
Reviewed-by: Xiao Ni <xni@redhat.com>
  

Patch

diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
index 9bc0f0022a6c..2ea1710a3b70 100644
--- a/drivers/md/raid1-10.c
+++ b/drivers/md/raid1-10.c
@@ -276,3 +276,23 @@  static inline int raid1_check_read_range(struct md_rdev *rdev,
 	*len = first_bad + bad_sectors - this_sector;
 	return 0;
 }
+
+/*
+ * Check if read should choose the first rdev.
+ *
+ * Balance on the whole device if no resync is going on (recovery is ok) or
+ * below the resync window. Otherwise, take the first readable disk.
+ */
+static inline bool raid1_should_read_first(struct mddev *mddev,
+					   sector_t this_sector, int len)
+{
+	if ((mddev->recovery_cp < this_sector + len))
+		return true;
+
+	if (mddev_is_clustered(mddev) &&
+	    md_cluster_ops->area_resyncing(mddev, READ, this_sector,
+					   this_sector + len))
+		return true;
+
+	return false;
+}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index d0bc67e6d068..8089c569e84f 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -605,11 +605,6 @@  static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	struct md_rdev *rdev;
 	int choose_first;
 
-	/*
-	 * Check if we can balance. We can balance on the whole
-	 * device if no resync is going on, or below the resync window.
-	 * We take the first readable disk when above the resync window.
-	 */
  retry:
 	sectors = r1_bio->sectors;
 	best_disk = -1;
@@ -618,16 +613,10 @@  static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	best_pending_disk = -1;
 	min_pending = UINT_MAX;
 	best_good_sectors = 0;
+	choose_first = raid1_should_read_first(conf->mddev, this_sector,
+					       sectors);
 	clear_bit(R1BIO_FailFast, &r1_bio->state);
 
-	if ((conf->mddev->recovery_cp < this_sector + sectors) ||
-	    (mddev_is_clustered(conf->mddev) &&
-	    md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
-		    this_sector + sectors)))
-		choose_first = 1;
-	else
-		choose_first = 0;
-
 	for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
 		sector_t dist;
 		sector_t first_bad;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1f6693e40e12..22bcc3ce11d3 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -747,17 +747,8 @@  static struct md_rdev *read_balance(struct r10conf *conf,
 	best_good_sectors = 0;
 	do_balance = 1;
 	clear_bit(R10BIO_FailFast, &r10_bio->state);
-	/*
-	 * Check if we can balance. We can balance on the whole
-	 * device if no resync is going on (recovery is ok), or below
-	 * the resync window. We take the first readable disk when
-	 * above the resync window.
-	 */
-	if ((conf->mddev->recovery_cp < MaxSector
-	     && (this_sector + sectors >= conf->next_resync)) ||
-	    (mddev_is_clustered(conf->mddev) &&
-	     md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
-					    this_sector + sectors)))
+
+	if (raid1_should_read_first(conf->mddev, this_sector, sectors))
 		do_balance = 0;
 
 	for (slot = 0; slot < conf->copies ; slot++) {