diff mbox series

[v4,3/3] md/raid10: optimize check_decay_read_errors()

Message ID	20230522072535.1523740-4-linan666@huaweicloud.com
State	New
Headers	Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) client-ip=2620:137:e000::1:20; From: linan666@huaweicloud.com To: song@kernel.org, neilb@suse.de, Rob.Becker@riverbed.com Cc: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org, linan122@huawei.com, yukuai3@huawei.com, yi.zhang@huawei.com, houtao1@huawei.com, yangerkun@huawei.com Subject: [PATCH v4 3/3] md/raid10: optimize check_decay_read_errors() Date: Mon, 22 May 2023 15:25:35 +0800 Message-Id: <20230522072535.1523740-4-linan666@huaweicloud.com> In-Reply-To: <20230522072535.1523740-1-linan666@huaweicloud.com> References: <20230522072535.1523740-1-linan666@huaweicloud.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	md: bugfix of writing raid sysfs | [v4,0/3] md: bugfix of writing raid sysfs [v4,1/3] md/raid10: fix overflow of md/safe_mode_delay [v4,2/3] md/raid10: fix wrong setting of max_corr_read_errors [v4,3/3] md/raid10: optimize check_decay_read_errors()

Commit Message

Li Nan May 22, 2023, 7:25 a.m. UTC

  From: Li Nan <linan122@huawei.com>

check_decay_read_errors() is used to handle rdev->read_errors. However,
read_errors is still incremented and read back in fix_read_error() after
check_decay_read_errors() has been invoked.

Move all operations on read_errors into check_decay_read_errors() and
remove the unnecessary atomic_read() calls on read_errors.

Suggested-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Li Nan <linan122@huawei.com>
---
 drivers/md/raid10.c | 41 ++++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 17 deletions(-)

Comments

Song Liu May 22, 2023, 5:43 p.m. UTC | #1

On Mon, May 22, 2023 at 12:27 AM <linan666@huaweicloud.com> wrote:
>
> From: Li Nan <linan122@huawei.com>
>
> check_decay_read_errors() is used to handle rdev->read_errors. But
> read_errors is inc and read after check_decay_read_errors() is invoked
> in fix_read_error().
>
> Put all operations of read_errors into check_decay_read_errors() and
> clean up unnecessary atomic_read of read_errors.

If I understand correctly, this patch doesn't change the behavior of the
code. If this is the case, I guess we don't really need it. The original code
looks reasonable to me.

Thanks,
Song

>
> Suggested-by: Yu Kuai <yukuai3@huawei.com>
> Signed-off-by: Li Nan <linan122@huawei.com>
> ---
>  drivers/md/raid10.c | 41 ++++++++++++++++++++++++-----------------
>  1 file changed, 24 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 4fcfcb350d2b..d31eed17f186 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -2655,23 +2655,24 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>  }
>
>  /*
> - * Used by fix_read_error() to decay the per rdev read_errors.
> + * Used by fix_read_error() to decay the per rdev read_errors and check if
> + * read_error > max_read_errors.
>   * We halve the read error count for every hour that has elapsed
>   * since the last recorded read error.
>   *
>   */
> -static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
> +static bool check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
>  {
> -       long cur_time_mon;
> +       time64_t cur_time_mon = ktime_get_seconds();
>         unsigned long hours_since_last;
> -       unsigned int read_errors = atomic_read(&rdev->read_errors);
> -
> -       cur_time_mon = ktime_get_seconds();
> +       unsigned int read_errors;
> +       unsigned int max_read_errors =
> +                       atomic_read(&mddev->max_corr_read_errors);
>
>         if (rdev->last_read_error == 0) {
>                 /* first time we've seen a read error */
>                 rdev->last_read_error = cur_time_mon;
> -               return;
> +               goto increase;
>         }
>
>         hours_since_last = (long)(cur_time_mon -
> @@ -2684,10 +2685,25 @@ static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
>          * just set read errors to 0. We do this to avoid
>          * overflowing the shift of read_errors by hours_since_last.
>          */
> +       read_errors = atomic_read(&rdev->read_errors);
>         if (hours_since_last >= 8 * sizeof(read_errors))
>                 atomic_set(&rdev->read_errors, 0);
>         else
>                 atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
> +
> +increase:
> +       read_errors = atomic_inc_return(&rdev->read_errors);
> +       if (read_errors > max_read_errors) {
> +               pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
> +                         mdname(mddev), rdev->bdev,
> +                         read_errors, max_read_errors);
> +               pr_notice("md/raid10:%s: %pg: Failing raid device\n",
> +                         mdname(mddev), rdev->bdev);
> +               md_error(mddev, rdev);
> +               return false;
> +       }
> +
> +       return true;
>  }
>
>  static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
> @@ -2727,7 +2743,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
>         int sect = 0; /* Offset from r10_bio->sector */
>         int sectors = r10_bio->sectors;
>         struct md_rdev *rdev;
> -       int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
>         int d = r10_bio->devs[r10_bio->read_slot].devnum;
>
>         /* still own a reference to this rdev, so it cannot
> @@ -2740,15 +2755,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
>                    more fix_read_error() attempts */
>                 return;
>
> -       check_decay_read_errors(mddev, rdev);
> -       atomic_inc(&rdev->read_errors);
> -       if (atomic_read(&rdev->read_errors) > max_read_errors) {
> -               pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
> -                         mdname(mddev), rdev->bdev,
> -                         atomic_read(&rdev->read_errors), max_read_errors);
> -               pr_notice("md/raid10:%s: %pg: Failing raid device\n",
> -                         mdname(mddev), rdev->bdev);
> -               md_error(mddev, rdev);
> +       if (!check_decay_read_errors(mddev, rdev)) {
>                 r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
>                 return;
>         }
> --
> 2.31.1
>

diff mbox series

Patch

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 4fcfcb350d2b..d31eed17f186 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2655,23 +2655,24 @@  static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 }
 
 /*
- * Used by fix_read_error() to decay the per rdev read_errors.
+ * Used by fix_read_error() to decay the per rdev read_errors and check if
+ * read_error > max_read_errors.
  * We halve the read error count for every hour that has elapsed
  * since the last recorded read error.
  *
  */
-static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
+static bool check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
 {
-	long cur_time_mon;
+	time64_t cur_time_mon = ktime_get_seconds();
 	unsigned long hours_since_last;
-	unsigned int read_errors = atomic_read(&rdev->read_errors);
-
-	cur_time_mon = ktime_get_seconds();
+	unsigned int read_errors;
+	unsigned int max_read_errors =
+			atomic_read(&mddev->max_corr_read_errors);
 
 	if (rdev->last_read_error == 0) {
 		/* first time we've seen a read error */
 		rdev->last_read_error = cur_time_mon;
-		return;
+		goto increase;
 	}
 
 	hours_since_last = (long)(cur_time_mon -
@@ -2684,10 +2685,25 @@  static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
 	 * just set read errors to 0. We do this to avoid
 	 * overflowing the shift of read_errors by hours_since_last.
 	 */
+	read_errors = atomic_read(&rdev->read_errors);
 	if (hours_since_last >= 8 * sizeof(read_errors))
 		atomic_set(&rdev->read_errors, 0);
 	else
 		atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
+
+increase:
+	read_errors = atomic_inc_return(&rdev->read_errors);
+	if (read_errors > max_read_errors) {
+		pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
+			  mdname(mddev), rdev->bdev,
+			  read_errors, max_read_errors);
+		pr_notice("md/raid10:%s: %pg: Failing raid device\n",
+			  mdname(mddev), rdev->bdev);
+		md_error(mddev, rdev);
+		return false;
+	}
+
+	return true;
 }
 
 static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
@@ -2727,7 +2743,6 @@  static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 	int sect = 0; /* Offset from r10_bio->sector */
 	int sectors = r10_bio->sectors;
 	struct md_rdev *rdev;
-	int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
 	int d = r10_bio->devs[r10_bio->read_slot].devnum;
 
 	/* still own a reference to this rdev, so it cannot
@@ -2740,15 +2755,7 @@  static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 		   more fix_read_error() attempts */
 		return;
 
-	check_decay_read_errors(mddev, rdev);
-	atomic_inc(&rdev->read_errors);
-	if (atomic_read(&rdev->read_errors) > max_read_errors) {
-		pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
-			  mdname(mddev), rdev->bdev,
-			  atomic_read(&rdev->read_errors), max_read_errors);
-		pr_notice("md/raid10:%s: %pg: Failing raid device\n",
-			  mdname(mddev), rdev->bdev);
-		md_error(mddev, rdev);
+	if (!check_decay_read_errors(mddev, rdev)) {
 		r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
 		return;
 	}