f2fs-tools: make six open zone check resilient

Message ID 20231018223925.2135987-1-daeho43@gmail.com
State New
Headers
Series f2fs-tools: make six open zone check resilient |

Commit Message

Daeho Jeong Oct. 18, 2023, 10:39 p.m. UTC
  From: Daeho Jeong <daehojeong@google.com>

We need to make sure that all zones except the six open zones are
finished. If the previous mount was not cleanly unmounted, we have to
change all the current segments.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
---
 fsck/fsck.c         | 87 +++++++++++++++++++--------------------------
 include/f2fs_fs.h   |  1 +
 lib/libf2fs_zoned.c | 28 +++++++++++++++
 3 files changed, 65 insertions(+), 51 deletions(-)
  

Comments

Daeho Jeong Oct. 26, 2023, 2:56 a.m. UTC | #1
Let me fix one thing for this patch.

On Wed, Oct 18, 2023 at 3:39 PM Daeho Jeong <daeho43@gmail.com> wrote:
>
> From: Daeho Jeong <daehojeong@google.com>
>
> We need to make sure to finish all the zones except six open zones. In
> a case of that the previous mount wasn't successfully unmounted, we have
> to change all the current segments.
>
> Signed-off-by: Daeho Jeong <daehojeong@google.com>
> ---
>  fsck/fsck.c         | 87 +++++++++++++++++++--------------------------
>  include/f2fs_fs.h   |  1 +
>  lib/libf2fs_zoned.c | 28 +++++++++++++++
>  3 files changed, 65 insertions(+), 51 deletions(-)
>
> diff --git a/fsck/fsck.c b/fsck/fsck.c
> index 99cface..890b536 100644
> --- a/fsck/fsck.c
> +++ b/fsck/fsck.c
> @@ -2587,10 +2587,9 @@ static int check_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
>         struct curseg_info *curseg = CURSEG_I(sbi, type);
>         struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
>         struct blk_zone blkz;
> -       block_t cs_block, wp_block, zone_last_vblock;
> +       block_t cs_block, wp_block;
>         uint64_t cs_sector, wp_sector;
>         int i, ret;
> -       unsigned int zone_segno;
>         int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
>
>         /* get the device the curseg points to */
> @@ -2624,49 +2623,28 @@ static int check_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
>                 (blk_zone_wp_sector(&blkz) >> log_sectors_per_block);
>         wp_sector = blk_zone_wp_sector(&blkz);
>
> -       if (cs_sector == wp_sector)
> -               return 0;
> -
> -       if (cs_sector > wp_sector) {
> +       if (cs_sector == wp_sector) {
> +               if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
> +                       return 0;
> +               MSG(0, "Correct write pointer. But, we can't trust it, "
> +                   "since the previous mount wasn't safely unmounted: "
> +                   "curseg %d[0x%x,0x%x]\n",
> +                   type, curseg->segno, curseg->next_blkoff);
> +       } else if (cs_sector > wp_sector) {
>                 MSG(0, "Inconsistent write pointer with curseg %d: "
>                     "curseg %d[0x%x,0x%x] > wp[0x%x,0x%x]\n",
>                     type, type, curseg->segno, curseg->next_blkoff,
> +                   GET_SEGNO(sbi, wp_block),
> +                   OFFSET_IN_SEG(sbi, wp_block));
> +               if (!c.fix_on)
> +                       fsck->chk.wp_inconsistent_zones++;
> +       } else {
> +               MSG(0, "Write pointer goes advance from curseg %d: "
> +                   "curseg %d[0x%x,0x%x] wp[0x%x,0x%x]\n",
> +                   type, type, curseg->segno, curseg->next_blkoff,
>                     GET_SEGNO(sbi, wp_block), OFFSET_IN_SEG(sbi, wp_block));
> -               fsck->chk.wp_inconsistent_zones++;
> -               return -EINVAL;
> -       }
> -
> -       MSG(0, "Write pointer goes advance from curseg %d: "
> -           "curseg %d[0x%x,0x%x] wp[0x%x,0x%x]\n",
> -           type, type, curseg->segno, curseg->next_blkoff,
> -           GET_SEGNO(sbi, wp_block), OFFSET_IN_SEG(sbi, wp_block));
> -
> -       zone_segno = GET_SEG_FROM_SEC(sbi,
> -                                     GET_SEC_FROM_SEG(sbi, curseg->segno));
> -       zone_last_vblock = START_BLOCK(sbi, zone_segno) +
> -               last_vblk_off_in_zone(sbi, zone_segno);
> -
> -       /*
> -        * If valid blocks exist between the curseg position and the write
> -        * pointer, they are fsync data. This is not an error to fix. Leave it
> -        * for kernel to recover later.
> -        * If valid blocks exist between the curseg's zone start and the curseg
> -        * position, or if there is no valid block in the curseg's zone, fix
> -        * the inconsistency between the curseg and the writ pointer.
> -        * Of Note is that if there is no valid block in the curseg's zone,
> -        * last_vblk_off_in_zone() returns -1 and zone_last_vblock is always
> -        * smaller than cs_block.
> -        */
> -       if (cs_block <= zone_last_vblock && zone_last_vblock < wp_block) {
> -               MSG(0, "Curseg has fsync data: curseg %d[0x%x,0x%x] "
> -                   "last valid block in zone[0x%x,0x%x]\n",
> -                   type, curseg->segno, curseg->next_blkoff,
> -                   GET_SEGNO(sbi, zone_last_vblock),
> -                   OFFSET_IN_SEG(sbi, zone_last_vblock));
> -               return 0;
>         }
>
> -       fsck->chk.wp_inconsistent_zones++;
>         return -EINVAL;
>  }
>
> @@ -3155,10 +3133,8 @@ static int chk_and_fix_wp_with_sit(int UNUSED(i), void *blkzone, void *opaque)
>         struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
>         block_t zone_block, wp_block, wp_blkoff;
>         unsigned int zone_segno, wp_segno;
> -       struct curseg_info *cs;
> -       int cs_index, ret, last_valid_blkoff;
> +       int ret, last_valid_blkoff;
>         int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
> -       unsigned int segs_per_zone = sbi->segs_per_sec * sbi->secs_per_zone;
>
>         if (blk_zone_conv(blkz))
>                 return 0;
> @@ -3174,14 +3150,6 @@ static int chk_and_fix_wp_with_sit(int UNUSED(i), void *blkzone, void *opaque)
>         wp_segno = GET_SEGNO(sbi, wp_block);
>         wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
>
> -       /* if a curseg points to the zone, skip the check */
> -       for (cs_index = 0; cs_index < NO_CHECK_TYPE; cs_index++) {
> -               cs = &SM_I(sbi)->curseg_array[cs_index];
> -               if (zone_segno <= cs->segno &&
> -                   cs->segno < zone_segno + segs_per_zone)
> -                       return 0;
> -       }
> -
>         last_valid_blkoff = last_vblk_off_in_zone(sbi, zone_segno);
>
>         /*
> @@ -3217,10 +3185,27 @@ static int chk_and_fix_wp_with_sit(int UNUSED(i), void *blkzone, void *opaque)
>         if (last_valid_blkoff + zone_block > wp_block) {
>                 MSG(0, "Unexpected invalid write pointer: wp[0x%x,0x%x]\n",
>                     wp_segno, wp_blkoff);
> +               if (!c.fix_on)
> +                       fsck->chk.wp_inconsistent_zones++;
> +       }
> +
> +       if (!c.fix_on)
>                 return 0;
> +
> +       ret = f2fs_finish_zone(wpd->dev_index, blkz);
> +       if (ret) {
> +               u64 fill_sects = blk_zone_length(blkz) -
> +                       (blk_zone_wp_sector(blkz) - blk_zone_sector(blkz));
> +               printf("[FSCK] Finishing zone failed: %s\n", dev->path);
> +               ret = dev_fill(NULL, wp_block * F2FS_BLKSIZE,
> +                       (fill_sects >> log_sectors_per_block) * F2FS_BLKSIZE);
> +               if (ret)
> +                       printf("[FSCK] Fill up zone failed: %s\n", dev->path);
>         }
>
> -       return 0;
> +       if (!ret)
> +               fsck->chk.wp_fixed = 1;
> +       return ret;
>  }
>
>  static void fix_wp_sit_alignment(struct f2fs_sb_info *sbi)
> diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h
> index 772a6a5..d94e447 100644
> --- a/include/f2fs_fs.h
> +++ b/include/f2fs_fs.h
> @@ -1749,6 +1749,7 @@ extern int f2fs_report_zones(int, report_zones_cb_t *, void *);
>  extern int f2fs_check_zones(int);
>  int f2fs_reset_zone(int, void *);
>  extern int f2fs_reset_zones(int);
> +int f2fs_finish_zone(int i, void *blkzone);
>  extern uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb);
>
>  #define SIZE_ALIGN(val, size)  (((val) + (size) - 1) / (size))
> diff --git a/lib/libf2fs_zoned.c b/lib/libf2fs_zoned.c
> index 2ab2497..ba9286f 100644
> --- a/lib/libf2fs_zoned.c
> +++ b/lib/libf2fs_zoned.c
> @@ -502,6 +502,28 @@ out:
>         return ret;
>  }
>
> +int f2fs_finish_zone(int i, void *blkzone)
> +{
> +       struct blk_zone *blkz = (struct blk_zone *)blkzone;
> +       struct device_info *dev = c.devices + i;
> +       struct blk_zone_range range;
> +       int ret;
> +
> +       if (!blk_zone_seq(blkz) || blk_zone_empty(blkz))
> +               return 0;
> +
> +       /* Non empty sequential zone: finish */
> +       range.sector = blk_zone_sector(blkz);
> +       range.nr_sectors = blk_zone_length(blkz);
> +       ret = ioctl(dev->fd, BLKFINISHZONE, &range);
> +       if (ret != 0) {
> +               ret = -errno;
> +               ERR_MSG("ioctl BLKFINISHZONE failed: errno=%d\n", errno);
> +       }
> +
> +       return ret;
> +}
> +
>  uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb)
>  {
>  #ifdef HAVE_BLK_ZONE_REP_V2
> @@ -588,6 +610,12 @@ int f2fs_reset_zones(int i)
>         return -1;
>  }
>
> +int f2fs_finish_zone(int i, void *UNUSED(blkzone))
> +{
> +       ERR_MSG("%d: Unsupported zoned block device\n", i);
> +       return -1;
> +}
> +
>  uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb)
>  {
>         return get_sb(segment_count_main);
> --
> 2.42.0.655.g421f12c284-goog
>
  

Patch

diff --git a/fsck/fsck.c b/fsck/fsck.c
index 99cface..890b536 100644
--- a/fsck/fsck.c
+++ b/fsck/fsck.c
@@ -2587,10 +2587,9 @@  static int check_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
 	struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
 	struct blk_zone blkz;
-	block_t cs_block, wp_block, zone_last_vblock;
+	block_t cs_block, wp_block;
 	uint64_t cs_sector, wp_sector;
 	int i, ret;
-	unsigned int zone_segno;
 	int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
 
 	/* get the device the curseg points to */
@@ -2624,49 +2623,28 @@  static int check_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
 		(blk_zone_wp_sector(&blkz) >> log_sectors_per_block);
 	wp_sector = blk_zone_wp_sector(&blkz);
 
-	if (cs_sector == wp_sector)
-		return 0;
-
-	if (cs_sector > wp_sector) {
+	if (cs_sector == wp_sector) {
+		if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
+			return 0;
+		MSG(0, "Correct write pointer. But, we can't trust it, "
+		    "since the previous mount wasn't safely unmounted: "
+		    "curseg %d[0x%x,0x%x]\n",
+		    type, curseg->segno, curseg->next_blkoff);
+	} else if (cs_sector > wp_sector) {
 		MSG(0, "Inconsistent write pointer with curseg %d: "
 		    "curseg %d[0x%x,0x%x] > wp[0x%x,0x%x]\n",
 		    type, type, curseg->segno, curseg->next_blkoff,
+		    GET_SEGNO(sbi, wp_block),
+		    OFFSET_IN_SEG(sbi, wp_block));
+		if (!c.fix_on)
+			fsck->chk.wp_inconsistent_zones++;
+	} else {
+		MSG(0, "Write pointer goes advance from curseg %d: "
+		    "curseg %d[0x%x,0x%x] wp[0x%x,0x%x]\n",
+		    type, type, curseg->segno, curseg->next_blkoff,
 		    GET_SEGNO(sbi, wp_block), OFFSET_IN_SEG(sbi, wp_block));
-		fsck->chk.wp_inconsistent_zones++;
-		return -EINVAL;
-	}
-
-	MSG(0, "Write pointer goes advance from curseg %d: "
-	    "curseg %d[0x%x,0x%x] wp[0x%x,0x%x]\n",
-	    type, type, curseg->segno, curseg->next_blkoff,
-	    GET_SEGNO(sbi, wp_block), OFFSET_IN_SEG(sbi, wp_block));
-
-	zone_segno = GET_SEG_FROM_SEC(sbi,
-				      GET_SEC_FROM_SEG(sbi, curseg->segno));
-	zone_last_vblock = START_BLOCK(sbi, zone_segno) +
-		last_vblk_off_in_zone(sbi, zone_segno);
-
-	/*
-	 * If valid blocks exist between the curseg position and the write
-	 * pointer, they are fsync data. This is not an error to fix. Leave it
-	 * for kernel to recover later.
-	 * If valid blocks exist between the curseg's zone start and the curseg
-	 * position, or if there is no valid block in the curseg's zone, fix
-	 * the inconsistency between the curseg and the writ pointer.
-	 * Of Note is that if there is no valid block in the curseg's zone,
-	 * last_vblk_off_in_zone() returns -1 and zone_last_vblock is always
-	 * smaller than cs_block.
-	 */
-	if (cs_block <= zone_last_vblock && zone_last_vblock < wp_block) {
-		MSG(0, "Curseg has fsync data: curseg %d[0x%x,0x%x] "
-		    "last valid block in zone[0x%x,0x%x]\n",
-		    type, curseg->segno, curseg->next_blkoff,
-		    GET_SEGNO(sbi, zone_last_vblock),
-		    OFFSET_IN_SEG(sbi, zone_last_vblock));
-		return 0;
 	}
 
-	fsck->chk.wp_inconsistent_zones++;
 	return -EINVAL;
 }
 
@@ -3155,10 +3133,8 @@  static int chk_and_fix_wp_with_sit(int UNUSED(i), void *blkzone, void *opaque)
 	struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
 	block_t zone_block, wp_block, wp_blkoff;
 	unsigned int zone_segno, wp_segno;
-	struct curseg_info *cs;
-	int cs_index, ret, last_valid_blkoff;
+	int ret, last_valid_blkoff;
 	int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
-	unsigned int segs_per_zone = sbi->segs_per_sec * sbi->secs_per_zone;
 
 	if (blk_zone_conv(blkz))
 		return 0;
@@ -3174,14 +3150,6 @@  static int chk_and_fix_wp_with_sit(int UNUSED(i), void *blkzone, void *opaque)
 	wp_segno = GET_SEGNO(sbi, wp_block);
 	wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
 
-	/* if a curseg points to the zone, skip the check */
-	for (cs_index = 0; cs_index < NO_CHECK_TYPE; cs_index++) {
-		cs = &SM_I(sbi)->curseg_array[cs_index];
-		if (zone_segno <= cs->segno &&
-		    cs->segno < zone_segno + segs_per_zone)
-			return 0;
-	}
-
 	last_valid_blkoff = last_vblk_off_in_zone(sbi, zone_segno);
 
 	/*
@@ -3217,10 +3185,27 @@  static int chk_and_fix_wp_with_sit(int UNUSED(i), void *blkzone, void *opaque)
 	if (last_valid_blkoff + zone_block > wp_block) {
 		MSG(0, "Unexpected invalid write pointer: wp[0x%x,0x%x]\n",
 		    wp_segno, wp_blkoff);
+		if (!c.fix_on)
+			fsck->chk.wp_inconsistent_zones++;
+	}
+
+	if (!c.fix_on)
 		return 0;
+
+	ret = f2fs_finish_zone(wpd->dev_index, blkz);
+	if (ret) {
+		u64 fill_sects = blk_zone_length(blkz) -
+			(blk_zone_wp_sector(blkz) - blk_zone_sector(blkz));
+		printf("[FSCK] Finishing zone failed: %s\n", dev->path);
+		ret = dev_fill(NULL, wp_block * F2FS_BLKSIZE,
+			(fill_sects >> log_sectors_per_block) * F2FS_BLKSIZE);
+		if (ret)
+			printf("[FSCK] Fill up zone failed: %s\n", dev->path);
 	}
 
-	return 0;
+	if (!ret)
+		fsck->chk.wp_fixed = 1;
+	return ret;
 }
 
 static void fix_wp_sit_alignment(struct f2fs_sb_info *sbi)
diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h
index 772a6a5..d94e447 100644
--- a/include/f2fs_fs.h
+++ b/include/f2fs_fs.h
@@ -1749,6 +1749,7 @@  extern int f2fs_report_zones(int, report_zones_cb_t *, void *);
 extern int f2fs_check_zones(int);
 int f2fs_reset_zone(int, void *);
 extern int f2fs_reset_zones(int);
+int f2fs_finish_zone(int i, void *blkzone);
 extern uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb);
 
 #define SIZE_ALIGN(val, size)	(((val) + (size) - 1) / (size))
diff --git a/lib/libf2fs_zoned.c b/lib/libf2fs_zoned.c
index 2ab2497..ba9286f 100644
--- a/lib/libf2fs_zoned.c
+++ b/lib/libf2fs_zoned.c
@@ -502,6 +502,28 @@  out:
 	return ret;
 }
 
+int f2fs_finish_zone(int i, void *blkzone)
+{
+	struct blk_zone *blkz = (struct blk_zone *)blkzone;
+	struct device_info *dev = c.devices + i;
+	struct blk_zone_range range;
+	int ret;
+
+	if (!blk_zone_seq(blkz) || blk_zone_empty(blkz))
+		return 0;
+
+	/* Non empty sequential zone: finish */
+	range.sector = blk_zone_sector(blkz);
+	range.nr_sectors = blk_zone_length(blkz);
+	ret = ioctl(dev->fd, BLKFINISHZONE, &range);
+	if (ret != 0) {
+		ret = -errno;
+		ERR_MSG("ioctl BLKFINISHZONE failed: errno=%d\n", errno);
+	}
+
+	return ret;
+}
+
 uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb)
 {
 #ifdef HAVE_BLK_ZONE_REP_V2
@@ -588,6 +610,12 @@  int f2fs_reset_zones(int i)
 	return -1;
 }
 
+int f2fs_finish_zone(int i, void *UNUSED(blkzone))
+{
+	ERR_MSG("%d: Unsupported zoned block device\n", i);
+	return -1;
+}
+
 uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb)
 {
 	return get_sb(segment_count_main);