[v3,14/23] f2fs: Convert f2fs_write_cache_pages() to use filemap_get_folios_tag()

Message ID 20221017202451.4951-15-vishal.moola@gmail.com
State New
Headers
Series Convert to filemap_get_folios_tag() |

Commit Message

Vishal Moola Oct. 17, 2022, 8:24 p.m. UTC
  Converted the function to use a folio_batch instead of pagevec. This is in
preparation for the removal of find_get_pages_range_tag().

Also modified f2fs_all_cluster_page_ready to take in a folio_batch instead
of pagevec. This does NOT support large folios. The function currently
only utilizes folios of size 1 so this shouldn't cause any issues right
now.

Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
---
 fs/f2fs/compress.c | 13 +++++----
 fs/f2fs/data.c     | 69 +++++++++++++++++++++++++---------------------
 fs/f2fs/f2fs.h     |  5 ++--
 3 files changed, 47 insertions(+), 40 deletions(-)
  

Comments

Chao Yu Nov. 14, 2022, 7:02 a.m. UTC | #1
On 2022/10/18 4:24, Vishal Moola (Oracle) wrote:
> Converted the function to use a folio_batch instead of pagevec. This is in
> preparation for the removal of find_get_pages_range_tag().
> 
> Also modified f2fs_all_cluster_page_ready to take in a folio_batch instead
> of pagevec. This does NOT support large folios. The function currently

Vishal,

It looks this patch tries to revert Fengnan's change:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=01fc4b9a6ed8eacb64e5609bab7ac963e1c7e486

How about doing some tests to evaluate its performance effect?

+Cc Fengnan Chang

Thanks,

> only utilizes folios of size 1 so this shouldn't cause any issues right
> now.
> 
> Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
> ---
>   fs/f2fs/compress.c | 13 +++++----
>   fs/f2fs/data.c     | 69 +++++++++++++++++++++++++---------------------
>   fs/f2fs/f2fs.h     |  5 ++--
>   3 files changed, 47 insertions(+), 40 deletions(-)
> 
> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> index d315c2de136f..7af6c923e0aa 100644
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -842,10 +842,11 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index)
>   	return is_page_in_cluster(cc, index);
>   }
>   
> -bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages,
> -				int index, int nr_pages, bool uptodate)
> +bool f2fs_all_cluster_page_ready(struct compress_ctx *cc,
> +				struct folio_batch *fbatch,
> +				int index, int nr_folios, bool uptodate)
>   {
> -	unsigned long pgidx = pages[index]->index;
> +	unsigned long pgidx = fbatch->folios[index]->index;
>   	int i = uptodate ? 0 : 1;
>   
>   	/*
> @@ -855,13 +856,13 @@ bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages,
>   	if (uptodate && (pgidx % cc->cluster_size))
>   		return false;
>   
> -	if (nr_pages - index < cc->cluster_size)
> +	if (nr_folios - index < cc->cluster_size)
>   		return false;
>   
>   	for (; i < cc->cluster_size; i++) {
> -		if (pages[index + i]->index != pgidx + i)
> +		if (fbatch->folios[index + i]->index != pgidx + i)
>   			return false;
> -		if (uptodate && !PageUptodate(pages[index + i]))
> +		if (uptodate && !folio_test_uptodate(fbatch->folios[index + i]))
>   			return false;
>   	}
>   
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index a71e818cd67b..7511578b73c3 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2938,7 +2938,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
>   {
>   	int ret = 0;
>   	int done = 0, retry = 0;
> -	struct page *pages[F2FS_ONSTACK_PAGES];
> +	struct folio_batch fbatch;
>   	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
>   	struct bio *bio = NULL;
>   	sector_t last_block;
> @@ -2959,7 +2959,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
>   		.private = NULL,
>   	};
>   #endif
> -	int nr_pages;
> +	int nr_folios;
>   	pgoff_t index;
>   	pgoff_t end;		/* Inclusive */
>   	pgoff_t done_index;
> @@ -2969,6 +2969,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
>   	int submitted = 0;
>   	int i;
>   
> +	folio_batch_init(&fbatch);
> +
>   	if (get_dirty_pages(mapping->host) <=
>   				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
>   		set_inode_flag(mapping->host, FI_HOT_DATA);
> @@ -2994,13 +2996,13 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
>   		tag_pages_for_writeback(mapping, index, end);
>   	done_index = index;
>   	while (!done && !retry && (index <= end)) {
> -		nr_pages = find_get_pages_range_tag(mapping, &index, end,
> -				tag, F2FS_ONSTACK_PAGES, pages);
> -		if (nr_pages == 0)
> +		nr_folios = filemap_get_folios_tag(mapping, &index, end,
> +				tag, &fbatch);
> +		if (nr_folios == 0)
>   			break;
>   
> -		for (i = 0; i < nr_pages; i++) {
> -			struct page *page = pages[i];
> +		for (i = 0; i < nr_folios; i++) {
> +			struct folio *folio = fbatch.folios[i];
>   			bool need_readd;
>   readd:
>   			need_readd = false;
> @@ -3017,7 +3019,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
>   				}
>   
>   				if (!f2fs_cluster_can_merge_page(&cc,
> -								page->index)) {
> +								folio->index)) {
>   					ret = f2fs_write_multi_pages(&cc,
>   						&submitted, wbc, io_type);
>   					if (!ret)
> @@ -3026,27 +3028,28 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
>   				}
>   
>   				if (unlikely(f2fs_cp_error(sbi)))
> -					goto lock_page;
> +					goto lock_folio;
>   
>   				if (!f2fs_cluster_is_empty(&cc))
> -					goto lock_page;
> +					goto lock_folio;
>   
>   				if (f2fs_all_cluster_page_ready(&cc,
> -					pages, i, nr_pages, true))
> -					goto lock_page;
> +					&fbatch, i, nr_folios, true))
> +					goto lock_folio;
>   
>   				ret2 = f2fs_prepare_compress_overwrite(
>   							inode, &pagep,
> -							page->index, &fsdata);
> +							folio->index, &fsdata);
>   				if (ret2 < 0) {
>   					ret = ret2;
>   					done = 1;
>   					break;
>   				} else if (ret2 &&
>   					(!f2fs_compress_write_end(inode,
> -						fsdata, page->index, 1) ||
> +						fsdata, folio->index, 1) ||
>   					 !f2fs_all_cluster_page_ready(&cc,
> -						pages, i, nr_pages, false))) {
> +						&fbatch, i, nr_folios,
> +						false))) {
>   					retry = 1;
>   					break;
>   				}
> @@ -3059,46 +3062,47 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
>   				break;
>   			}
>   #ifdef CONFIG_F2FS_FS_COMPRESSION
> -lock_page:
> +lock_folio:
>   #endif
> -			done_index = page->index;
> +			done_index = folio->index;
>   retry_write:
> -			lock_page(page);
> +			folio_lock(folio);
>   
> -			if (unlikely(page->mapping != mapping)) {
> +			if (unlikely(folio->mapping != mapping)) {
>   continue_unlock:
> -				unlock_page(page);
> +				folio_unlock(folio);
>   				continue;
>   			}
>   
> -			if (!PageDirty(page)) {
> +			if (!folio_test_dirty(folio)) {
>   				/* someone wrote it for us */
>   				goto continue_unlock;
>   			}
>   
> -			if (PageWriteback(page)) {
> +			if (folio_test_writeback(folio)) {
>   				if (wbc->sync_mode != WB_SYNC_NONE)
> -					f2fs_wait_on_page_writeback(page,
> +					f2fs_wait_on_page_writeback(
> +							&folio->page,
>   							DATA, true, true);
>   				else
>   					goto continue_unlock;
>   			}
>   
> -			if (!clear_page_dirty_for_io(page))
> +			if (!folio_clear_dirty_for_io(folio))
>   				goto continue_unlock;
>   
>   #ifdef CONFIG_F2FS_FS_COMPRESSION
>   			if (f2fs_compressed_file(inode)) {
> -				get_page(page);
> -				f2fs_compress_ctx_add_page(&cc, page);
> +				folio_get(folio);
> +				f2fs_compress_ctx_add_page(&cc, &folio->page);
>   				continue;
>   			}
>   #endif
> -			ret = f2fs_write_single_data_page(page, &submitted,
> -					&bio, &last_block, wbc, io_type,
> -					0, true);
> +			ret = f2fs_write_single_data_page(&folio->page,
> +					&submitted, &bio, &last_block,
> +					wbc, io_type, 0, true);
>   			if (ret == AOP_WRITEPAGE_ACTIVATE)
> -				unlock_page(page);
> +				folio_unlock(folio);
>   #ifdef CONFIG_F2FS_FS_COMPRESSION
>   result:
>   #endif
> @@ -3122,7 +3126,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
>   					}
>   					goto next;
>   				}
> -				done_index = page->index + 1;
> +				done_index = folio->index +
> +					folio_nr_pages(folio);
>   				done = 1;
>   				break;
>   			}
> @@ -3136,7 +3141,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
>   			if (need_readd)
>   				goto readd;
>   		}
> -		release_pages(pages, nr_pages);
> +		folio_batch_release(&fbatch);
>   		cond_resched();
>   	}
>   #ifdef CONFIG_F2FS_FS_COMPRESSION
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index e6355a5683b7..d7bfb88fa341 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -4226,8 +4226,9 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed,
>   				block_t blkaddr, bool in_task);
>   bool f2fs_cluster_is_empty(struct compress_ctx *cc);
>   bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
> -bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages,
> -				int index, int nr_pages, bool uptodate);
> +bool f2fs_all_cluster_page_ready(struct compress_ctx *cc,
> +		struct folio_batch *fbatch, int index, int nr_folios,
> +		bool uptodate);
>   bool f2fs_sanity_check_cluster(struct dnode_of_data *dn);
>   void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
>   int f2fs_write_multi_pages(struct compress_ctx *cc,
  
Vishal Moola Nov. 14, 2022, 9:38 p.m. UTC | #2
On Sun, Nov 13, 2022 at 11:02 PM Chao Yu <chao@kernel.org> wrote:
>
> On 2022/10/18 4:24, Vishal Moola (Oracle) wrote:
> > Converted the function to use a folio_batch instead of pagevec. This is in
> > preparation for the removal of find_get_pages_range_tag().
> >
> > Also modified f2fs_all_cluster_page_ready to take in a folio_batch instead
> > of pagevec. This does NOT support large folios. The function currently
>
> Vishal,
>
> It looks this patch tries to revert Fengnan's change:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=01fc4b9a6ed8eacb64e5609bab7ac963e1c7e486
>
> How about doing some tests to evaluate its performance effect?

Yeah I'll play around with it to see how much of a difference it makes.

> +Cc Fengnan Chang
>
> Thanks,
>
> > only utilizes folios of size 1 so this shouldn't cause any issues right
> > now.
> >
> > Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
> > ---
> >   fs/f2fs/compress.c | 13 +++++----
> >   fs/f2fs/data.c     | 69 +++++++++++++++++++++++++---------------------
> >   fs/f2fs/f2fs.h     |  5 ++--
> >   3 files changed, 47 insertions(+), 40 deletions(-)
> >
> > diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> > index d315c2de136f..7af6c923e0aa 100644
> > --- a/fs/f2fs/compress.c
> > +++ b/fs/f2fs/compress.c
> > @@ -842,10 +842,11 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index)
> >       return is_page_in_cluster(cc, index);
> >   }
> >
> > -bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages,
> > -                             int index, int nr_pages, bool uptodate)
> > +bool f2fs_all_cluster_page_ready(struct compress_ctx *cc,
> > +                             struct folio_batch *fbatch,
> > +                             int index, int nr_folios, bool uptodate)
> >   {
> > -     unsigned long pgidx = pages[index]->index;
> > +     unsigned long pgidx = fbatch->folios[index]->index;
> >       int i = uptodate ? 0 : 1;
> >
> >       /*
> > @@ -855,13 +856,13 @@ bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages,
> >       if (uptodate && (pgidx % cc->cluster_size))
> >               return false;
> >
> > -     if (nr_pages - index < cc->cluster_size)
> > +     if (nr_folios - index < cc->cluster_size)
> >               return false;
> >
> >       for (; i < cc->cluster_size; i++) {
> > -             if (pages[index + i]->index != pgidx + i)
> > +             if (fbatch->folios[index + i]->index != pgidx + i)
> >                       return false;
> > -             if (uptodate && !PageUptodate(pages[index + i]))
> > +             if (uptodate && !folio_test_uptodate(fbatch->folios[index + i]))
> >                       return false;
> >       }
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index a71e818cd67b..7511578b73c3 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -2938,7 +2938,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> >   {
> >       int ret = 0;
> >       int done = 0, retry = 0;
> > -     struct page *pages[F2FS_ONSTACK_PAGES];
> > +     struct folio_batch fbatch;
> >       struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
> >       struct bio *bio = NULL;
> >       sector_t last_block;
> > @@ -2959,7 +2959,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> >               .private = NULL,
> >       };
> >   #endif
> > -     int nr_pages;
> > +     int nr_folios;
> >       pgoff_t index;
> >       pgoff_t end;            /* Inclusive */
> >       pgoff_t done_index;
> > @@ -2969,6 +2969,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> >       int submitted = 0;
> >       int i;
> >
> > +     folio_batch_init(&fbatch);
> > +
> >       if (get_dirty_pages(mapping->host) <=
> >                               SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
> >               set_inode_flag(mapping->host, FI_HOT_DATA);
> > @@ -2994,13 +2996,13 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> >               tag_pages_for_writeback(mapping, index, end);
> >       done_index = index;
> >       while (!done && !retry && (index <= end)) {
> > -             nr_pages = find_get_pages_range_tag(mapping, &index, end,
> > -                             tag, F2FS_ONSTACK_PAGES, pages);
> > -             if (nr_pages == 0)
> > +             nr_folios = filemap_get_folios_tag(mapping, &index, end,
> > +                             tag, &fbatch);
> > +             if (nr_folios == 0)
> >                       break;
> >
> > -             for (i = 0; i < nr_pages; i++) {
> > -                     struct page *page = pages[i];
> > +             for (i = 0; i < nr_folios; i++) {
> > +                     struct folio *folio = fbatch.folios[i];
> >                       bool need_readd;
> >   readd:
> >                       need_readd = false;
> > @@ -3017,7 +3019,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> >                               }
> >
> >                               if (!f2fs_cluster_can_merge_page(&cc,
> > -                                                             page->index)) {
> > +                                                             folio->index)) {
> >                                       ret = f2fs_write_multi_pages(&cc,
> >                                               &submitted, wbc, io_type);
> >                                       if (!ret)
> > @@ -3026,27 +3028,28 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> >                               }
> >
> >                               if (unlikely(f2fs_cp_error(sbi)))
> > -                                     goto lock_page;
> > +                                     goto lock_folio;
> >
> >                               if (!f2fs_cluster_is_empty(&cc))
> > -                                     goto lock_page;
> > +                                     goto lock_folio;
> >
> >                               if (f2fs_all_cluster_page_ready(&cc,
> > -                                     pages, i, nr_pages, true))
> > -                                     goto lock_page;
> > +                                     &fbatch, i, nr_folios, true))
> > +                                     goto lock_folio;
> >
> >                               ret2 = f2fs_prepare_compress_overwrite(
> >                                                       inode, &pagep,
> > -                                                     page->index, &fsdata);
> > +                                                     folio->index, &fsdata);
> >                               if (ret2 < 0) {
> >                                       ret = ret2;
> >                                       done = 1;
> >                                       break;
> >                               } else if (ret2 &&
> >                                       (!f2fs_compress_write_end(inode,
> > -                                             fsdata, page->index, 1) ||
> > +                                             fsdata, folio->index, 1) ||
> >                                        !f2fs_all_cluster_page_ready(&cc,
> > -                                             pages, i, nr_pages, false))) {
> > +                                             &fbatch, i, nr_folios,
> > +                                             false))) {
> >                                       retry = 1;
> >                                       break;
> >                               }
> > @@ -3059,46 +3062,47 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> >                               break;
> >                       }
> >   #ifdef CONFIG_F2FS_FS_COMPRESSION
> > -lock_page:
> > +lock_folio:
> >   #endif
> > -                     done_index = page->index;
> > +                     done_index = folio->index;
> >   retry_write:
> > -                     lock_page(page);
> > +                     folio_lock(folio);
> >
> > -                     if (unlikely(page->mapping != mapping)) {
> > +                     if (unlikely(folio->mapping != mapping)) {
> >   continue_unlock:
> > -                             unlock_page(page);
> > +                             folio_unlock(folio);
> >                               continue;
> >                       }
> >
> > -                     if (!PageDirty(page)) {
> > +                     if (!folio_test_dirty(folio)) {
> >                               /* someone wrote it for us */
> >                               goto continue_unlock;
> >                       }
> >
> > -                     if (PageWriteback(page)) {
> > +                     if (folio_test_writeback(folio)) {
> >                               if (wbc->sync_mode != WB_SYNC_NONE)
> > -                                     f2fs_wait_on_page_writeback(page,
> > +                                     f2fs_wait_on_page_writeback(
> > +                                                     &folio->page,
> >                                                       DATA, true, true);
> >                               else
> >                                       goto continue_unlock;
> >                       }
> >
> > -                     if (!clear_page_dirty_for_io(page))
> > +                     if (!folio_clear_dirty_for_io(folio))
> >                               goto continue_unlock;
> >
> >   #ifdef CONFIG_F2FS_FS_COMPRESSION
> >                       if (f2fs_compressed_file(inode)) {
> > -                             get_page(page);
> > -                             f2fs_compress_ctx_add_page(&cc, page);
> > +                             folio_get(folio);
> > +                             f2fs_compress_ctx_add_page(&cc, &folio->page);
> >                               continue;
> >                       }
> >   #endif
> > -                     ret = f2fs_write_single_data_page(page, &submitted,
> > -                                     &bio, &last_block, wbc, io_type,
> > -                                     0, true);
> > +                     ret = f2fs_write_single_data_page(&folio->page,
> > +                                     &submitted, &bio, &last_block,
> > +                                     wbc, io_type, 0, true);
> >                       if (ret == AOP_WRITEPAGE_ACTIVATE)
> > -                             unlock_page(page);
> > +                             folio_unlock(folio);
> >   #ifdef CONFIG_F2FS_FS_COMPRESSION
> >   result:
> >   #endif
> > @@ -3122,7 +3126,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> >                                       }
> >                                       goto next;
> >                               }
> > -                             done_index = page->index + 1;
> > +                             done_index = folio->index +
> > +                                     folio_nr_pages(folio);
> >                               done = 1;
> >                               break;
> >                       }
> > @@ -3136,7 +3141,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> >                       if (need_readd)
> >                               goto readd;
> >               }
> > -             release_pages(pages, nr_pages);
> > +             folio_batch_release(&fbatch);
> >               cond_resched();
> >       }
> >   #ifdef CONFIG_F2FS_FS_COMPRESSION
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index e6355a5683b7..d7bfb88fa341 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -4226,8 +4226,9 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed,
> >                               block_t blkaddr, bool in_task);
> >   bool f2fs_cluster_is_empty(struct compress_ctx *cc);
> >   bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
> > -bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages,
> > -                             int index, int nr_pages, bool uptodate);
> > +bool f2fs_all_cluster_page_ready(struct compress_ctx *cc,
> > +             struct folio_batch *fbatch, int index, int nr_folios,
> > +             bool uptodate);
> >   bool f2fs_sanity_check_cluster(struct dnode_of_data *dn);
> >   void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
> >   int f2fs_write_multi_pages(struct compress_ctx *cc,
  
Vishal Moola Nov. 23, 2022, 2:26 a.m. UTC | #3
On Mon, Nov 14, 2022 at 1:38 PM Vishal Moola <vishal.moola@gmail.com> wrote:
>
> On Sun, Nov 13, 2022 at 11:02 PM Chao Yu <chao@kernel.org> wrote:
> >
> > On 2022/10/18 4:24, Vishal Moola (Oracle) wrote:
> > > Converted the function to use a folio_batch instead of pagevec. This is in
> > > preparation for the removal of find_get_pages_range_tag().
> > >
> > > Also modified f2fs_all_cluster_page_ready to take in a folio_batch instead
> > > of pagevec. This does NOT support large folios. The function currently
> >
> > Vishal,
> >
> > It looks this patch tries to revert Fengnan's change:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=01fc4b9a6ed8eacb64e5609bab7ac963e1c7e486
> >
> > How about doing some tests to evaluate its performance effect?
>
> Yeah I'll play around with it to see how much of a difference it makes.

I did some testing. Looks like reverting Fengnan's change allows for
occasional, but significant, spikes in write latency. I'll work on a variation
of the patch that maintains the use of F2FS_ONSTACK_PAGES and send
that in the next version of the patch series. Thanks for pointing that out!

How do the remaining f2fs patches in the series look to you?
Patch 16/23 f2fs_sync_meta_pages() in particular seems like it may
be prone to problems. If there are any changes that need to be made to
it I can include those in the next version as well.
  
Vishal Moola Nov. 23, 2022, 7:51 a.m. UTC | #4
On Tue, Nov 22, 2022 at 6:26 PM Vishal Moola <vishal.moola@gmail.com> wrote:
>
> On Mon, Nov 14, 2022 at 1:38 PM Vishal Moola <vishal.moola@gmail.com> wrote:
> >
> > On Sun, Nov 13, 2022 at 11:02 PM Chao Yu <chao@kernel.org> wrote:
> > >
> > > On 2022/10/18 4:24, Vishal Moola (Oracle) wrote:
> > > > Converted the function to use a folio_batch instead of pagevec. This is in
> > > > preparation for the removal of find_get_pages_range_tag().
> > > >
> > > > Also modified f2fs_all_cluster_page_ready to take in a folio_batch instead
> > > > of pagevec. This does NOT support large folios. The function currently
> > >
> > > Vishal,
> > >
> > > It looks this patch tries to revert Fengnan's change:
> > >
> > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=01fc4b9a6ed8eacb64e5609bab7ac963e1c7e486
> > >
> > > How about doing some tests to evaluate its performance effect?
> >
> > Yeah I'll play around with it to see how much of a difference it makes.
>
> I did some testing. Looks like reverting Fengnan's change allows for
> occasional, but significant, spikes in write latency. I'll work on a variation
> of the patch that maintains the use of F2FS_ONSTACK_PAGES and send
> that in the next version of the patch series. Thanks for pointing that out!

Here are some numbers for reference to performance. I'm thinking we may
want to go with the new version, but I'll let you be the judge of that.
I ran some fio random write tests with block size 64k on a system with 8 cpus.

1 job with 1 io-depth:
Baseline:
  slat (usec): min=8, max=849, avg=16.47, stdev=12.33
  clat (nsec): min=253, max=751838, avg=346.51, stdev=2452.10
  lat (usec): min=9, max=854, avg=17.00, stdev=12.74
  lat (nsec)   : 500=97.09%, 750=1.73%, 1000=0.57%
  lat (usec)   : 2=0.41%, 4=0.09%, 10=0.06%, 20=0.04%, 50=0.01%
  lat (usec)   : 100=0.01%, 1000=0.01%

This patch:
  slat (usec): min=9, max=3690, avg=16.61, stdev=17.36
  clat (nsec): min=28, max=380434, avg=336.59, stdev=1571.23
  lat (usec): min=10, max=3699, avg=17.13, stdev=17.51
  lat (nsec)   : 50=0.01%, 500=97.95%, 750=1.42%, 1000=0.33%
  lat (usec)   : 2=0.19%, 4=0.05%, 10=0.03%, 20=0.03%, 50=0.01%
  lat (usec)   : 100=0.01%, 250=0.01%, 500=0.01%

Folios w/ F2FS_ONSTACK_PAGES (next version):
  slat (usec): min=12, max=13623, avg=19.48, stdev=48.94
  clat (nsec): min=265, max=386917, avg=380.97, stdev=1679.85
  lat (usec): min=12, max=13635, avg=20.06, stdev=49.27
  lat (nsec)   : 500=93.55%, 750=4.62%, 1000=0.92%
  lat (usec)   : 2=0.65%, 4=0.09%, 10=0.10%, 20=0.06%, 50=0.01%
  lat (usec)   : 100=0.01%, 250=0.01%, 500=0.01%

1 job with 16 io-depth:
Baseline:
  slat (usec): min=8, max=3907, avg=16.89, stdev=23.39
  clat (usec): min=12, max=15160k, avg=11115.61, stdev=265051.86
  lat (usec): min=137, max=15160k, avg=11132.68, stdev=265051.75
  lat (usec)   : 20=0.01%, 250=57.66%, 500=39.56%, 750=1.96%, 1000=0.22%
  lat (msec)   : 2=0.16%, 4=0.06%, 10=0.01%, 2000=0.29%, >=2000=0.08%

This patch:
  slat (usec): min=9, max=1230, avg=17.15, stdev=12.95
  clat (usec): min=4, max=39471k, avg=14825.22, stdev=588237.30
  lat (usec): min=80, max=39471k, avg=14842.55, stdev=588237.27
  lat (usec)   : 10=0.01%, 250=38.78%, 500=59.53%, 750=1.12%, 1000=0.16%
  lat (msec)   : 2=0.04%, 2000=0.34%, >=2000=0.02%

Folios w/ F2FS_ONSTACK_PAGES (next version):
  slat (usec): min=9, max=1188, avg=18.74, stdev=14.12
  clat (usec): min=5, max=15278k, avg=8936.75, stdev=214230.09
  lat (usec): min=90, max=15278k, avg=8955.67, stdev=214230.10
  lat (usec)   : 10=0.01%, 250=9.68%, 500=86.49%, 750=2.74%, 1000=0.54%
  lat (msec)   : 2=0.18%, 2000=0.32%, >=2000=0.04%


> How do the remaining f2fs patches in the series look to you?
> Patch 16/23 f2fs_sync_meta_pages() in particular seems like it may
> be prone to problems. If there are any changes that need to be made to
> it I can include those in the next version as well.
  
Matthew Wilcox Nov. 29, 2022, 7:14 p.m. UTC | #5
On Mon, Nov 14, 2022 at 03:02:34PM +0800, Chao Yu wrote:
> On 2022/10/18 4:24, Vishal Moola (Oracle) wrote:
> > Converted the function to use a folio_batch instead of pagevec. This is in
> > preparation for the removal of find_get_pages_range_tag().
> > 
> > Also modified f2fs_all_cluster_page_ready to take in a folio_batch instead
> > of pagevec. This does NOT support large folios. The function currently
> 
> Vishal,
> 
> It looks this patch tries to revert Fengnan's change:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=01fc4b9a6ed8eacb64e5609bab7ac963e1c7e486
> 
> How about doing some tests to evaluate its performance effect?
> 
> +Cc Fengnan Chang

Thanks for reviewing this.  I think the real solution to this is
that f2fs should be using large folios.  That way, the page cache
will keep track of dirtiness on a per-folio basis, and if your folios
are at least as large as your cluster size, you won't need to do the
f2fs_prepare_compress_overwrite() dance.  And you'll get at least fifteen
dirty folios per call instead of fifteen dirty pages, so your costs will
be much lower.

Is anyone interested in doing the work to convert f2fs to support
large folios?  I can help, or you can look at the work done for XFS,
AFS and a few other filesystems.
  
李扬韬 Nov. 30, 2022, 12:48 p.m. UTC | #6
Hi,

> Thanks for reviewing this.  I think the real solution to this is
> that f2fs should be using large folios.  That way, the page cache
> will keep track of dirtiness on a per-folio basis, and if your folios
> are at least as large as your cluster size, you won't need to do the
> f2fs_prepare_compress_overwrite() dance.  And you'll get at least fifteen
> dirty folios per call instead of fifteen dirty pages, so your costs will
> be much lower.
>
> Is anyone interested in doing the work to convert f2fs to support
> large folios?  I can help, or you can look at the work done for XFS,
> AFS and a few other filesystems.

Seems like an interesting job. Not sure if I can be of any help.
What needs to be done currently to support large folio?

Are there any roadmaps and reference documents.

Thx,
Yangtao
  
李扬韬 Nov. 30, 2022, 12:51 p.m. UTC | #7
Hi,

> Thanks for reviewing this.  I think the real solution to this is
> that f2fs should be using large folios.  That way, the page cache
> will keep track of dirtiness on a per-folio basis, and if your folios
> are at least as large as your cluster size, you won't need to do the
> f2fs_prepare_compress_overwrite() dance.  And you'll get at least fifteen
> dirty folios per call instead of fifteen dirty pages, so your costs will
> be much lower.
>
> Is anyone interested in doing the work to convert f2fs to support
> large folios?  I can help, or you can look at the work done for XFS,
> AFS and a few other filesystems.

Seems like an interesting job. Not sure if I can be of any help.
What needs to be done currently to support large folio?

Are there any roadmaps and reference documents.

Thx,
Yangtao
  
Matthew Wilcox Nov. 30, 2022, 3:18 p.m. UTC | #8
On Wed, Nov 30, 2022 at 08:48:04PM +0800, Yangtao Li wrote:
> Hi,
> 
> > Thanks for reviewing this.  I think the real solution to this is
> > that f2fs should be using large folios.  That way, the page cache
> > will keep track of dirtiness on a per-folio basis, and if your folios
> > are at least as large as your cluster size, you won't need to do the
> > f2fs_prepare_compress_overwrite() dance.  And you'll get at least fifteen
> > dirty folios per call instead of fifteen dirty pages, so your costs will
> > be much lower.
> >
> > Is anyone interested in doing the work to convert f2fs to support
> > large folios?  I can help, or you can look at the work done for XFS,
> > AFS and a few other filesystems.
> 
> Seems like an interesting job. Not sure if I can be of any help.
> What needs to be done currently to support large folio?
> 
> Are there any roadmaps and reference documents.

From a filesystem point of view, you need to ensure that you handle folios
larger than PAGE_SIZE correctly.  The easiest way is to spread the use
of folios throughout the filesystem.  For example, today the first thing
we do in f2fs_read_data_folio() is convert the folio back into a page.
That works because f2fs hasn't told the kernel that it supports large
folios, so the VFS won't create large folios for it.

It's a lot of subtle things.  Here's an obvious one:
                        zero_user_segment(page, 0, PAGE_SIZE);
There's a folio equivalent that will zero an entire folio.

But then there is code which assumes the number of blocks per page (maybe
not in f2fs?) and so on.  Every filesystem will have its own challenges.

One way to approach this is to just enable large folios (see commit
6795801366da or 8549a26308f9) and see what breaks when you run xfstests
over it.  Probably quite a lot!
  
Vishal Moola Dec. 5, 2022, 8:34 p.m. UTC | #9
On Tue, Nov 22, 2022 at 6:26 PM Vishal Moola <vishal.moola@gmail.com> wrote:
>
> On Mon, Nov 14, 2022 at 1:38 PM Vishal Moola <vishal.moola@gmail.com> wrote:
> >
> > On Sun, Nov 13, 2022 at 11:02 PM Chao Yu <chao@kernel.org> wrote:
> > >
> > > On 2022/10/18 4:24, Vishal Moola (Oracle) wrote:
> > > > Converted the function to use a folio_batch instead of pagevec. This is in
> > > > preparation for the removal of find_get_pages_range_tag().
> > > >
> > > > Also modified f2fs_all_cluster_page_ready to take in a folio_batch instead
> > > > of pagevec. This does NOT support large folios. The function currently
> > >
> > > Vishal,
> > >
> > > It looks this patch tries to revert Fengnan's change:
> > >
> > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=01fc4b9a6ed8eacb64e5609bab7ac963e1c7e486
> > >
> > > How about doing some tests to evaluate its performance effect?
> >
> > Yeah I'll play around with it to see how much of a difference it makes.
>
> I did some testing. Looks like reverting Fengnan's change allows for
> occasional, but significant, spikes in write latency. I'll work on a variation
> of the patch that maintains the use of F2FS_ONSTACK_PAGES and send
> that in the next version of the patch series. Thanks for pointing that out!

Following Matthew's comment, I'm thinking we should go with this patch
as is. The numbers between both variations did not have substantial
differences with regard to latency.

While the new variant would maintain the use of F2FS_ONSTACK_PAGES,
the code becomes messier and would end up limiting the number of
folios written back once large folio support is added. This means it would
have to be converted down to this version later anyways.

Does leaving this patch as is sound good to you?

For reference, here's what the version continuing to use a page
array of size F2FS_ONSTACK_PAGES would change:

+               nr_pages = 0;
+again:
+               nr_folios = filemap_get_folios_tag(mapping, &index, end,
+                               tag, &fbatch);
+               if (nr_folios == 0) {
+                       if (nr_pages)
+                               goto write;
+                               goto write;
                        break;
+               }

+               for (i = 0; i < nr_folios; i++) {
+                       struct folio* folio = fbatch.folios[i];
+
+                       idx = 0;
+                       p = folio_nr_pages(folio);
+add_more:
+                       pages[nr_pages] = folio_page(folio,idx);
+                       folio_ref_inc(folio);
+                       if (++nr_pages == F2FS_ONSTACK_PAGES) {
+                               index = folio->index + idx + 1;
+                               folio_batch_release(&fbatch);
+                               goto write;
+                       }
+                       if (++idx < p)
+                               goto add_more;
+               }
+               folio_batch_release(&fbatch);
+               goto again;
+write:

> How do the remaining f2fs patches in the series look to you?
> Patch 16/23 f2fs_sync_meta_pages() in particular seems like it may
> be prone to problems. If there are any changes that need to be made to
> it I can include those in the next version as well.

Thanks for reviewing the patches so far. I wanted to follow up on asking
for review of the last couple of patches.
  
Luis Chamberlain Dec. 7, 2022, 8:51 p.m. UTC | #10
On Wed, Nov 30, 2022 at 03:18:41PM +0000, Matthew Wilcox wrote:
> On Wed, Nov 30, 2022 at 08:48:04PM +0800, Yangtao Li wrote:
> > Hi,
> > 
> > > Thanks for reviewing this.  I think the real solution to this is
> > > that f2fs should be using large folios.  That way, the page cache
> > > will keep track of dirtiness on a per-folio basis, and if your folios
> > > are at least as large as your cluster size, you won't need to do the
> > > f2fs_prepare_compress_overwrite() dance.  And you'll get at least fifteen
> > > dirty folios per call instead of fifteen dirty pages, so your costs will
> > > be much lower.
> > >
> > > Is anyone interested in doing the work to convert f2fs to support
> > > large folios?  I can help, or you can look at the work done for XFS,
> > > AFS and a few other filesystems.
> > 
> > Seems like an interesting job. Not sure if I can be of any help.
> > What needs to be done currently to support large folio?
> > 
> > Are there any roadmaps and reference documents.
> 
> >From a filesystem point of view, you need to ensure that you handle folios
> larger than PAGE_SIZE correctly.  The easiest way is to spread the use
> of folios throughout the filesystem.  For example, today the first thing
> we do in f2fs_read_data_folio() is convert the folio back into a page.
> That works because f2fs hasn't told the kernel that it supports large
> folios, so the VFS won't create large folios for it.
> 
> It's a lot of subtle things.  Here's an obvious one:
>                         zero_user_segment(page, 0, PAGE_SIZE);
> There's a folio equivalent that will zero an entire folio.
> 
> But then there is code which assumes the number of blocks per page (maybe
> not in f2fs?) and so on.  Every filesystem will have its own challenges.
> 
> One way to approach this is to just enable large folios (see commit
> 6795801366da or 8549a26308f9) and see what breaks when you run xfstests
> over it.  Probably quite a lot!

Me and Pankaj are very interested in helping on this front. And so we'll
start to organize and talk every week about this to see what is missing.
First order of business however will be testing so we'll have to
establish a public baseline to ensure we don't regress. For this we intend
on using kdevops so that'll be done first.

If folks have patches they want to test in consideration for folio /
iomap enhancements feel free to Cc us :)

After we establish a baseline we can move forward with taking on tasks
which will help with this conversion.

[0] https://github.com/linux-kdevops/kdevops

  Luis
  
Chao Yu Dec. 12, 2022, 2:41 p.m. UTC | #11
Hi Vishal,

Sorry for the delay reply.

On 2022/12/6 4:34, Vishal Moola wrote:
> On Tue, Nov 22, 2022 at 6:26 PM Vishal Moola <vishal.moola@gmail.com> wrote:
>>
>> On Mon, Nov 14, 2022 at 1:38 PM Vishal Moola <vishal.moola@gmail.com> wrote:
>>>
>>> On Sun, Nov 13, 2022 at 11:02 PM Chao Yu <chao@kernel.org> wrote:
>>>>
>>>> On 2022/10/18 4:24, Vishal Moola (Oracle) wrote:
>>>>> Converted the function to use a folio_batch instead of pagevec. This is in
>>>>> preparation for the removal of find_get_pages_range_tag().
>>>>>
>>>>> Also modified f2fs_all_cluster_page_ready to take in a folio_batch instead
>>>>> of pagevec. This does NOT support large folios. The function currently
>>>>
>>>> Vishal,
>>>>
>>>> It looks this patch tries to revert Fengnan's change:
>>>>
>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=01fc4b9a6ed8eacb64e5609bab7ac963e1c7e486
>>>>
>>>> How about doing some tests to evaluate its performance effect?
>>>
>>> Yeah I'll play around with it to see how much of a difference it makes.
>>
>> I did some testing. Looks like reverting Fengnan's change allows for
>> occasional, but significant, spikes in write latency. I'll work on a variation
>> of the patch that maintains the use of F2FS_ONSTACK_PAGES and send
>> that in the next version of the patch series. Thanks for pointing that out!
> 
> Following Matthew's comment, I'm thinking we should go with this patch
> as is. The numbers between both variations did not have substantial
> differences with regard to latency.
> 
> While the new variant would maintain the use of F2FS_ONSTACK_PAGES,
> the code becomes messier and would end up limiting the number of
> folios written back once large folio support is added. This means it would
> have to be converted down to this version later anyways.
> 
> Does leaving this patch as is sound good to you?
> 
> For reference, here's what the version continuing to use a page
> array of size F2FS_ONSTACK_PAGES would change:
> 
> +               nr_pages = 0;
> +again:
> +               nr_folios = filemap_get_folios_tag(mapping, &index, end,
> +                               tag, &fbatch);
> +               if (nr_folios == 0) {
> +                       if (nr_pages)
> +                               goto write;
> +                               goto write;

Duplicated code.

>                          break;
> +               }
> 
> +               for (i = 0; i < nr_folios; i++) {
> +                       struct folio* folio = fbatch.folios[i];
> +
> +                       idx = 0;
> +                       p = folio_nr_pages(folio);
> +add_more:
> +                       pages[nr_pages] = folio_page(folio,idx);
> +                       folio_ref_inc(folio);
> +                       if (++nr_pages == F2FS_ONSTACK_PAGES) {
> +                               index = folio->index + idx + 1;
> +                               folio_batch_release(&fbatch);
> +                               goto write;
> +                       }
> +                       if (++idx < p)
> +                               goto add_more;
> +               }
> +               folio_batch_release(&fbatch);
> +               goto again;
> +write:

Looks fine to me, can you please send a formal patch?

Thanks,

> 
>> How do the remaining f2fs patches in the series look to you?
>> Patch 16/23 f2fs_sync_meta_pages() in particular seems like it may
>> be prone to problems. If there are any changes that need to be made to
>> it I can include those in the next version as well.
> 
> Thanks for reviewing the patches so far. I wanted to follow up on asking
> for review of the last couple of patches.
  
Matthew Wilcox Jan. 25, 2024, 8:47 p.m. UTC | #12
On Wed, Dec 07, 2022 at 12:51:13PM -0800, Luis Chamberlain wrote:
> On Wed, Nov 30, 2022 at 03:18:41PM +0000, Matthew Wilcox wrote:
> > From a filesystem point of view, you need to ensure that you handle folios
> > larger than PAGE_SIZE correctly.  The easiest way is to spread the use
> > of folios throughout the filesystem.  For example, today the first thing
> > we do in f2fs_read_data_folio() is convert the folio back into a page.
> > That works because f2fs hasn't told the kernel that it supports large
> > folios, so the VFS won't create large folios for it.
> > 
> > It's a lot of subtle things.  Here's an obvious one:
> >                         zero_user_segment(page, 0, PAGE_SIZE);
> > There's a folio equivalent that will zero an entire folio.
> > 
> > But then there is code which assumes the number of blocks per page (maybe
> > not in f2fs?) and so on.  Every filesystem will have its own challenges.
> > 
> > One way to approach this is to just enable large folios (see commit
> > 6795801366da or 8549a26308f9) and see what breaks when you run xfstests
> > over it.  Probably quite a lot!
> 
> Me and Pankaj are very interested in helping on this front. And so we'll
> start to organize and talk every week about this to see what is missing.
> First order of business however will be testing so we'll have to
> establish a public baseline to ensure we don't regress. For this we intend
> on using kdevops so that'll be done first.
> 
> If folks have patches they want to test in consideration for folio /
> iomap enhancements feel free to Cc us :)
> 
> After we establish a baseline we can move forward with taking on tasks
> which will help with this conversion.

So ... it's been a year.  How is this project coming along?  There
weren't a lot of commits to f2fs in 2023 that were folio related.
  
Luis Chamberlain Jan. 25, 2024, 8:54 p.m. UTC | #13
On Thu, Jan 25, 2024 at 08:47:39PM +0000, Matthew Wilcox wrote:
> On Wed, Dec 07, 2022 at 12:51:13PM -0800, Luis Chamberlain wrote:
> > On Wed, Nov 30, 2022 at 03:18:41PM +0000, Matthew Wilcox wrote:
> > > From a filesystem point of view, you need to ensure that you handle folios
> > > larger than PAGE_SIZE correctly.  The easiest way is to spread the use
> > > of folios throughout the filesystem.  For example, today the first thing
> > > we do in f2fs_read_data_folio() is convert the folio back into a page.
> > > That works because f2fs hasn't told the kernel that it supports large
> > > folios, so the VFS won't create large folios for it.
> > > 
> > > It's a lot of subtle things.  Here's an obvious one:
> > >                         zero_user_segment(page, 0, PAGE_SIZE);
> > > There's a folio equivalent that will zero an entire folio.
> > > 
> > > But then there is code which assumes the number of blocks per page (maybe
> > > not in f2fs?) and so on.  Every filesystem will have its own challenges.
> > > 
> > > One way to approach this is to just enable large folios (see commit
> > > 6795801366da or 8549a26308f9) and see what breaks when you run xfstests
> > > over it.  Probably quite a lot!
> > 
> > Me and Pankaj are very interested in helping on this front. And so we'll
> > start to organize and talk every week about this to see what is missing.
> > First order of business however will be testing so we'll have to
> > establish a public baseline to ensure we don't regress. For this we intend
> > on using kdevops so that'll be done first.
> > 
> > If folks have patches they want to test in consideration for folio /
> > iomap enhancements feel free to Cc us :)
> > 
> > After we establish a baseline we can move forward with taking on tasks
> > which will help with this conversion.
> 
> So ... it's been a year.  How is this project coming along?  There
> weren't a lot of commits to f2fs in 2023 that were folio related.

The review at LSFMM revealed iomap based filesystems were the priority
and so that has been the priority. Once we tackle that and get XFS
support we can revisit which next fs to help out with. Testing has been
a *huge* part of our endeavor, and naturally getting XFS patches up to
what is required has just taken a bit more time. But you can expect
patches for that within a month or so.

  Luis
  
Matthew Wilcox Jan. 26, 2024, 9:01 p.m. UTC | #14
On Thu, Jan 25, 2024 at 12:54:47PM -0800, Luis Chamberlain wrote:
> On Thu, Jan 25, 2024 at 08:47:39PM +0000, Matthew Wilcox wrote:
> > On Wed, Dec 07, 2022 at 12:51:13PM -0800, Luis Chamberlain wrote:
> > > Me and Pankaj are very interested in helping on this front. And so we'll
> > > start to organize and talk every week about this to see what is missing.
> > > First order of business however will be testing so we'll have to
> > > establish a public baseline to ensure we don't regress. For this we intend
> > > on using kdevops so that'll be done first.
> > > 
> > > If folks have patches they want to test in consideration for folio /
> > > iomap enhancements feel free to Cc us :)
> > > 
> > > After we establish a baseline we can move forward with taking on tasks
> > > which will help with this conversion.
> > 
> > So ... it's been a year.  How is this project coming along?  There
> > weren't a lot of commits to f2fs in 2023 that were folio related.
> 
> The review at LSFMM revealed iomap based filesystems were the priority
> and so that has been the priority. Once we tackle that and get XFS
> support we can revisit which next fs to help out with. Testing has been
> a *huge* part of our endeavor, and naturally getting XFS patches up to
> what is required has just taken a bit more time. But you can expect
> patches for that within a month or so.

Is anyone working on the iomap conversion for f2fs?
  
Luis Chamberlain Jan. 26, 2024, 9:32 p.m. UTC | #15
On Fri, Jan 26, 2024 at 09:01:06PM +0000, Matthew Wilcox wrote:
> On Thu, Jan 25, 2024 at 12:54:47PM -0800, Luis Chamberlain wrote:
> > On Thu, Jan 25, 2024 at 08:47:39PM +0000, Matthew Wilcox wrote:
> > > On Wed, Dec 07, 2022 at 12:51:13PM -0800, Luis Chamberlain wrote:
> > > > Me and Pankaj are very interested in helping on this front. And so we'll
> > > > start to organize and talk every week about this to see what is missing.
> > > > First order of business however will be testing so we'll have to
> > > > establish a public baseline to ensure we don't regress. For this we intend
> > > > on using kdevops so that'll be done first.
> > > > 
> > > > If folks have patches they want to test in consideration for folio /
> > > > iomap enhancements feel free to Cc us :)
> > > > 
> > > > After we establish a baseline we can move forward with taking on tasks
> > > > which will help with this conversion.
> > > 
> > > So ... it's been a year.  How is this project coming along?  There
> > > weren't a lot of commits to f2fs in 2023 that were folio related.
> > 
> > The review at LSFMM revealed iomap based filesystems were the priority
> > and so that has been the priority. Once we tackle that and get XFS
> > support we can revisit which next fs to help out with. Testing has been
> > a *huge* part of our endeavor, and naturally getting XFS patches up to
> > what is required has just taken a bit more time. But you can expect
> > patches for that within a month or so.
> 
> Is anyone working on the iomap conversion for f2fs?

It already has been done for direct IO by Eric as per commit a1e09b03e6f5
("f2fs: use iomap for direct I/O"), not clear to me if anyone is working
on buffered-io. Then f2fs_commit_super() seems to be the last buffer-head
user, and its not clear what the replacement could be yet.

Jaegeuk, Eric, have you guys considered this?

  Luis
  
Eric Biggers Jan. 27, 2024, 7:05 a.m. UTC | #16
On Fri, Jan 26, 2024 at 01:32:05PM -0800, Luis Chamberlain wrote:
> On Fri, Jan 26, 2024 at 09:01:06PM +0000, Matthew Wilcox wrote:
> > On Thu, Jan 25, 2024 at 12:54:47PM -0800, Luis Chamberlain wrote:
> > > On Thu, Jan 25, 2024 at 08:47:39PM +0000, Matthew Wilcox wrote:
> > > > On Wed, Dec 07, 2022 at 12:51:13PM -0800, Luis Chamberlain wrote:
> > > > > Me and Pankaj are very interested in helping on this front. And so we'll
> > > > > start to organize and talk every week about this to see what is missing.
> > > > > First order of business however will be testing so we'll have to
> > > > > establish a public baseline to ensure we don't regress. For this we intend
> > > > > on using kdevops so that'll be done first.
> > > > > 
> > > > > If folks have patches they want to test in consideration for folio /
> > > > > iomap enhancements feel free to Cc us :)
> > > > > 
> > > > > After we establish a baseline we can move forward with taking on tasks
> > > > > which will help with this conversion.
> > > > 
> > > > So ... it's been a year.  How is this project coming along?  There
> > > > weren't a lot of commits to f2fs in 2023 that were folio related.
> > > 
> > > The review at LSFMM revealed iomap based filesystems were the priority
> > > and so that has been the priority. Once we tackle that and get XFS
> > > support we can revisit which next fs to help out with. Testing has been
> > > a *huge* part of our endeavor, and naturally getting XFS patches up to
> > > what is required has just taken a bit more time. But you can expect
> > > patches for that within a month or so.
> > 
> > Is anyone working on the iomap conversion for f2fs?
> 
> It already has been done for direct IO by Eric as per commit a1e09b03e6f5
> ("f2fs: use iomap for direct I/O"), not clear to me if anyone is working
> on buffered-io. Then f2fs_commit_super() seems to be the last buffer-head
> user, and its not clear what the replacement could be yet.
> 
> Jaegeuk, Eric, have you guys considered this?
> 

Sure, I've *considered* that, along with other requested filesystem
modernization projects such as converting f2fs to use the new mount API and
finishing ext4's conversion to iomap.  But, I haven't had time to work on these
projects, nor to get very involved in f2fs beyond what's needed to maintain the
fscrypt and fsverity support.  I'm not anywhere close to a full-time filesystem
developer.  I did implement the f2fs iomap direct I/O support two years ago
because it made the fscrypt direct I/O support easier.  Note that these types of
changes are fairly disruptive, and there were bugs that resulted from my
patches, despite my best efforts.  It's necessary for someone to get deeply
involved in these types of changes and follow them all the way through.

- Eric
  

Patch

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index d315c2de136f..7af6c923e0aa 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -842,10 +842,11 @@  bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index)
 	return is_page_in_cluster(cc, index);
 }
 
-bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages,
-				int index, int nr_pages, bool uptodate)
+bool f2fs_all_cluster_page_ready(struct compress_ctx *cc,
+				struct folio_batch *fbatch,
+				int index, int nr_folios, bool uptodate)
 {
-	unsigned long pgidx = pages[index]->index;
+	unsigned long pgidx = fbatch->folios[index]->index;
 	int i = uptodate ? 0 : 1;
 
 	/*
@@ -855,13 +856,13 @@  bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages,
 	if (uptodate && (pgidx % cc->cluster_size))
 		return false;
 
-	if (nr_pages - index < cc->cluster_size)
+	if (nr_folios - index < cc->cluster_size)
 		return false;
 
 	for (; i < cc->cluster_size; i++) {
-		if (pages[index + i]->index != pgidx + i)
+		if (fbatch->folios[index + i]->index != pgidx + i)
 			return false;
-		if (uptodate && !PageUptodate(pages[index + i]))
+		if (uptodate && !folio_test_uptodate(fbatch->folios[index + i]))
 			return false;
 	}
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a71e818cd67b..7511578b73c3 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2938,7 +2938,7 @@  static int f2fs_write_cache_pages(struct address_space *mapping,
 {
 	int ret = 0;
 	int done = 0, retry = 0;
-	struct page *pages[F2FS_ONSTACK_PAGES];
+	struct folio_batch fbatch;
 	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
 	struct bio *bio = NULL;
 	sector_t last_block;
@@ -2959,7 +2959,7 @@  static int f2fs_write_cache_pages(struct address_space *mapping,
 		.private = NULL,
 	};
 #endif
-	int nr_pages;
+	int nr_folios;
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
 	pgoff_t done_index;
@@ -2969,6 +2969,8 @@  static int f2fs_write_cache_pages(struct address_space *mapping,
 	int submitted = 0;
 	int i;
 
+	folio_batch_init(&fbatch);
+
 	if (get_dirty_pages(mapping->host) <=
 				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
 		set_inode_flag(mapping->host, FI_HOT_DATA);
@@ -2994,13 +2996,13 @@  static int f2fs_write_cache_pages(struct address_space *mapping,
 		tag_pages_for_writeback(mapping, index, end);
 	done_index = index;
 	while (!done && !retry && (index <= end)) {
-		nr_pages = find_get_pages_range_tag(mapping, &index, end,
-				tag, F2FS_ONSTACK_PAGES, pages);
-		if (nr_pages == 0)
+		nr_folios = filemap_get_folios_tag(mapping, &index, end,
+				tag, &fbatch);
+		if (nr_folios == 0)
 			break;
 
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pages[i];
+		for (i = 0; i < nr_folios; i++) {
+			struct folio *folio = fbatch.folios[i];
 			bool need_readd;
 readd:
 			need_readd = false;
@@ -3017,7 +3019,7 @@  static int f2fs_write_cache_pages(struct address_space *mapping,
 				}
 
 				if (!f2fs_cluster_can_merge_page(&cc,
-								page->index)) {
+								folio->index)) {
 					ret = f2fs_write_multi_pages(&cc,
 						&submitted, wbc, io_type);
 					if (!ret)
@@ -3026,27 +3028,28 @@  static int f2fs_write_cache_pages(struct address_space *mapping,
 				}
 
 				if (unlikely(f2fs_cp_error(sbi)))
-					goto lock_page;
+					goto lock_folio;
 
 				if (!f2fs_cluster_is_empty(&cc))
-					goto lock_page;
+					goto lock_folio;
 
 				if (f2fs_all_cluster_page_ready(&cc,
-					pages, i, nr_pages, true))
-					goto lock_page;
+					&fbatch, i, nr_folios, true))
+					goto lock_folio;
 
 				ret2 = f2fs_prepare_compress_overwrite(
 							inode, &pagep,
-							page->index, &fsdata);
+							folio->index, &fsdata);
 				if (ret2 < 0) {
 					ret = ret2;
 					done = 1;
 					break;
 				} else if (ret2 &&
 					(!f2fs_compress_write_end(inode,
-						fsdata, page->index, 1) ||
+						fsdata, folio->index, 1) ||
 					 !f2fs_all_cluster_page_ready(&cc,
-						pages, i, nr_pages, false))) {
+						&fbatch, i, nr_folios,
+						false))) {
 					retry = 1;
 					break;
 				}
@@ -3059,46 +3062,47 @@  static int f2fs_write_cache_pages(struct address_space *mapping,
 				break;
 			}
 #ifdef CONFIG_F2FS_FS_COMPRESSION
-lock_page:
+lock_folio:
 #endif
-			done_index = page->index;
+			done_index = folio->index;
 retry_write:
-			lock_page(page);
+			folio_lock(folio);
 
-			if (unlikely(page->mapping != mapping)) {
+			if (unlikely(folio->mapping != mapping)) {
 continue_unlock:
-				unlock_page(page);
+				folio_unlock(folio);
 				continue;
 			}
 
-			if (!PageDirty(page)) {
+			if (!folio_test_dirty(folio)) {
 				/* someone wrote it for us */
 				goto continue_unlock;
 			}
 
-			if (PageWriteback(page)) {
+			if (folio_test_writeback(folio)) {
 				if (wbc->sync_mode != WB_SYNC_NONE)
-					f2fs_wait_on_page_writeback(page,
+					f2fs_wait_on_page_writeback(
+							&folio->page,
 							DATA, true, true);
 				else
 					goto continue_unlock;
 			}
 
-			if (!clear_page_dirty_for_io(page))
+			if (!folio_clear_dirty_for_io(folio))
 				goto continue_unlock;
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 			if (f2fs_compressed_file(inode)) {
-				get_page(page);
-				f2fs_compress_ctx_add_page(&cc, page);
+				folio_get(folio);
+				f2fs_compress_ctx_add_page(&cc, &folio->page);
 				continue;
 			}
 #endif
-			ret = f2fs_write_single_data_page(page, &submitted,
-					&bio, &last_block, wbc, io_type,
-					0, true);
+			ret = f2fs_write_single_data_page(&folio->page,
+					&submitted, &bio, &last_block,
+					wbc, io_type, 0, true);
 			if (ret == AOP_WRITEPAGE_ACTIVATE)
-				unlock_page(page);
+				folio_unlock(folio);
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 result:
 #endif
@@ -3122,7 +3126,8 @@  static int f2fs_write_cache_pages(struct address_space *mapping,
 					}
 					goto next;
 				}
-				done_index = page->index + 1;
+				done_index = folio->index +
+					folio_nr_pages(folio);
 				done = 1;
 				break;
 			}
@@ -3136,7 +3141,7 @@  static int f2fs_write_cache_pages(struct address_space *mapping,
 			if (need_readd)
 				goto readd;
 		}
-		release_pages(pages, nr_pages);
+		folio_batch_release(&fbatch);
 		cond_resched();
 	}
 #ifdef CONFIG_F2FS_FS_COMPRESSION
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e6355a5683b7..d7bfb88fa341 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4226,8 +4226,9 @@  void f2fs_end_read_compressed_page(struct page *page, bool failed,
 				block_t blkaddr, bool in_task);
 bool f2fs_cluster_is_empty(struct compress_ctx *cc);
 bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
-bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages,
-				int index, int nr_pages, bool uptodate);
+bool f2fs_all_cluster_page_ready(struct compress_ctx *cc,
+		struct folio_batch *fbatch, int index, int nr_folios,
+		bool uptodate);
 bool f2fs_sanity_check_cluster(struct dnode_of_data *dn);
 void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
 int f2fs_write_multi_pages(struct compress_ctx *cc,