[15/24] mm/swap: avoid a duplicated swap cache lookup for SYNCHRONOUS_IO devices

Message ID 20231119194740.94101-16-ryncsn@gmail.com
State New
Series Swapin path refactor for optimization and bugfix

Commit Message

Kairui Song Nov. 19, 2023, 7:47 p.m. UTC
  From: Kairui Song <kasong@tencent.com>

When an xa_value is returned by the cache lookup, keep it so it can be
used later for the workingset refault check instead of doing the lookup
again in swapin_no_readahead.

This does have the side effect of making swapoff also trigger the
workingset check, but that should be fine since swapoff already affects
the workload in many ways.

Signed-off-by: Kairui Song <kasong@tencent.com>
---
 mm/swap_state.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)
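
For context: earlier in this series swap_cache_get_folio() gained a shadow
output argument, which is what this patch reuses. The body below is only an
assumed, simplified sketch of that interface (the filemap_get_entry() /
xa_is_value() details may differ from the actual series code):

struct folio *swap_cache_get_folio(swp_entry_t entry,
				   struct vm_fault *vmf, void **shadowp)
{
	void *entry_val;

	entry_val = filemap_get_entry(swap_address_space(entry),
				      swp_offset(entry));
	/* An xa_value is a workingset shadow left behind by a prior eviction. */
	if (xa_is_value(entry_val)) {
		if (shadowp)
			*shadowp = entry_val;
		return NULL;
	}
	/* ... readahead statistics handling elided ... */
	return entry_val;
}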
  

Comments

Chris Li Nov. 21, 2023, 5:15 p.m. UTC | #1
On Sun, Nov 19, 2023 at 11:48 AM Kairui Song <ryncsn@gmail.com> wrote:
>
> From: Kairui Song <kasong@tencent.com>
>
> When an xa_value is returned by the cache lookup, keep it so it can be
> used later for the workingset refault check instead of doing the lookup
> again in swapin_no_readahead.
>
> This does have the side effect of making swapoff also trigger the
> workingset check, but that should be fine since swapoff already affects
> the workload in many ways.

I need to sleep on it a bit to see if this will create another problem or not.

>
> Signed-off-by: Kairui Song <kasong@tencent.com>
> ---
>  mm/swap_state.c | 10 ++++------
>  1 file changed, 4 insertions(+), 6 deletions(-)
>
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index e057c79fb06f..51de2a0412df 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -872,7 +872,6 @@ static struct page *swapin_no_readahead(swp_entry_t entry, gfp_t gfp_mask,
>  {
>         struct folio *folio;
>         struct page *page;
> -       void *shadow = NULL;
>
>         page = alloc_pages_mpol(gfp_mask, 0, mpol, ilx, numa_node_id());
>         folio = (struct folio *)page;
> @@ -888,10 +887,6 @@ static struct page *swapin_no_readahead(swp_entry_t entry, gfp_t gfp_mask,
>
>                 mem_cgroup_swapin_uncharge_swap(entry);
>
> -               shadow = get_shadow_from_swap_cache(entry);
> -               if (shadow)
> -                       workingset_refault(folio, shadow);
> -
>                 folio_add_lru(folio);
>
>                 /* To provide entry to swap_readpage() */
> @@ -922,11 +917,12 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
>         enum swap_cache_result cache_result;
>         struct swap_info_struct *si;
>         struct mempolicy *mpol;
> +       void *shadow = NULL;
>         struct folio *folio;
>         struct page *page;
>         pgoff_t ilx;
>
> -       folio = swap_cache_get_folio(entry, vmf, NULL);
> +       folio = swap_cache_get_folio(entry, vmf, &shadow);
>         if (folio) {
>                 page = folio_file_page(folio, swp_offset(entry));
>                 cache_result = SWAP_CACHE_HIT;
> @@ -938,6 +934,8 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
>         if (swap_use_no_readahead(si, swp_offset(entry))) {
>                 page = swapin_no_readahead(entry, gfp_mask, mpol, ilx, vmf->vma->vm_mm);
>                 cache_result = SWAP_CACHE_BYPASS;
> +               if (shadow)
> +                       workingset_refault(page_folio(page), shadow);

It is inconsistent that the other flavors of readahead do not do the
workingset_refault here.
I suggest keeping the workingset_refault in swapin_no_readahead() and
pass the shadow argument in.

Chris

>         } else if (swap_use_vma_readahead(si)) {
>                 page = swap_vma_readahead(entry, gfp_mask, mpol, ilx, vmf);
>                 cache_result = SWAP_CACHE_MISS;
> --
> 2.42.0
>
>
  
Kairui Song Nov. 22, 2023, 6:08 p.m. UTC | #2
On Wed, Nov 22, 2023 at 1:18 AM Chris Li <chrisl@kernel.org> wrote:
>
> On Sun, Nov 19, 2023 at 11:48 AM Kairui Song <ryncsn@gmail.com> wrote:
> >
> > From: Kairui Song <kasong@tencent.com>
> >
> > When an xa_value is returned by the cache lookup, keep it so it can be
> > used later for the workingset refault check instead of doing the lookup
> > again in swapin_no_readahead.
> >
> > This does have the side effect of making swapoff also trigger the
> > workingset check, but that should be fine since swapoff already affects
> > the workload in many ways.
>
> I need to sleep on it a bit to see if this will create another problem or not.
>
> >
> > Signed-off-by: Kairui Song <kasong@tencent.com>
> > ---
> >  mm/swap_state.c | 10 ++++------
> >  1 file changed, 4 insertions(+), 6 deletions(-)
> >
> > diff --git a/mm/swap_state.c b/mm/swap_state.c
> > index e057c79fb06f..51de2a0412df 100644
> > --- a/mm/swap_state.c
> > +++ b/mm/swap_state.c
> > @@ -872,7 +872,6 @@ static struct page *swapin_no_readahead(swp_entry_t entry, gfp_t gfp_mask,
> >  {
> >         struct folio *folio;
> >         struct page *page;
> > -       void *shadow = NULL;
> >
> >         page = alloc_pages_mpol(gfp_mask, 0, mpol, ilx, numa_node_id());
> >         folio = (struct folio *)page;
> > @@ -888,10 +887,6 @@ static struct page *swapin_no_readahead(swp_entry_t entry, gfp_t gfp_mask,
> >
> >                 mem_cgroup_swapin_uncharge_swap(entry);
> >
> > -               shadow = get_shadow_from_swap_cache(entry);
> > -               if (shadow)
> > -                       workingset_refault(folio, shadow);
> > -
> >                 folio_add_lru(folio);
> >
> >                 /* To provide entry to swap_readpage() */
> > @@ -922,11 +917,12 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> >         enum swap_cache_result cache_result;
> >         struct swap_info_struct *si;
> >         struct mempolicy *mpol;
> > +       void *shadow = NULL;
> >         struct folio *folio;
> >         struct page *page;
> >         pgoff_t ilx;
> >
> > -       folio = swap_cache_get_folio(entry, vmf, NULL);
> > +       folio = swap_cache_get_folio(entry, vmf, &shadow);
> >         if (folio) {
> >                 page = folio_file_page(folio, swp_offset(entry));
> >                 cache_result = SWAP_CACHE_HIT;
> > @@ -938,6 +934,8 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> >         if (swap_use_no_readahead(si, swp_offset(entry))) {
> >                 page = swapin_no_readahead(entry, gfp_mask, mpol, ilx, vmf->vma->vm_mm);
> >                 cache_result = SWAP_CACHE_BYPASS;
> > +               if (shadow)
> > +                       workingset_refault(page_folio(page), shadow);
>
> It is inconsistent that the other flavors of readahead do not do the
> workingset_refault here.

That's because of readahead and the swap cache. Every readahead page
needs to be checked by workingset_refault() with a different shadow (so
a different xarray entry search is needed for each of them). And since
the other swapin paths have to insert the page into the swap cache, they
do the extra xarray search/insert anyway, so this optimization won't
help there.
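
For reference, the swap-cache-inserting paths pick the shadow up as a
byproduct of the insertion itself, roughly as in mainline's
__read_swap_cache_async() (simplified excerpt, details may vary across
kernel versions):

	/* May fail (-ENOMEM) if XArray node allocation failed. */
	if (add_to_swap_cache(folio, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
		goto fail_unlock;

	mem_cgroup_swapin_uncharge_swap(entry);

	/* The shadow came back from add_to_swap_cache(), no extra lookup. */
	if (shadow)
		workingset_refault(folio, shadow);

	/* Caller will initiate read into the locked folio. */
	folio_add_lru(folio);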

> I suggest keeping the workingset_refault in swapin_no_readahead() and
> pass the shadow argument in.

That sounds good to me.
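
A minimal sketch of that suggestion (illustrative only, not a posted
revision; error paths and the unchanged parts of the function are elided):

static struct page *swapin_no_readahead(swp_entry_t entry, gfp_t gfp_mask,
					struct mempolicy *mpol, pgoff_t ilx,
					struct mm_struct *mm, void *shadow)
{
	struct folio *folio;
	struct page *page;

	page = alloc_pages_mpol(gfp_mask, 0, mpol, ilx, numa_node_id());
	folio = (struct folio *)page;
	if (folio) {
		/* ... cgroup charge / swap count checks elided ... */
		mem_cgroup_swapin_uncharge_swap(entry);

		/* Reuse the shadow from the caller's single swap cache lookup. */
		if (shadow)
			workingset_refault(folio, shadow);

		folio_add_lru(folio);
		/* ... set the swap entry and call swap_readpage() as before ... */
	}
	return page;
}

and the call site in swapin_readahead() would then become:

		page = swapin_no_readahead(entry, gfp_mask, mpol, ilx,
					   vmf->vma->vm_mm, shadow);
		cache_result = SWAP_CACHE_BYPASS;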
  

Patch

diff --git a/mm/swap_state.c b/mm/swap_state.c
index e057c79fb06f..51de2a0412df 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -872,7 +872,6 @@  static struct page *swapin_no_readahead(swp_entry_t entry, gfp_t gfp_mask,
 {
 	struct folio *folio;
 	struct page *page;
-	void *shadow = NULL;
 
 	page = alloc_pages_mpol(gfp_mask, 0, mpol, ilx, numa_node_id());
 	folio = (struct folio *)page;
@@ -888,10 +887,6 @@  static struct page *swapin_no_readahead(swp_entry_t entry, gfp_t gfp_mask,
 
 		mem_cgroup_swapin_uncharge_swap(entry);
 
-		shadow = get_shadow_from_swap_cache(entry);
-		if (shadow)
-			workingset_refault(folio, shadow);
-
 		folio_add_lru(folio);
 
 		/* To provide entry to swap_readpage() */
@@ -922,11 +917,12 @@  struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	enum swap_cache_result cache_result;
 	struct swap_info_struct *si;
 	struct mempolicy *mpol;
+	void *shadow = NULL;
 	struct folio *folio;
 	struct page *page;
 	pgoff_t ilx;
 
-	folio = swap_cache_get_folio(entry, vmf, NULL);
+	folio = swap_cache_get_folio(entry, vmf, &shadow);
 	if (folio) {
 		page = folio_file_page(folio, swp_offset(entry));
 		cache_result = SWAP_CACHE_HIT;
@@ -938,6 +934,8 @@  struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	if (swap_use_no_readahead(si, swp_offset(entry))) {
 		page = swapin_no_readahead(entry, gfp_mask, mpol, ilx, vmf->vma->vm_mm);
 		cache_result = SWAP_CACHE_BYPASS;
+		if (shadow)
+			workingset_refault(page_folio(page), shadow);
 	} else if (swap_use_vma_readahead(si)) {
 		page = swap_vma_readahead(entry, gfp_mask, mpol, ilx, vmf);
 		cache_result = SWAP_CACHE_MISS;