[v7,1/3] workingset: refactor LRU refault to expose refault recency check

Message ID 20230124021118.154078-2-nphamcs@gmail.com
State New
Headers
Series cachestat: a new syscall for page cache state of files |

Commit Message

Nhat Pham Jan. 24, 2023, 2:11 a.m. UTC
  In preparation for computing recently evicted pages in cachestat,
refactor workingset_refault and lru_gen_refault to expose a helper
function that would test if an evicted page is recently evicted.

Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 include/linux/swap.h |   1 +
 mm/workingset.c      | 142 +++++++++++++++++++++++++++++--------------
 2 files changed, 97 insertions(+), 46 deletions(-)
  

Comments

Yu Zhao Jan. 25, 2023, 9:13 p.m. UTC | #1
On Mon, Jan 23, 2023 at 7:11 PM Nhat Pham <nphamcs@gmail.com> wrote:
>
> In preparation for computing recently evicted pages in cachestat,
> refactor workingset_refault and lru_gen_refault to expose a helper
> function that would test if an evicted page is recently evicted.
>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> ---
>  include/linux/swap.h |   1 +
>  mm/workingset.c      | 142 +++++++++++++++++++++++++++++--------------
>  2 files changed, 97 insertions(+), 46 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index a18cf4b7c724..dae6f6f955eb 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -361,6 +361,7 @@ static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
>  }
>
>  /* linux/mm/workingset.c */
> +bool workingset_test_recent(void *shadow, bool file, bool *workingset);
>  void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
>  void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
>  void workingset_refault(struct folio *folio, void *shadow);
> diff --git a/mm/workingset.c b/mm/workingset.c
> index 79585d55c45d..2f2d94867366 100644
> --- a/mm/workingset.c
> +++ b/mm/workingset.c
> @@ -244,6 +244,30 @@ static void *lru_gen_eviction(struct folio *folio)
>         return pack_shadow(mem_cgroup_id(memcg), pgdat, token, refs);
>  }
>
> +/*
> + * Test if the folio is recently evicted.
> + *
> + * As a side effect, also populates the references with
> + * values unpacked from the shadow of the evicted folio.
> + */
> +static bool lru_gen_test_recent(void *shadow, bool file, int *memcgid,
> +               struct pglist_data **pgdat, unsigned long *token, bool *workingset)
> +{
> +       struct mem_cgroup *eviction_memcg;
> +       struct lruvec *lruvec;
> +       struct lru_gen_struct *lrugen;
> +       unsigned long min_seq;
> +
> +       unpack_shadow(shadow, memcgid, pgdat, token, workingset);
> +       eviction_memcg = mem_cgroup_from_id(*memcgid);
> +
> +       lruvec = mem_cgroup_lruvec(eviction_memcg, *pgdat);
> +       lrugen = &lruvec->lrugen;
> +
> +       min_seq = READ_ONCE(lrugen->min_seq[file]);
> +       return (*token >> LRU_REFS_WIDTH) == (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH));
> +}
> +
>  static void lru_gen_refault(struct folio *folio, void *shadow)
>  {
>         int hist, tier, refs;
> @@ -258,23 +282,22 @@ static void lru_gen_refault(struct folio *folio, void *shadow)
>         int type = folio_is_file_lru(folio);
>         int delta = folio_nr_pages(folio);
>
> -       unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset);
> -
> -       if (pgdat != folio_pgdat(folio))
> -               return;
> -
>         rcu_read_lock();
>
> +       if (!lru_gen_test_recent(shadow, type, &memcg_id, &pgdat, &token,
> +                       &workingset))
> +               goto unlock;
> +
>         memcg = folio_memcg_rcu(folio);
>         if (memcg_id != mem_cgroup_id(memcg))
>                 goto unlock;
>
> +       if (pgdat != folio_pgdat(folio))
> +               return;
> +
>         lruvec = mem_cgroup_lruvec(memcg, pgdat);
>         lrugen = &lruvec->lrugen;
> -
>         min_seq = READ_ONCE(lrugen->min_seq[type]);
> -       if ((token >> LRU_REFS_WIDTH) != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH)))
> -               goto unlock;
>
>         hist = lru_hist_from_seq(min_seq);
>         /* see the comment in folio_lru_refs() */
> @@ -306,6 +329,12 @@ static void *lru_gen_eviction(struct folio *folio)
>         return NULL;
>  }
>
> +static bool lru_gen_test_recent(void *shadow, bool file, int *memcgid,
> +               struct pglist_data **pgdat, unsigned long *token, bool *workingset)
> +{
> +       return false;
> +}
> +
>  static void lru_gen_refault(struct folio *folio, void *shadow)
>  {
>  }
> @@ -373,40 +402,31 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
>                                 folio_test_workingset(folio));
>  }
>
> -/**
> - * workingset_refault - Evaluate the refault of a previously evicted folio.
> - * @folio: The freshly allocated replacement folio.
> - * @shadow: Shadow entry of the evicted folio.
> +/*
> + * Test if the folio is recently evicted by checking if
> + * refault distance of shadow exceeds workingset size.
>   *
> - * Calculates and evaluates the refault distance of the previously
> - * evicted folio in the context of the node and the memcg whose memory
> - * pressure caused the eviction.
> + * As a side effect, populate workingset with the value
> + * unpacked from shadow.
>   */
> -void workingset_refault(struct folio *folio, void *shadow)
> +bool workingset_test_recent(void *shadow, bool file, bool *workingset)
>  {
> -       bool file = folio_is_file_lru(folio);
>         struct mem_cgroup *eviction_memcg;
>         struct lruvec *eviction_lruvec;
>         unsigned long refault_distance;
>         unsigned long workingset_size;
> -       struct pglist_data *pgdat;
> -       struct mem_cgroup *memcg;
> -       unsigned long eviction;
> -       struct lruvec *lruvec;
>         unsigned long refault;
> -       bool workingset;
>         int memcgid;
> -       long nr;
> +       struct pglist_data *pgdat;
> +       unsigned long eviction;
>
> -       if (lru_gen_enabled()) {
> -               lru_gen_refault(folio, shadow);
> -               return;
> -       }
> +       if (lru_gen_enabled())
> +               lru_gen_test_recent(shadow, file, &memcgid, &pgdat, &eviction,
> +                       workingset);

Missing "return", which was correctly handled in your v2, btw.
  
Nhat Pham Jan. 26, 2023, 5:08 p.m. UTC | #2
On Wed, Jan 25, 2023 at 1:13 PM Yu Zhao <yuzhao@google.com> wrote:
>
> On Mon, Jan 23, 2023 at 7:11 PM Nhat Pham <nphamcs@gmail.com> wrote:
> >
> > In preparation for computing recently evicted pages in cachestat,
> > refactor workingset_refault and lru_gen_refault to expose a helper
> > function that would test if an evicted page is recently evicted.
> >
> > Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> > ---
> >  include/linux/swap.h |   1 +
> >  mm/workingset.c      | 142 +++++++++++++++++++++++++++++--------------
> >  2 files changed, 97 insertions(+), 46 deletions(-)
> >
> > diff --git a/include/linux/swap.h b/include/linux/swap.h
> > index a18cf4b7c724..dae6f6f955eb 100644
> > --- a/include/linux/swap.h
> > +++ b/include/linux/swap.h
> > @@ -361,6 +361,7 @@ static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
> >  }
> >
> >  /* linux/mm/workingset.c */
> > +bool workingset_test_recent(void *shadow, bool file, bool *workingset);
> >  void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
> >  void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
> >  void workingset_refault(struct folio *folio, void *shadow);
> > diff --git a/mm/workingset.c b/mm/workingset.c
> > index 79585d55c45d..2f2d94867366 100644
> > --- a/mm/workingset.c
> > +++ b/mm/workingset.c
> > @@ -244,6 +244,30 @@ static void *lru_gen_eviction(struct folio *folio)
> >         return pack_shadow(mem_cgroup_id(memcg), pgdat, token, refs);
> >  }
> >
> > +/*
> > + * Test if the folio is recently evicted.
> > + *
> > + * As a side effect, also populates the references with
> > + * values unpacked from the shadow of the evicted folio.
> > + */
> > +static bool lru_gen_test_recent(void *shadow, bool file, int *memcgid,
> > +               struct pglist_data **pgdat, unsigned long *token, bool *workingset)
> > +{
> > +       struct mem_cgroup *eviction_memcg;
> > +       struct lruvec *lruvec;
> > +       struct lru_gen_struct *lrugen;
> > +       unsigned long min_seq;
> > +
> > +       unpack_shadow(shadow, memcgid, pgdat, token, workingset);
> > +       eviction_memcg = mem_cgroup_from_id(*memcgid);
> > +
> > +       lruvec = mem_cgroup_lruvec(eviction_memcg, *pgdat);
> > +       lrugen = &lruvec->lrugen;
> > +
> > +       min_seq = READ_ONCE(lrugen->min_seq[file]);
> > +       return (*token >> LRU_REFS_WIDTH) == (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH));
> > +}
> > +
> >  static void lru_gen_refault(struct folio *folio, void *shadow)
> >  {
> >         int hist, tier, refs;
> > @@ -258,23 +282,22 @@ static void lru_gen_refault(struct folio *folio, void *shadow)
> >         int type = folio_is_file_lru(folio);
> >         int delta = folio_nr_pages(folio);
> >
> > -       unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset);
> > -
> > -       if (pgdat != folio_pgdat(folio))
> > -               return;
> > -
> >         rcu_read_lock();
> >
> > +       if (!lru_gen_test_recent(shadow, type, &memcg_id, &pgdat, &token,
> > +                       &workingset))
> > +               goto unlock;
> > +
> >         memcg = folio_memcg_rcu(folio);
> >         if (memcg_id != mem_cgroup_id(memcg))
> >                 goto unlock;
> >
> > +       if (pgdat != folio_pgdat(folio))
> > +               return;
> > +
> >         lruvec = mem_cgroup_lruvec(memcg, pgdat);
> >         lrugen = &lruvec->lrugen;
> > -
> >         min_seq = READ_ONCE(lrugen->min_seq[type]);
> > -       if ((token >> LRU_REFS_WIDTH) != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH)))
> > -               goto unlock;
> >
> >         hist = lru_hist_from_seq(min_seq);
> >         /* see the comment in folio_lru_refs() */
> > @@ -306,6 +329,12 @@ static void *lru_gen_eviction(struct folio *folio)
> >         return NULL;
> >  }
> >
> > +static bool lru_gen_test_recent(void *shadow, bool file, int *memcgid,
> > +               struct pglist_data **pgdat, unsigned long *token, bool *workingset)
> > +{
> > +       return false;
> > +}
> > +
> >  static void lru_gen_refault(struct folio *folio, void *shadow)
> >  {
> >  }
> > @@ -373,40 +402,31 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
> >                                 folio_test_workingset(folio));
> >  }
> >
> > -/**
> > - * workingset_refault - Evaluate the refault of a previously evicted folio.
> > - * @folio: The freshly allocated replacement folio.
> > - * @shadow: Shadow entry of the evicted folio.
> > +/*
> > + * Test if the folio is recently evicted by checking if
> > + * refault distance of shadow exceeds workingset size.
> >   *
> > - * Calculates and evaluates the refault distance of the previously
> > - * evicted folio in the context of the node and the memcg whose memory
> > - * pressure caused the eviction.
> > + * As a side effect, populate workingset with the value
> > + * unpacked from shadow.
> >   */
> > -void workingset_refault(struct folio *folio, void *shadow)
> > +bool workingset_test_recent(void *shadow, bool file, bool *workingset)
> >  {
> > -       bool file = folio_is_file_lru(folio);
> >         struct mem_cgroup *eviction_memcg;
> >         struct lruvec *eviction_lruvec;
> >         unsigned long refault_distance;
> >         unsigned long workingset_size;
> > -       struct pglist_data *pgdat;
> > -       struct mem_cgroup *memcg;
> > -       unsigned long eviction;
> > -       struct lruvec *lruvec;
> >         unsigned long refault;
> > -       bool workingset;
> >         int memcgid;
> > -       long nr;
> > +       struct pglist_data *pgdat;
> > +       unsigned long eviction;
> >
> > -       if (lru_gen_enabled()) {
> > -               lru_gen_refault(folio, shadow);
> > -               return;
> > -       }
> > +       if (lru_gen_enabled())
> > +               lru_gen_test_recent(shadow, file, &memcgid, &pgdat, &eviction,
> > +                       workingset);
>
> Missing "return", which was correctly handled in your v2, btw.

Oops copy-paste hazard. I'll fix this...
  

Patch

diff --git a/include/linux/swap.h b/include/linux/swap.h
index a18cf4b7c724..dae6f6f955eb 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -361,6 +361,7 @@  static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
 }
 
 /* linux/mm/workingset.c */
+bool workingset_test_recent(void *shadow, bool file, bool *workingset);
 void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
 void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
 void workingset_refault(struct folio *folio, void *shadow);
diff --git a/mm/workingset.c b/mm/workingset.c
index 79585d55c45d..2f2d94867366 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -244,6 +244,30 @@  static void *lru_gen_eviction(struct folio *folio)
 	return pack_shadow(mem_cgroup_id(memcg), pgdat, token, refs);
 }
 
+/*
+ * Test if the folio is recently evicted.
+ *
+ * As a side effect, also populates memcgid, pgdat, token and workingset
+ * with the values unpacked from the shadow of the evicted folio.
+ */
+static bool lru_gen_test_recent(void *shadow, bool file, int *memcgid,
+		struct pglist_data **pgdat, unsigned long *token, bool *workingset)
+{
+	struct mem_cgroup *eviction_memcg;
+	struct lruvec *lruvec;
+	struct lru_gen_struct *lrugen;
+	unsigned long min_seq;
+
+	unpack_shadow(shadow, memcgid, pgdat, token, workingset);
+	eviction_memcg = mem_cgroup_from_id(*memcgid);
+
+	lruvec = mem_cgroup_lruvec(eviction_memcg, *pgdat);
+	lrugen = &lruvec->lrugen;
+
+	min_seq = READ_ONCE(lrugen->min_seq[file]);
+	return (*token >> LRU_REFS_WIDTH) == (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH));
+}
+
 static void lru_gen_refault(struct folio *folio, void *shadow)
 {
 	int hist, tier, refs;
@@ -258,23 +282,22 @@  static void lru_gen_refault(struct folio *folio, void *shadow)
 	int type = folio_is_file_lru(folio);
 	int delta = folio_nr_pages(folio);
 
-	unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset);
-
-	if (pgdat != folio_pgdat(folio))
-		return;
-
 	rcu_read_lock();
 
+	if (!lru_gen_test_recent(shadow, type, &memcg_id, &pgdat, &token,
+			&workingset))
+		goto unlock;
+
 	memcg = folio_memcg_rcu(folio);
 	if (memcg_id != mem_cgroup_id(memcg))
 		goto unlock;
 
+	if (pgdat != folio_pgdat(folio))
+		return;
+
 	lruvec = mem_cgroup_lruvec(memcg, pgdat);
 	lrugen = &lruvec->lrugen;
-
 	min_seq = READ_ONCE(lrugen->min_seq[type]);
-	if ((token >> LRU_REFS_WIDTH) != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH)))
-		goto unlock;
 
 	hist = lru_hist_from_seq(min_seq);
 	/* see the comment in folio_lru_refs() */
@@ -306,6 +329,12 @@  static void *lru_gen_eviction(struct folio *folio)
 	return NULL;
 }
 
+static bool lru_gen_test_recent(void *shadow, bool file, int *memcgid,
+		struct pglist_data **pgdat, unsigned long *token, bool *workingset)
+{
+	return false;
+}
+
 static void lru_gen_refault(struct folio *folio, void *shadow)
 {
 }
@@ -373,40 +402,31 @@  void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
 				folio_test_workingset(folio));
 }
 
-/**
- * workingset_refault - Evaluate the refault of a previously evicted folio.
- * @folio: The freshly allocated replacement folio.
- * @shadow: Shadow entry of the evicted folio.
+/*
+ * Test if the folio was recently evicted by checking that the
+ * refault distance of shadow does not exceed the workingset size.
  *
- * Calculates and evaluates the refault distance of the previously
- * evicted folio in the context of the node and the memcg whose memory
- * pressure caused the eviction.
+ * As a side effect, populate workingset with the value
+ * unpacked from shadow.
  */
-void workingset_refault(struct folio *folio, void *shadow)
+bool workingset_test_recent(void *shadow, bool file, bool *workingset)
 {
-	bool file = folio_is_file_lru(folio);
 	struct mem_cgroup *eviction_memcg;
 	struct lruvec *eviction_lruvec;
 	unsigned long refault_distance;
 	unsigned long workingset_size;
-	struct pglist_data *pgdat;
-	struct mem_cgroup *memcg;
-	unsigned long eviction;
-	struct lruvec *lruvec;
 	unsigned long refault;
-	bool workingset;
 	int memcgid;
-	long nr;
+	struct pglist_data *pgdat;
+	unsigned long eviction;
 
-	if (lru_gen_enabled()) {
-		lru_gen_refault(folio, shadow);
-		return;
-	}
+	if (lru_gen_enabled())
+		lru_gen_test_recent(shadow, file, &memcgid, &pgdat, &eviction,
+			workingset);
 
-	unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
+	unpack_shadow(shadow, &memcgid, &pgdat, &eviction, workingset);
 	eviction <<= bucket_order;
 
-	rcu_read_lock();
 	/*
 	 * Look up the memcg associated with the stored ID. It might
 	 * have been deleted since the folio's eviction.
@@ -425,7 +445,8 @@  void workingset_refault(struct folio *folio, void *shadow)
 	 */
 	eviction_memcg = mem_cgroup_from_id(memcgid);
 	if (!mem_cgroup_disabled() && !eviction_memcg)
-		goto out;
+		return false;
+
 	eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
 	refault = atomic_long_read(&eviction_lruvec->nonresident_age);
 
@@ -447,21 +468,6 @@  void workingset_refault(struct folio *folio, void *shadow)
 	 */
 	refault_distance = (refault - eviction) & EVICTION_MASK;
 
-	/*
-	 * The activation decision for this folio is made at the level
-	 * where the eviction occurred, as that is where the LRU order
-	 * during folio reclaim is being determined.
-	 *
-	 * However, the cgroup that will own the folio is the one that
-	 * is actually experiencing the refault event.
-	 */
-	nr = folio_nr_pages(folio);
-	memcg = folio_memcg(folio);
-	pgdat = folio_pgdat(folio);
-	lruvec = mem_cgroup_lruvec(memcg, pgdat);
-
-	mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
-
 	mem_cgroup_flush_stats_delayed();
 	/*
 	 * Compare the distance to the existing workingset size. We
@@ -483,7 +489,51 @@  void workingset_refault(struct folio *folio, void *shadow)
 						     NR_INACTIVE_ANON);
 		}
 	}
-	if (refault_distance > workingset_size)
+
+	return refault_distance <= workingset_size;
+}
+
+/**
+ * workingset_refault - Evaluate the refault of a previously evicted folio.
+ * @folio: The freshly allocated replacement folio.
+ * @shadow: Shadow entry of the evicted folio.
+ *
+ * Calculates and evaluates the refault distance of the previously
+ * evicted folio in the context of the node and the memcg whose memory
+ * pressure caused the eviction.
+ */
+void workingset_refault(struct folio *folio, void *shadow)
+{
+	bool file = folio_is_file_lru(folio);
+	struct pglist_data *pgdat;
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
+	bool workingset;
+	long nr;
+
+	if (lru_gen_enabled()) {
+		lru_gen_refault(folio, shadow);
+		return;
+	}
+
+	rcu_read_lock();
+
+	/*
+	 * The activation decision for this folio is made at the level
+	 * where the eviction occurred, as that is where the LRU order
+	 * during folio reclaim is being determined.
+	 *
+	 * However, the cgroup that will own the folio is the one that
+	 * is actually experiencing the refault event.
+	 */
+	nr = folio_nr_pages(folio);
+	memcg = folio_memcg(folio);
+	pgdat = folio_pgdat(folio);
+	lruvec = mem_cgroup_lruvec(memcg, pgdat);
+
+	mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
+
+	if (!workingset_test_recent(shadow, file, &workingset))
 		goto out;
 
 	folio_set_active(folio);