[v7,1/3] workingset: refactor LRU refault to expose refault recency check
Commit Message
In preparation for computing recently evicted pages in cachestat,
refactor workingset_refault and lru_gen_refault to expose a helper
function that tests whether an evicted page was recently evicted.
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
include/linux/swap.h | 1 +
mm/workingset.c | 142 +++++++++++++++++++++++++++++--------------
2 files changed, 97 insertions(+), 46 deletions(-)
Comments
On Mon, Jan 23, 2023 at 7:11 PM Nhat Pham <nphamcs@gmail.com> wrote:
>
> In preparation for computing recently evicted pages in cachestat,
> refactor workingset_refault and lru_gen_refault to expose a helper
> function that would test if an evicted page is recently evicted.
>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> ---
> include/linux/swap.h | 1 +
> mm/workingset.c | 142 +++++++++++++++++++++++++++++--------------
> 2 files changed, 97 insertions(+), 46 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index a18cf4b7c724..dae6f6f955eb 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -361,6 +361,7 @@ static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
> }
>
> /* linux/mm/workingset.c */
> +bool workingset_test_recent(void *shadow, bool file, bool *workingset);
> void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
> void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
> void workingset_refault(struct folio *folio, void *shadow);
> diff --git a/mm/workingset.c b/mm/workingset.c
> index 79585d55c45d..2f2d94867366 100644
> --- a/mm/workingset.c
> +++ b/mm/workingset.c
> @@ -244,6 +244,30 @@ static void *lru_gen_eviction(struct folio *folio)
> return pack_shadow(mem_cgroup_id(memcg), pgdat, token, refs);
> }
>
> +/*
> + * Test if the folio is recently evicted.
> + *
> + * As a side effect, also populates the references with
> + * values unpacked from the shadow of the evicted folio.
> + */
> +static bool lru_gen_test_recent(void *shadow, bool file, int *memcgid,
> + struct pglist_data **pgdat, unsigned long *token, bool *workingset)
> +{
> + struct mem_cgroup *eviction_memcg;
> + struct lruvec *lruvec;
> + struct lru_gen_struct *lrugen;
> + unsigned long min_seq;
> +
> + unpack_shadow(shadow, memcgid, pgdat, token, workingset);
> + eviction_memcg = mem_cgroup_from_id(*memcgid);
> +
> + lruvec = mem_cgroup_lruvec(eviction_memcg, *pgdat);
> + lrugen = &lruvec->lrugen;
> +
> + min_seq = READ_ONCE(lrugen->min_seq[file]);
> + return (*token >> LRU_REFS_WIDTH) == (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH));
> +}
> +
> static void lru_gen_refault(struct folio *folio, void *shadow)
> {
> int hist, tier, refs;
> @@ -258,23 +282,22 @@ static void lru_gen_refault(struct folio *folio, void *shadow)
> int type = folio_is_file_lru(folio);
> int delta = folio_nr_pages(folio);
>
> - unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset);
> -
> - if (pgdat != folio_pgdat(folio))
> - return;
> -
> rcu_read_lock();
>
> + if (!lru_gen_test_recent(shadow, type, &memcg_id, &pgdat, &token,
> + &workingset))
> + goto unlock;
> +
> memcg = folio_memcg_rcu(folio);
> if (memcg_id != mem_cgroup_id(memcg))
> goto unlock;
>
> + if (pgdat != folio_pgdat(folio))
> + return;
> +
> lruvec = mem_cgroup_lruvec(memcg, pgdat);
> lrugen = &lruvec->lrugen;
> -
> min_seq = READ_ONCE(lrugen->min_seq[type]);
> - if ((token >> LRU_REFS_WIDTH) != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH)))
> - goto unlock;
>
> hist = lru_hist_from_seq(min_seq);
> /* see the comment in folio_lru_refs() */
> @@ -306,6 +329,12 @@ static void *lru_gen_eviction(struct folio *folio)
> return NULL;
> }
>
> +static bool lru_gen_test_recent(void *shadow, bool file, int *memcgid,
> + struct pglist_data **pgdat, unsigned long *token, bool *workingset)
> +{
> + return false;
> +}
> +
> static void lru_gen_refault(struct folio *folio, void *shadow)
> {
> }
> @@ -373,40 +402,31 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
> folio_test_workingset(folio));
> }
>
> -/**
> - * workingset_refault - Evaluate the refault of a previously evicted folio.
> - * @folio: The freshly allocated replacement folio.
> - * @shadow: Shadow entry of the evicted folio.
> +/*
> + * Test if the folio is recently evicted by checking if
> + * refault distance of shadow exceeds workingset size.
> *
> - * Calculates and evaluates the refault distance of the previously
> - * evicted folio in the context of the node and the memcg whose memory
> - * pressure caused the eviction.
> + * As a side effect, populate workingset with the value
> + * unpacked from shadow.
> */
> -void workingset_refault(struct folio *folio, void *shadow)
> +bool workingset_test_recent(void *shadow, bool file, bool *workingset)
> {
> - bool file = folio_is_file_lru(folio);
> struct mem_cgroup *eviction_memcg;
> struct lruvec *eviction_lruvec;
> unsigned long refault_distance;
> unsigned long workingset_size;
> - struct pglist_data *pgdat;
> - struct mem_cgroup *memcg;
> - unsigned long eviction;
> - struct lruvec *lruvec;
> unsigned long refault;
> - bool workingset;
> int memcgid;
> - long nr;
> + struct pglist_data *pgdat;
> + unsigned long eviction;
>
> - if (lru_gen_enabled()) {
> - lru_gen_refault(folio, shadow);
> - return;
> - }
> + if (lru_gen_enabled())
> + lru_gen_test_recent(shadow, file, &memcgid, &pgdat, &eviction,
> + workingset);
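Missing "return" here: the result of lru_gen_test_recent() is discarded and execution falls through to the unpack_shadow() path below. This was correctly handled in your v2, btw.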
On Wed, Jan 25, 2023 at 1:13 PM Yu Zhao <yuzhao@google.com> wrote:
>
> On Mon, Jan 23, 2023 at 7:11 PM Nhat Pham <nphamcs@gmail.com> wrote:
> >
> > In preparation for computing recently evicted pages in cachestat,
> > refactor workingset_refault and lru_gen_refault to expose a helper
> > function that would test if an evicted page is recently evicted.
> >
> > Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> > ---
> > include/linux/swap.h | 1 +
> > mm/workingset.c | 142 +++++++++++++++++++++++++++++--------------
> > 2 files changed, 97 insertions(+), 46 deletions(-)
> >
> > diff --git a/include/linux/swap.h b/include/linux/swap.h
> > index a18cf4b7c724..dae6f6f955eb 100644
> > --- a/include/linux/swap.h
> > +++ b/include/linux/swap.h
> > @@ -361,6 +361,7 @@ static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
> > }
> >
> > /* linux/mm/workingset.c */
> > +bool workingset_test_recent(void *shadow, bool file, bool *workingset);
> > void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
> > void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
> > void workingset_refault(struct folio *folio, void *shadow);
> > diff --git a/mm/workingset.c b/mm/workingset.c
> > index 79585d55c45d..2f2d94867366 100644
> > --- a/mm/workingset.c
> > +++ b/mm/workingset.c
> > @@ -244,6 +244,30 @@ static void *lru_gen_eviction(struct folio *folio)
> > return pack_shadow(mem_cgroup_id(memcg), pgdat, token, refs);
> > }
> >
> > +/*
> > + * Test if the folio is recently evicted.
> > + *
> > + * As a side effect, also populates the references with
> > + * values unpacked from the shadow of the evicted folio.
> > + */
> > +static bool lru_gen_test_recent(void *shadow, bool file, int *memcgid,
> > + struct pglist_data **pgdat, unsigned long *token, bool *workingset)
> > +{
> > + struct mem_cgroup *eviction_memcg;
> > + struct lruvec *lruvec;
> > + struct lru_gen_struct *lrugen;
> > + unsigned long min_seq;
> > +
> > + unpack_shadow(shadow, memcgid, pgdat, token, workingset);
> > + eviction_memcg = mem_cgroup_from_id(*memcgid);
> > +
> > + lruvec = mem_cgroup_lruvec(eviction_memcg, *pgdat);
> > + lrugen = &lruvec->lrugen;
> > +
> > + min_seq = READ_ONCE(lrugen->min_seq[file]);
> > + return (*token >> LRU_REFS_WIDTH) == (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH));
> > +}
> > +
> > static void lru_gen_refault(struct folio *folio, void *shadow)
> > {
> > int hist, tier, refs;
> > @@ -258,23 +282,22 @@ static void lru_gen_refault(struct folio *folio, void *shadow)
> > int type = folio_is_file_lru(folio);
> > int delta = folio_nr_pages(folio);
> >
> > - unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset);
> > -
> > - if (pgdat != folio_pgdat(folio))
> > - return;
> > -
> > rcu_read_lock();
> >
> > + if (!lru_gen_test_recent(shadow, type, &memcg_id, &pgdat, &token,
> > + &workingset))
> > + goto unlock;
> > +
> > memcg = folio_memcg_rcu(folio);
> > if (memcg_id != mem_cgroup_id(memcg))
> > goto unlock;
> >
> > + if (pgdat != folio_pgdat(folio))
> > + return;
> > +
> > lruvec = mem_cgroup_lruvec(memcg, pgdat);
> > lrugen = &lruvec->lrugen;
> > -
> > min_seq = READ_ONCE(lrugen->min_seq[type]);
> > - if ((token >> LRU_REFS_WIDTH) != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH)))
> > - goto unlock;
> >
> > hist = lru_hist_from_seq(min_seq);
> > /* see the comment in folio_lru_refs() */
> > @@ -306,6 +329,12 @@ static void *lru_gen_eviction(struct folio *folio)
> > return NULL;
> > }
> >
> > +static bool lru_gen_test_recent(void *shadow, bool file, int *memcgid,
> > + struct pglist_data **pgdat, unsigned long *token, bool *workingset)
> > +{
> > + return false;
> > +}
> > +
> > static void lru_gen_refault(struct folio *folio, void *shadow)
> > {
> > }
> > @@ -373,40 +402,31 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
> > folio_test_workingset(folio));
> > }
> >
> > -/**
> > - * workingset_refault - Evaluate the refault of a previously evicted folio.
> > - * @folio: The freshly allocated replacement folio.
> > - * @shadow: Shadow entry of the evicted folio.
> > +/*
> > + * Test if the folio is recently evicted by checking if
> > + * refault distance of shadow exceeds workingset size.
> > *
> > - * Calculates and evaluates the refault distance of the previously
> > - * evicted folio in the context of the node and the memcg whose memory
> > - * pressure caused the eviction.
> > + * As a side effect, populate workingset with the value
> > + * unpacked from shadow.
> > */
> > -void workingset_refault(struct folio *folio, void *shadow)
> > +bool workingset_test_recent(void *shadow, bool file, bool *workingset)
> > {
> > - bool file = folio_is_file_lru(folio);
> > struct mem_cgroup *eviction_memcg;
> > struct lruvec *eviction_lruvec;
> > unsigned long refault_distance;
> > unsigned long workingset_size;
> > - struct pglist_data *pgdat;
> > - struct mem_cgroup *memcg;
> > - unsigned long eviction;
> > - struct lruvec *lruvec;
> > unsigned long refault;
> > - bool workingset;
> > int memcgid;
> > - long nr;
> > + struct pglist_data *pgdat;
> > + unsigned long eviction;
> >
> > - if (lru_gen_enabled()) {
> > - lru_gen_refault(folio, shadow);
> > - return;
> > - }
> > + if (lru_gen_enabled())
> > + lru_gen_test_recent(shadow, file, &memcgid, &pgdat, &eviction,
> > + workingset);
>
> Missing "return", which was correctly handled in your v2, btw.
Oops copy-paste hazard. I'll fix this...
@@ -361,6 +361,7 @@ static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
}
/* linux/mm/workingset.c */
+bool workingset_test_recent(void *shadow, bool file, bool *workingset);
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
void workingset_refault(struct folio *folio, void *shadow);
@@ -244,6 +244,30 @@ static void *lru_gen_eviction(struct folio *folio)
return pack_shadow(mem_cgroup_id(memcg), pgdat, token, refs);
}
+/*
+ * Test if the folio is recently evicted.
+ *
+ * As a side effect, also populates the output parameters with
+ * values unpacked from the shadow of the evicted folio.
+ */
+static bool lru_gen_test_recent(void *shadow, bool file, int *memcgid,
+ struct pglist_data **pgdat, unsigned long *token, bool *workingset)
+{
+ struct mem_cgroup *eviction_memcg;
+ struct lruvec *lruvec;
+ struct lru_gen_struct *lrugen;
+ unsigned long min_seq;
+
+ unpack_shadow(shadow, memcgid, pgdat, token, workingset);
+ eviction_memcg = mem_cgroup_from_id(*memcgid);
+
+ lruvec = mem_cgroup_lruvec(eviction_memcg, *pgdat);
+ lrugen = &lruvec->lrugen;
+
+ min_seq = READ_ONCE(lrugen->min_seq[file]);
+ return (*token >> LRU_REFS_WIDTH) == (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH));
+}
+
static void lru_gen_refault(struct folio *folio, void *shadow)
{
int hist, tier, refs;
@@ -258,23 +282,22 @@ static void lru_gen_refault(struct folio *folio, void *shadow)
int type = folio_is_file_lru(folio);
int delta = folio_nr_pages(folio);
- unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset);
-
- if (pgdat != folio_pgdat(folio))
- return;
-
rcu_read_lock();
+ if (!lru_gen_test_recent(shadow, type, &memcg_id, &pgdat, &token,
+ &workingset))
+ goto unlock;
+
memcg = folio_memcg_rcu(folio);
if (memcg_id != mem_cgroup_id(memcg))
goto unlock;
+ if (pgdat != folio_pgdat(folio))
+ return;
+
lruvec = mem_cgroup_lruvec(memcg, pgdat);
lrugen = &lruvec->lrugen;
-
min_seq = READ_ONCE(lrugen->min_seq[type]);
- if ((token >> LRU_REFS_WIDTH) != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH)))
- goto unlock;
hist = lru_hist_from_seq(min_seq);
/* see the comment in folio_lru_refs() */
@@ -306,6 +329,12 @@ static void *lru_gen_eviction(struct folio *folio)
return NULL;
}
+static bool lru_gen_test_recent(void *shadow, bool file, int *memcgid,
+ struct pglist_data **pgdat, unsigned long *token, bool *workingset)
+{
+ return false;
+}
+
static void lru_gen_refault(struct folio *folio, void *shadow)
{
}
@@ -373,40 +402,31 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
folio_test_workingset(folio));
}
-/**
- * workingset_refault - Evaluate the refault of a previously evicted folio.
- * @folio: The freshly allocated replacement folio.
- * @shadow: Shadow entry of the evicted folio.
+/*
+ * Test if the folio is recently evicted by checking that the
+ * refault distance of shadow does not exceed the workingset size.
*
- * Calculates and evaluates the refault distance of the previously
- * evicted folio in the context of the node and the memcg whose memory
- * pressure caused the eviction.
+ * As a side effect, populate workingset with the value
+ * unpacked from shadow.
*/
-void workingset_refault(struct folio *folio, void *shadow)
+bool workingset_test_recent(void *shadow, bool file, bool *workingset)
{
- bool file = folio_is_file_lru(folio);
struct mem_cgroup *eviction_memcg;
struct lruvec *eviction_lruvec;
unsigned long refault_distance;
unsigned long workingset_size;
- struct pglist_data *pgdat;
- struct mem_cgroup *memcg;
- unsigned long eviction;
- struct lruvec *lruvec;
unsigned long refault;
- bool workingset;
int memcgid;
- long nr;
+ struct pglist_data *pgdat;
+ unsigned long eviction;
- if (lru_gen_enabled()) {
- lru_gen_refault(folio, shadow);
- return;
- }
+ if (lru_gen_enabled())
+ lru_gen_test_recent(shadow, file, &memcgid, &pgdat, &eviction,
+ workingset);
- unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
+ unpack_shadow(shadow, &memcgid, &pgdat, &eviction, workingset);
eviction <<= bucket_order;
- rcu_read_lock();
/*
* Look up the memcg associated with the stored ID. It might
* have been deleted since the folio's eviction.
@@ -425,7 +445,8 @@ void workingset_refault(struct folio *folio, void *shadow)
*/
eviction_memcg = mem_cgroup_from_id(memcgid);
if (!mem_cgroup_disabled() && !eviction_memcg)
- goto out;
+ return false;
+
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
refault = atomic_long_read(&eviction_lruvec->nonresident_age);
@@ -447,21 +468,6 @@ void workingset_refault(struct folio *folio, void *shadow)
*/
refault_distance = (refault - eviction) & EVICTION_MASK;
- /*
- * The activation decision for this folio is made at the level
- * where the eviction occurred, as that is where the LRU order
- * during folio reclaim is being determined.
- *
- * However, the cgroup that will own the folio is the one that
- * is actually experiencing the refault event.
- */
- nr = folio_nr_pages(folio);
- memcg = folio_memcg(folio);
- pgdat = folio_pgdat(folio);
- lruvec = mem_cgroup_lruvec(memcg, pgdat);
-
- mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
-
mem_cgroup_flush_stats_delayed();
/*
* Compare the distance to the existing workingset size. We
@@ -483,7 +489,51 @@ void workingset_refault(struct folio *folio, void *shadow)
NR_INACTIVE_ANON);
}
}
- if (refault_distance > workingset_size)
+
+ return refault_distance <= workingset_size;
+}
+
+/**
+ * workingset_refault - Evaluate the refault of a previously evicted folio.
+ * @folio: The freshly allocated replacement folio.
+ * @shadow: Shadow entry of the evicted folio.
+ *
+ * Calculates and evaluates the refault distance of the previously
+ * evicted folio in the context of the node and the memcg whose memory
+ * pressure caused the eviction.
+ */
+void workingset_refault(struct folio *folio, void *shadow)
+{
+ bool file = folio_is_file_lru(folio);
+ struct pglist_data *pgdat;
+ struct mem_cgroup *memcg;
+ struct lruvec *lruvec;
+ bool workingset;
+ long nr;
+
+ if (lru_gen_enabled()) {
+ lru_gen_refault(folio, shadow);
+ return;
+ }
+
+ rcu_read_lock();
+
+ /*
+ * The activation decision for this folio is made at the level
+ * where the eviction occurred, as that is where the LRU order
+ * during folio reclaim is being determined.
+ *
+ * However, the cgroup that will own the folio is the one that
+ * is actually experiencing the refault event.
+ */
+ nr = folio_nr_pages(folio);
+ memcg = folio_memcg(folio);
+ pgdat = folio_pgdat(folio);
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
+
+ mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
+
+ if (!workingset_test_recent(shadow, file, &workingset))
goto out;
folio_set_active(folio);