[v7,4/6] zsmalloc: Add a LRU to zs_pool to keep track of zspages in LRU order

Message ID: 20221128191616.1261026-5-nphamcs@gmail.com
State: New
Series: Implement writeback for zsmalloc

Commit Message

Nhat Pham Nov. 28, 2022, 7:16 p.m. UTC
  This helps determine the coldest zspages as candidates for writeback.

Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 mm/zsmalloc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

--
2.30.2
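
For context, a minimal sketch of how a writeback path might consume this list by taking the coldest zspage from the tail. The helper name zs_pick_coldest() is hypothetical and shown for illustration only; the actual reclaim logic lands later in this series.

/*
 * Illustration only (hypothetical helper, not part of this patch):
 * pick the least-recently-used zspage as a writeback candidate.
 * Assumes the caller holds pool->lock, which protects pool->lru.
 */
static struct zspage *zs_pick_coldest(struct zs_pool *pool)
{
	assert_spin_locked(&pool->lock);

	if (list_empty(&pool->lru))
		return NULL;

	/* New pages are added to the head, so the tail is the coldest. */
	return list_last_entry(&pool->lru, struct zspage, lru);
}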
  

Comments

Johannes Weiner Nov. 28, 2022, 7:25 p.m. UTC | #1
On Mon, Nov 28, 2022 at 11:16:13AM -0800, Nhat Pham wrote:
> This helps determine the coldest zspages as candidates for writeback.
> 
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
  
Sergey Senozhatsky Nov. 29, 2022, 3:50 a.m. UTC | #2
On (22/11/28 11:16), Nhat Pham wrote:
> This helps determine the coldest zspages as candidates for writeback.
> 
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>

Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
  
Vitaly Wool Nov. 29, 2022, 11:53 a.m. UTC | #3
On Mon, Nov 28, 2022 at 8:16 PM Nhat Pham <nphamcs@gmail.com> wrote:
>
> This helps determine the coldest zspages as candidates for writeback.
>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> ---
>  mm/zsmalloc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 50 insertions(+)
>
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 5427a00a0518..b1bc231d94a3 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -239,6 +239,11 @@ struct zs_pool {
>         /* Compact classes */
>         struct shrinker shrinker;
>
> +#ifdef CONFIG_ZPOOL
> +       /* List tracking the zspages in LRU order by most recently added object */
> +       struct list_head lru;
> +#endif
> +
>  #ifdef CONFIG_ZSMALLOC_STAT
>         struct dentry *stat_dentry;
>  #endif
> @@ -260,6 +265,12 @@ struct zspage {
>         unsigned int freeobj;
>         struct page *first_page;
>         struct list_head list; /* fullness list */
> +
> +#ifdef CONFIG_ZPOOL
> +       /* links the zspage to the lru list in the pool */
> +       struct list_head lru;
> +#endif
> +
>         struct zs_pool *pool;
>  #ifdef CONFIG_COMPACTION
>         rwlock_t lock;
> @@ -953,6 +964,9 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class,
>         }
>
>         remove_zspage(class, zspage, ZS_EMPTY);
> +#ifdef CONFIG_ZPOOL
> +       list_del(&zspage->lru);
> +#endif
>         __free_zspage(pool, class, zspage);
>  }
>
> @@ -998,6 +1012,10 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
>                 off %= PAGE_SIZE;
>         }
>
> +#ifdef CONFIG_ZPOOL
> +       INIT_LIST_HEAD(&zspage->lru);
> +#endif
> +
>         set_freeobj(zspage, 0);
>  }
>
> @@ -1270,6 +1288,31 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
>         obj_to_location(obj, &page, &obj_idx);
>         zspage = get_zspage(page);
>
> +#ifdef CONFIG_ZPOOL
> +       /*
> +        * Move the zspage to front of pool's LRU.
> +        *
> +        * Note that this is swap-specific, so by definition there are no ongoing
> +        * accesses to the memory while the page is swapped out that would make
> +        * it "hot". A new entry is hot, then ages to the tail until it gets either
> +        * written back or swaps back in.
> +        *
> +        * Furthermore, map is also called during writeback. We must not put an
> +        * isolated page on the LRU mid-reclaim.
> +        *
> +        * As a result, only update the LRU when the page is mapped for write
> +        * when it's first instantiated.
> +        *
> +        * This is a deviation from the other backends, which perform this update
> +        * in the allocation function (zbud_alloc, z3fold_alloc).
> +        */
> +       if (mm == ZS_MM_WO) {
> +               if (!list_empty(&zspage->lru))
> +                       list_del(&zspage->lru);
> +               list_add(&zspage->lru, &pool->lru);
> +       }
> +#endif
> +
>         /*
>          * migration cannot move any zpages in this zspage. Here, pool->lock
>          * is too heavy since callers would take some time until they calls
> @@ -1988,6 +2031,9 @@ static void async_free_zspage(struct work_struct *work)
>                 VM_BUG_ON(fullness != ZS_EMPTY);
>                 class = pool->size_class[class_idx];
>                 spin_lock(&pool->lock);
> +#ifdef CONFIG_ZPOOL
> +               list_del(&zspage->lru);
> +#endif
>                 __free_zspage(pool, class, zspage);
>                 spin_unlock(&pool->lock);
>         }
> @@ -2299,6 +2345,10 @@ struct zs_pool *zs_create_pool(const char *name)
>          */
>         zs_register_shrinker(pool);
>
> +#ifdef CONFIG_ZPOOL
> +       INIT_LIST_HEAD(&pool->lru);
> +#endif

I think the amount of #ifdefs here becomes absolutely overwhelming.
Not that zsmalloc code was very readable before, but now it is
starting to look like a plain disaster.

Thanks,
Vitaly

> +
>         return pool;
>
>  err:
> --
> 2.30.2
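
As an aside on the #ifdef blocks pointed out above (illustration only, not part of the posted patch): the usual way to keep them out of the call sites is to fold the LRU operations into inline helpers that compile to no-ops when CONFIG_ZPOOL is disabled. The helper names below are hypothetical.

/* Illustration only: hypothetical helpers, not part of this patch. */
#ifdef CONFIG_ZPOOL
static inline void zspage_lru_init(struct zspage *zspage)
{
	INIT_LIST_HEAD(&zspage->lru);
}

static inline void zspage_lru_del(struct zspage *zspage)
{
	list_del(&zspage->lru);
}

/* Move the zspage to the front of the pool's LRU. */
static inline void zspage_lru_move(struct zs_pool *pool,
				   struct zspage *zspage)
{
	if (!list_empty(&zspage->lru))
		list_del(&zspage->lru);
	list_add(&zspage->lru, &pool->lru);
}
#else
static inline void zspage_lru_init(struct zspage *zspage) {}
static inline void zspage_lru_del(struct zspage *zspage) {}
static inline void zspage_lru_move(struct zs_pool *pool,
				   struct zspage *zspage) {}
#endif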
  
Sergey Senozhatsky Nov. 29, 2022, 2:03 p.m. UTC | #4
On (22/11/29 12:53), Vitaly Wool wrote:
> I think the amount of #ifdefs here becomes absolutely overwhelming.
> Not that zsmalloc code was very readable before, but now it is
> starting to look like a plain disaster.

Presumably most of them will go away once LRU moved from
allocator to upper level.
  
Johannes Weiner Nov. 29, 2022, 3:54 p.m. UTC | #5
On Tue, Nov 29, 2022 at 11:03:45PM +0900, Sergey Senozhatsky wrote:
> On (22/11/29 12:53), Vitaly Wool wrote:
> > I think the amount of #ifdefs here becomes absolutely overwhelming.
> > Not that zsmalloc code was very readable before, but now it is
> > starting to look like a plain disaster.
> 
> Presumably most of them will go away once LRU moved from
> allocator to upper level.

Yes consider it the "cut here" lines for refactoring the LRU into
zswap, which we want to do next. They're not here to stay, and that
work will remove a lot of duplication and complexity from all
backends. So I hope it's acceptable as a transitionary state.
  
Vitaly Wool Nov. 30, 2022, 3:23 p.m. UTC | #6
On Tue, Nov 29, 2022 at 4:54 PM Johannes Weiner <hannes@cmpxchg.org> wrote:
>
> On Tue, Nov 29, 2022 at 11:03:45PM +0900, Sergey Senozhatsky wrote:
> > On (22/11/29 12:53), Vitaly Wool wrote:
> > > I think the amount of #ifdefs here becomes absolutely overwhelming.
> > > Not that zsmalloc code was very readable before, but now it is
> > > starting to look like a plain disaster.
> >
> > Presumably most of them will go away once LRU moved from
> > allocator to upper level.
>
> Yes consider it the "cut here" lines for refactoring the LRU into
> zswap, which we want to do next. They're not here to stay, and that
> work will remove a lot of duplication and complexity from all
> backends. So I hope it's acceptable as a transitionary state.

I see, thanks for the explanation.

~Vitaly
  

Patch

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 5427a00a0518..b1bc231d94a3 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -239,6 +239,11 @@  struct zs_pool {
 	/* Compact classes */
 	struct shrinker shrinker;

+#ifdef CONFIG_ZPOOL
+	/* List tracking the zspages in LRU order by most recently added object */
+	struct list_head lru;
+#endif
+
 #ifdef CONFIG_ZSMALLOC_STAT
 	struct dentry *stat_dentry;
 #endif
@@ -260,6 +265,12 @@  struct zspage {
 	unsigned int freeobj;
 	struct page *first_page;
 	struct list_head list; /* fullness list */
+
+#ifdef CONFIG_ZPOOL
+	/* links the zspage to the lru list in the pool */
+	struct list_head lru;
+#endif
+
 	struct zs_pool *pool;
 #ifdef CONFIG_COMPACTION
 	rwlock_t lock;
@@ -953,6 +964,9 @@  static void free_zspage(struct zs_pool *pool, struct size_class *class,
 	}

 	remove_zspage(class, zspage, ZS_EMPTY);
+#ifdef CONFIG_ZPOOL
+	list_del(&zspage->lru);
+#endif
 	__free_zspage(pool, class, zspage);
 }

@@ -998,6 +1012,10 @@  static void init_zspage(struct size_class *class, struct zspage *zspage)
 		off %= PAGE_SIZE;
 	}

+#ifdef CONFIG_ZPOOL
+	INIT_LIST_HEAD(&zspage->lru);
+#endif
+
 	set_freeobj(zspage, 0);
 }

@@ -1270,6 +1288,31 @@  void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	obj_to_location(obj, &page, &obj_idx);
 	zspage = get_zspage(page);

+#ifdef CONFIG_ZPOOL
+	/*
+	 * Move the zspage to front of pool's LRU.
+	 *
+	 * Note that this is swap-specific, so by definition there are no ongoing
+	 * accesses to the memory while the page is swapped out that would make
+	 * it "hot". A new entry is hot, then ages to the tail until it gets either
+	 * written back or swaps back in.
+	 *
+	 * Furthermore, map is also called during writeback. We must not put an
+	 * isolated page on the LRU mid-reclaim.
+	 *
+	 * As a result, only update the LRU when the page is mapped for write
+	 * when it's first instantiated.
+	 *
+	 * This is a deviation from the other backends, which perform this update
+	 * in the allocation function (zbud_alloc, z3fold_alloc).
+	 */
+	if (mm == ZS_MM_WO) {
+		if (!list_empty(&zspage->lru))
+			list_del(&zspage->lru);
+		list_add(&zspage->lru, &pool->lru);
+	}
+#endif
+
 	/*
 	 * migration cannot move any zpages in this zspage. Here, pool->lock
 	 * is too heavy since callers would take some time until they calls
@@ -1988,6 +2031,9 @@  static void async_free_zspage(struct work_struct *work)
 		VM_BUG_ON(fullness != ZS_EMPTY);
 		class = pool->size_class[class_idx];
 		spin_lock(&pool->lock);
+#ifdef CONFIG_ZPOOL
+		list_del(&zspage->lru);
+#endif
 		__free_zspage(pool, class, zspage);
 		spin_unlock(&pool->lock);
 	}
@@ -2299,6 +2345,10 @@  struct zs_pool *zs_create_pool(const char *name)
 	 */
 	zs_register_shrinker(pool);

+#ifdef CONFIG_ZPOOL
+	INIT_LIST_HEAD(&pool->lru);
+#endif
+
 	return pool;

 err: