[v3,1/5] mm: list_lru: allow external numa node and cgroup tracking
Commit Message
The list_lru interface assumes that objects are allocated on the
correct node and charged to the correct memcg. This change makes it
possible to explicitly specify the NUMA node and memcg when adding
and removing objects, so that users of list_lru can track the
node/memcg of items outside of the list_lru itself. This is needed by
zswap, where allocations can be made by kswapd for data that is
charged to a different cgroup.
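
For illustration, a caller that tracks an object under the node/memcg
of the data it represents, rather than of its own allocation, would do
something like the sketch below (struct my_entry, its lru_node field,
and the helper are made-up names; only __list_lru_add() is part of
this patch):

	static void my_entry_lru_add(struct list_lru *lru,
				     struct my_entry *entry,
				     struct folio *folio)
	{
		int nid = folio_nid(folio);
		struct mem_cgroup *memcg;

		rcu_read_lock();
		/* The data's memcg, not the entry allocation's. */
		memcg = folio_memcg(folio);
		__list_lru_add(lru, &entry->lru_node, nid, memcg);
		rcu_read_unlock();
	}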
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
include/linux/list_lru.h | 38 +++++++++++++++++++++++++++++++++++
mm/list_lru.c            | 43 +++++++++++++++++++++++++++++++++++-----
2 files changed, 76 insertions(+), 5 deletions(-)
Comments
On Tue, Oct 17, 2023 at 4:21 PM Nhat Pham <nphamcs@gmail.com> wrote:
>
> The list_lru interface assumes that objects are allocated on the
> correct node and charged to the correct memcg. This change makes it
> possible to explicitly specify the NUMA node and memcg when adding
> and removing objects, so that users of list_lru can track the
> node/memcg of items outside of the list_lru itself. This is needed by
> zswap, where allocations can be made by kswapd for data that is
> charged to a different cgroup.
>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>
I prefer what Johannes suggested: make list_lru_add() and friends
take in the memcg and nid, and add list_lru_add_obj() (or similar) and
friends that assume the object is on the right node and memcg. This is
clearer and more explicit imo. I am not very familiar with list_lrus
though, so I'll leave this to folks who actually are.
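
Roughly something like the below, i.e. move the current derivation
into a wrapper (list_lru_add_obj() is just an illustrative name here,
and the body simply mirrors what list_lru_add() does in this patch):

	bool list_lru_add(struct list_lru *lru, struct list_head *item,
			  int nid, struct mem_cgroup *memcg);

	/* In mm/list_lru.c: derive nid/memcg from the object itself. */
	bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
	{
		int nid = page_to_nid(virt_to_page(item));
		struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
			mem_cgroup_from_slab_obj(item) : NULL;

		return list_lru_add(lru, item, nid, memcg);
	}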
> ---
> include/linux/list_lru.h | 38 +++++++++++++++++++++++++++++++++++
> mm/list_lru.c            | 43 +++++++++++++++++++++++++++++++++++-----
> 2 files changed, 76 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
> index b35968ee9fb5..0f5f39cacbbb 100644
> --- a/include/linux/list_lru.h
> +++ b/include/linux/list_lru.h
> @@ -89,6 +89,24 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren
> */
> bool list_lru_add(struct list_lru *lru, struct list_head *item);
>
> +/**
> + * __list_lru_add: add an element to a specific sublist.
> + * @lru: the lru pointer
> + * @item: the item to be added.
> + * @nid: the node id of the sublist to add the item to.
> + * @memcg: the cgroup of the sublist to add the item to.
> + *
> + * This function is similar to list_lru_add(), but it allows the caller to
> + * specify the sublist to which the item should be added. This can be useful
> + * when the list_head node is not necessarily in the same cgroup and NUMA node
> + * as the data it represents, such as zswap, where the list_head node could be
> + * from kswapd and the data from a different cgroup altogether.
> + *
> + * Return value: true if the list was updated, false otherwise
> + */
> +bool __list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
> + struct mem_cgroup *memcg);
> +
> /**
> * list_lru_del: delete an element to the lru list
> * @list_lru: the lru pointer
> @@ -102,6 +120,18 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item);
> */
> bool list_lru_del(struct list_lru *lru, struct list_head *item);
>
> +/**
> + * __list_lru_del: delete an element from a specific sublist.
> + * @lru: the lru pointer
> + * @item: the item to be deleted.
> + * @nid: the node id of the sublist to delete the item from.
> + * @memcg: the cgroup of the sublist to delete the item from.
> + *
> + * Return value: true if the list was updated, false otherwise.
> + */
> +bool __list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
> + struct mem_cgroup *memcg);
> +
> /**
> * list_lru_count_one: return the number of objects currently held by @lru
> * @lru: the lru pointer.
> @@ -136,6 +166,14 @@ static inline unsigned long list_lru_count(struct list_lru *lru)
> void list_lru_isolate(struct list_lru_one *list, struct list_head *item);
> void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
> struct list_head *head);
> +/*
> + * list_lru_putback: undo list_lru_isolate.
> + *
> + * Since we might have dropped the LRU lock in between, recompute list_lru_one
> + * from the node's id and memcg.
> + */
> +void list_lru_putback(struct list_lru *lru, struct list_head *item, int nid,
> + struct mem_cgroup *memcg);
>
> typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item,
> struct list_lru_one *list, spinlock_t *lock, void *cb_arg);
> diff --git a/mm/list_lru.c b/mm/list_lru.c
> index a05e5bef3b40..63b75163c6ad 100644
> --- a/mm/list_lru.c
> +++ b/mm/list_lru.c
> @@ -119,13 +119,22 @@ list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
> bool list_lru_add(struct list_lru *lru, struct list_head *item)
> {
> int nid = page_to_nid(virt_to_page(item));
> + struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
> + mem_cgroup_from_slab_obj(item) : NULL;
> +
> + return __list_lru_add(lru, item, nid, memcg);
> +}
> +EXPORT_SYMBOL_GPL(list_lru_add);
> +
> +bool __list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
> + struct mem_cgroup *memcg)
> +{
> struct list_lru_node *nlru = &lru->node[nid];
> - struct mem_cgroup *memcg;
> struct list_lru_one *l;
>
> spin_lock(&nlru->lock);
> if (list_empty(item)) {
> - l = list_lru_from_kmem(lru, nid, item, &memcg);
> + l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
> list_add_tail(item, &l->list);
> /* Set shrinker bit if the first element was added */
> if (!l->nr_items++)
> @@ -138,17 +147,27 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
> spin_unlock(&nlru->lock);
> return false;
> }
> -EXPORT_SYMBOL_GPL(list_lru_add);
> +EXPORT_SYMBOL_GPL(__list_lru_add);
>
> bool list_lru_del(struct list_lru *lru, struct list_head *item)
> {
> int nid = page_to_nid(virt_to_page(item));
> + struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
> + mem_cgroup_from_slab_obj(item) : NULL;
> +
> + return __list_lru_del(lru, item, nid, memcg);
> +}
> +EXPORT_SYMBOL_GPL(list_lru_del);
> +
> +bool __list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
> + struct mem_cgroup *memcg)
> +{
> struct list_lru_node *nlru = &lru->node[nid];
> struct list_lru_one *l;
>
> spin_lock(&nlru->lock);
> if (!list_empty(item)) {
> - l = list_lru_from_kmem(lru, nid, item, NULL);
> + l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
> list_del_init(item);
> l->nr_items--;
> nlru->nr_items--;
> @@ -158,7 +177,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
> spin_unlock(&nlru->lock);
> return false;
> }
> -EXPORT_SYMBOL_GPL(list_lru_del);
> +EXPORT_SYMBOL_GPL(__list_lru_del);
>
> void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
> {
> @@ -175,6 +194,20 @@ void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
> }
> EXPORT_SYMBOL_GPL(list_lru_isolate_move);
>
> +void list_lru_putback(struct list_lru *lru, struct list_head *item, int nid,
> + struct mem_cgroup *memcg)
> +{
> + struct list_lru_one *list =
> + list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
> +
> + if (list_empty(item)) {
> + list_add_tail(item, &list->list);
> + if (!list->nr_items++)
> + set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
> + }
> +}
> +EXPORT_SYMBOL_GPL(list_lru_putback);
> +
> unsigned long list_lru_count_one(struct list_lru *lru,
> int nid, struct mem_cgroup *memcg)
> {
> --
> 2.34.1
On Wed, Oct 18, 2023 at 3:27 PM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> On Tue, Oct 17, 2023 at 4:21 PM Nhat Pham <nphamcs@gmail.com> wrote:
> >
> > The list_lru interface assumes that objects are allocated on the
> > correct node and charged to the correct memcg. This change makes it
> > possible to explicitly specify the NUMA node and memcg when adding
> > and removing objects, so that users of list_lru can track the
> > node/memcg of items outside of the list_lru itself. This is needed by
> > zswap, where allocations can be made by kswapd for data that is
> > charged to a different cgroup.
> >
> > Signed-off-by: Nhat Pham <nphamcs@gmail.com>
>
> I prefer what Johannes suggested: make list_lru_add() and friends
> take in the memcg and nid, and add list_lru_add_obj() (or similar) and
> friends that assume the object is on the right node and memcg. This is
> clearer and more explicit imo. I am not very familiar with list_lrus
> though, so I'll leave this to folks who actually are.
Yeah the original naming is... most unfortunate, to say the least :)
I created a new function to avoid renaming list_lru_add()'s usage
everywhere, but if the consensus is that everyone prefers
list_lru_add() to be the one taking memcg + nid (and the original
renamed to list_lru_add_obj()), I can go around fixing all of it :)
Seems like a separate endeavour though.
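
For context, the intended putback usage on the zswap side looks
roughly like this; the entry helpers below are hypothetical stand-ins,
and note that list_lru_putback() itself takes no lock, so the caller
re-takes the per-node lock that the walk may have dropped:

	static void my_lru_putback(struct list_lru *lru,
				   struct my_entry *entry)
	{
		int nid = my_entry_nid(entry);		/* hypothetical */
		spinlock_t *lock = &lru->node[nid].lock;
		struct mem_cgroup *memcg;

		rcu_read_lock();
		memcg = my_entry_memcg(entry);		/* hypothetical */
		spin_lock(lock);
		list_lru_putback(lru, &entry->lru_node, nid, memcg);
		spin_unlock(lock);
		rcu_read_unlock();
	}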
>
> > ---
> > include/linux/list_lru.h | 38 +++++++++++++++++++++++++++++++++++
> > mm/list_lru.c            | 43 +++++++++++++++++++++++++++++++++++-----
> > 2 files changed, 76 insertions(+), 5 deletions(-)
> >
> > diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
> > index b35968ee9fb5..0f5f39cacbbb 100644
> > --- a/include/linux/list_lru.h
> > +++ b/include/linux/list_lru.h
> > @@ -89,6 +89,24 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren
> > */
> > bool list_lru_add(struct list_lru *lru, struct list_head *item);
> >
> > +/**
> > + * __list_lru_add: add an element to a specific sublist.
> > + * @lru: the lru pointer
> > + * @item: the item to be added.
> > + * @nid: the node id of the sublist to add the item to.
> > + * @memcg: the cgroup of the sublist to add the item to.
> > + *
> > + * This function is similar to list_lru_add(), but it allows the caller to
> > + * specify the sublist to which the item should be added. This can be useful
> > + * when the list_head node is not necessarily in the same cgroup and NUMA node
> > + * as the data it represents, such as zswap, where the list_head node could be
> > + * from kswapd and the data from a different cgroup altogether.
> > + *
> > + * Return value: true if the list was updated, false otherwise
> > + */
> > +bool __list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
> > + struct mem_cgroup *memcg);
> > +
> > /**
> > * list_lru_del: delete an element to the lru list
> > * @list_lru: the lru pointer
> > @@ -102,6 +120,18 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item);
> > */
> > bool list_lru_del(struct list_lru *lru, struct list_head *item);
> >
> > +/**
> > + * __list_lru_del: delete an element from a specific sublist.
> > + * @lru: the lru pointer
> > + * @item: the item to be deleted.
> > + * @nid: the node id of the sublist to delete the item from.
> > + * @memcg: the cgroup of the sublist to delete the item from.
> > + *
> > + * Return value: true if the list was updated, false otherwise.
> > + */
> > +bool __list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
> > + struct mem_cgroup *memcg);
> > +
> > /**
> > * list_lru_count_one: return the number of objects currently held by @lru
> > * @lru: the lru pointer.
> > @@ -136,6 +166,14 @@ static inline unsigned long list_lru_count(struct list_lru *lru)
> > void list_lru_isolate(struct list_lru_one *list, struct list_head *item);
> > void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
> > struct list_head *head);
> > +/*
> > + * list_lru_putback: undo list_lru_isolate.
> > + *
> > + * Since we might have dropped the LRU lock in between, recompute list_lru_one
> > + * from the node's id and memcg.
> > + */
> > +void list_lru_putback(struct list_lru *lru, struct list_head *item, int nid,
> > + struct mem_cgroup *memcg);
> >
> > typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item,
> > struct list_lru_one *list, spinlock_t *lock, void *cb_arg);
> > diff --git a/mm/list_lru.c b/mm/list_lru.c
> > index a05e5bef3b40..63b75163c6ad 100644
> > --- a/mm/list_lru.c
> > +++ b/mm/list_lru.c
> > @@ -119,13 +119,22 @@ list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
> > bool list_lru_add(struct list_lru *lru, struct list_head *item)
> > {
> > int nid = page_to_nid(virt_to_page(item));
> > + struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
> > + mem_cgroup_from_slab_obj(item) : NULL;
> > +
> > + return __list_lru_add(lru, item, nid, memcg);
> > +}
> > +EXPORT_SYMBOL_GPL(list_lru_add);
> > +
> > +bool __list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
> > + struct mem_cgroup *memcg)
> > +{
> > struct list_lru_node *nlru = &lru->node[nid];
> > - struct mem_cgroup *memcg;
> > struct list_lru_one *l;
> >
> > spin_lock(&nlru->lock);
> > if (list_empty(item)) {
> > - l = list_lru_from_kmem(lru, nid, item, &memcg);
> > + l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
> > list_add_tail(item, &l->list);
> > /* Set shrinker bit if the first element was added */
> > if (!l->nr_items++)
> > @@ -138,17 +147,27 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
> > spin_unlock(&nlru->lock);
> > return false;
> > }
> > -EXPORT_SYMBOL_GPL(list_lru_add);
> > +EXPORT_SYMBOL_GPL(__list_lru_add);
> >
> > bool list_lru_del(struct list_lru *lru, struct list_head *item)
> > {
> > int nid = page_to_nid(virt_to_page(item));
> > + struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
> > + mem_cgroup_from_slab_obj(item) : NULL;
> > +
> > + return __list_lru_del(lru, item, nid, memcg);
> > +}
> > +EXPORT_SYMBOL_GPL(list_lru_del);
> > +
> > +bool __list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
> > + struct mem_cgroup *memcg)
> > +{
> > struct list_lru_node *nlru = &lru->node[nid];
> > struct list_lru_one *l;
> >
> > spin_lock(&nlru->lock);
> > if (!list_empty(item)) {
> > - l = list_lru_from_kmem(lru, nid, item, NULL);
> > + l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
> > list_del_init(item);
> > l->nr_items--;
> > nlru->nr_items--;
> > @@ -158,7 +177,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
> > spin_unlock(&nlru->lock);
> > return false;
> > }
> > -EXPORT_SYMBOL_GPL(list_lru_del);
> > +EXPORT_SYMBOL_GPL(__list_lru_del);
> >
> > void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
> > {
> > @@ -175,6 +194,20 @@ void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
> > }
> > EXPORT_SYMBOL_GPL(list_lru_isolate_move);
> >
> > +void list_lru_putback(struct list_lru *lru, struct list_head *item, int nid,
> > + struct mem_cgroup *memcg)
> > +{
> > + struct list_lru_one *list =
> > + list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
> > +
> > + if (list_empty(item)) {
> > + list_add_tail(item, &list->list);
> > + if (!list->nr_items++)
> > + set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
> > + }
> > +}
> > +EXPORT_SYMBOL_GPL(list_lru_putback);
> > +
> > unsigned long list_lru_count_one(struct list_lru *lru,
> > int nid, struct mem_cgroup *memcg)
> > {
> > --
> > 2.34.1