[v7,2/6] memcontrol: add a new function to traverse online-only memcg hierarchy

Message ID 20231127234600.2971029-3-nphamcs@gmail.com
State New
Headers
Series workload-specific and memory pressure-driven zswap writeback |

Commit Message

Nhat Pham Nov. 27, 2023, 11:45 p.m. UTC
  The new zswap writeback scheme requires an online-only memcg hierarchy
traversal. Add this functionality via the new mem_cgroup_iter_online()
function - the old mem_cgroup_iter() is a special case of this new
function.

Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 include/linux/memcontrol.h | 13 +++++++++++++
 mm/memcontrol.c            | 29 +++++++++++++++++++++++++----
 2 files changed, 38 insertions(+), 4 deletions(-)
  

Comments

Johannes Weiner Nov. 29, 2023, 3:04 p.m. UTC | #1
On Mon, Nov 27, 2023 at 03:45:56PM -0800, Nhat Pham wrote:
> The new zswap writeback scheme requires an online-only memcg hierarchy
> traversal. Add this functionality via the new mem_cgroup_iter_online()
> function - the old mem_cgroup_iter() is a special case of this new
> function.
> 
> Suggested-by: Andrew Morton <akpm@linux-foundation.org>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
  
Johannes Weiner Nov. 29, 2023, 5 p.m. UTC | #2
On Wed, Nov 29, 2023 at 10:04:05AM -0500, Johannes Weiner wrote:
> On Mon, Nov 27, 2023 at 03:45:56PM -0800, Nhat Pham wrote:
> > The new zswap writeback scheme requires an online-only memcg hierarchy
> > traversal. Add this functionality via the new mem_cgroup_iter_online()
> > function - the old mem_cgroup_iter() is a special case of this new
> > function.
> > 
> > Suggested-by: Andrew Morton <akpm@linux-foundation.org>
> > Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> 
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>

I saw Michal's reply on the other thread only after I sent this.

I agree with him it would be better to just check mem_cgroup_online()
in the shinker callsite and leave mem_cgroup_iter() as-is. If it's
offline, just continue to the next mem_cgroup_iter() invocation.
  

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7bdcf3020d7a..5bfe41840e80 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -833,6 +833,10 @@  static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
 				   struct mem_cgroup *,
 				   struct mem_cgroup_reclaim_cookie *);
+struct mem_cgroup *mem_cgroup_iter_online(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim,
+				   bool online);
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
 			   int (*)(struct task_struct *, void *), void *arg);
@@ -1386,6 +1390,15 @@  mem_cgroup_iter(struct mem_cgroup *root,
 	return NULL;
 }
 
+static inline struct mem_cgroup *
+mem_cgroup_iter_online(struct mem_cgroup *root,
+		struct mem_cgroup *prev,
+		struct mem_cgroup_reclaim_cookie *reclaim,
+		bool online)
+{
+	return NULL;
+}
+
 static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
 					 struct mem_cgroup *prev)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 470821d1ba1a..b1fb96233fa1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1111,10 +1111,11 @@  struct mem_cgroup *get_mem_cgroup_from_current(void)
 }
 
 /**
- * mem_cgroup_iter - iterate over memory cgroup hierarchy
+ * mem_cgroup_iter_online - iterate over memory cgroup hierarchy
  * @root: hierarchy root
  * @prev: previously returned memcg, NULL on first invocation
  * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ * @online: whether to skip offline memcgs
  *
  * Returns references to children of the hierarchy below @root, or
  * @root itself, or %NULL after a full round-trip.
@@ -1123,13 +1124,16 @@  struct mem_cgroup *get_mem_cgroup_from_current(void)
  * invocations for reference counting, or use mem_cgroup_iter_break()
  * to cancel a hierarchy walk before the round-trip is complete.
  *
+ * Caller can skip offline memcgs by passing true for @online.
+ *
  * Reclaimers can specify a node in @reclaim to divide up the memcgs
  * in the hierarchy among all concurrent reclaimers operating on the
  * same node.
  */
-struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+struct mem_cgroup *mem_cgroup_iter_online(struct mem_cgroup *root,
 				   struct mem_cgroup *prev,
-				   struct mem_cgroup_reclaim_cookie *reclaim)
+				   struct mem_cgroup_reclaim_cookie *reclaim,
+				   bool online)
 {
 	struct mem_cgroup_reclaim_iter *iter;
 	struct cgroup_subsys_state *css = NULL;
@@ -1199,7 +1203,8 @@  struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 		 * is provided by the caller, so we know it's alive
 		 * and kicking, and don't take an extra reference.
 		 */
-		if (css == &root->css || css_tryget(css)) {
+		if (css == &root->css || (!online && css_tryget(css)) ||
+				css_tryget_online(css)) {
 			memcg = mem_cgroup_from_css(css);
 			break;
 		}
@@ -1228,6 +1233,22 @@  struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 	return memcg;
 }
 
+/**
+ * mem_cgroup_iter - iterate over memory cgroup hierarchy
+ * @root: hierarchy root
+ * @prev: previously returned memcg, NULL on first invocation
+ * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ *
+ * Perform an iteration on the memory cgroup hierarchy without skipping
+ * offline memcgs.
+ */
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim)
+{
+	return mem_cgroup_iter_online(root, prev, reclaim, false);
+}
+
 /**
  * mem_cgroup_iter_break - abort a hierarchy walk prematurely
  * @root: hierarchy root