[RFC] mm: introduce object accounting via backtrace on slub

Message ID 1667550838-10639-1-git-send-email-zhaoyang.huang@unisoc.com
State New
Series [RFC] mm: introduce object accounting via backtrace on slub

Commit Message

zhaoyang.huang Nov. 4, 2022, 8:33 a.m. UTC
  From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>

Introduce a set of methods to account slab objects by their allocation and free backtraces.
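
Each record read from /sys/kernel/debug/slab/<cache>/backtrace_accounting
pairs a per-backtrace object count with the symbolized stack, e.g.
(illustrative output only; addresses and symbols below are hypothetical):

count 128
    [<00000000c0ffee12>] kmem_cache_alloc+0x13c/0x2a0
    [<000000001badb002>] anon_vma_fork+0x8c/0x1e0
    [<00000000deadc0de>] copy_process+0x1370/0x1d80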

Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
---
 include/linux/slub_def.h |   7 +++
 mm/slub.c                | 155 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 161 insertions(+), 1 deletion(-)
  

Comments

Vlastimil Babka Nov. 4, 2022, 4:18 p.m. UTC | #1
On 11/4/22 09:33, zhaoyang.huang wrote:
> From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> 
> Introduce a set of methods to account slab objects by their allocation and free backtraces.

Can you describe what exactly it does, so we don't need to guess from 
the source code?

> Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> ---
>   include/linux/slub_def.h |   7 +++
>   mm/slub.c                | 155 ++++++++++++++++++++++++++++++++++++++++++++++-
>   2 files changed, 161 insertions(+), 1 deletion(-)
> 
> [... diff snipped; reproduced in full in the Patch section below ...]
  

Patch

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index f9c68a9..c90e5fb 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -139,9 +139,16 @@  struct kmem_cache {
 	unsigned int useroffset;	/* Usercopy region offset */
 	unsigned int usersize;		/* Usercopy region size */
 
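+	/* rb-tree of hash_object nodes, one per unique backtrace */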
+	struct rb_root user_hash_root;
+
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
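+/*
+ * One accounting node per unique backtrace: @count is incremented for
+ * each allocation and decremented for each free whose captured stack
+ * maps to @trace_hash.
+ */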
+struct hash_object {
+	int count;
+	depot_stack_handle_t trace_hash;
+	struct rb_node rb_node;
+};
+
 #ifdef CONFIG_SYSFS
 #define SLAB_SUPPORTS_SYSFS
 void sysfs_slab_unlink(struct kmem_cache *);
diff --git a/mm/slub.c b/mm/slub.c
index 157527d..3ec02ff 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -337,6 +337,7 @@  static inline void stat(const struct kmem_cache *s, enum stat_item si)
  */
 static struct workqueue_struct *flushwq;
 
+static struct kmem_cache *user_hash_cache;
 /********************************************************************
  * 			Core slab cache functions
  *******************************************************************/
@@ -759,12 +760,152 @@  static void set_track_update(struct kmem_cache *s, void *object,
 	p->when = jiffies;
 }
 
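+/*
+ * Account one tracking event against its backtrace: look the stack
+ * depot handle up in the per-cache rb-tree, increment the counter for
+ * an allocation and decrement it for a free, inserting a node for a
+ * first-seen trace and erasing nodes whose count drops back to zero.
+ */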
+static void store_user_hash(struct kmem_cache *s, void *object,
+		depot_stack_handle_t trace_hash, enum track_item alloc)
+{
+	struct rb_node *rb_parent = NULL;
+	unsigned int trace_hash_parent;
+	struct rb_node **link = &s->user_hash_root.rb_node;
+	struct hash_object *hash_parent = NULL;
+	struct hash_object *hash_object = NULL;
+	unsigned long flags;
+	int nid = slab_nid(virt_to_slab(object));
+	struct kmem_cache_node *n = get_node(s, nid);
+
+	/* tracking may run before slab_debugfs_init() creates the cache */
+	if (!user_hash_cache)
+		return;
+
+	spin_lock_irqsave(&n->list_lock, flags);
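+	/* descend the tree, keyed by the numeric stack depot handle */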
+	while (*link) {
+		rb_parent = *link;
+		hash_parent = rb_entry(rb_parent, struct hash_object, rb_node);
+		trace_hash_parent = hash_parent->trace_hash;
+		if (trace_hash < trace_hash_parent)
+			link = &hash_parent->rb_node.rb_left;
+		else if (trace_hash_parent < trace_hash)
+			link = &hash_parent->rb_node.rb_right;
+		else {
+			hash_object = hash_parent;
+			hash_object->count += (alloc == TRACK_ALLOC) ? 1 : -1;
+			/* drop the node once its count falls back to zero */
+			if (hash_object->count <= 0) {
+				rb_erase(&hash_object->rb_node, &s->user_hash_root);
+				kmem_cache_free(user_hash_cache, hash_object);
+			}
+			spin_unlock_irqrestore(&n->list_lock, flags);
+			return;
+		}
+	}
+	spin_unlock_irqrestore(&n->list_lock, flags);
+	/*
+	 * No node represents this trace yet. Only an allocation opens
+	 * a new accounting node; an unmatched free has nothing to
+	 * decrement.
+	 */
+	if (alloc != TRACK_ALLOC)
+		return;
+	/*
+	 * Tracking hooks may run in atomic context, so the node
+	 * allocation must not sleep.
+	 */
+	hash_object = kmem_cache_alloc(user_hash_cache,
+				       GFP_NOWAIT | __GFP_NOWARN);
+	if (!hash_object)
+		return;
+	hash_object->trace_hash = trace_hash;
+	hash_object->count = 1;
+	/*
+	 * The lock was dropped across the allocation; re-walk the tree
+	 * in case a concurrent caller inserted this trace first.
+	 */
+	spin_lock_irqsave(&n->list_lock, flags);
+	link = &s->user_hash_root.rb_node;
+	rb_parent = NULL;
+	while (*link) {
+		rb_parent = *link;
+		hash_parent = rb_entry(rb_parent, struct hash_object, rb_node);
+		if (trace_hash < hash_parent->trace_hash)
+			link = &hash_parent->rb_node.rb_left;
+		else if (hash_parent->trace_hash < trace_hash)
+			link = &hash_parent->rb_node.rb_right;
+		else {
+			hash_parent->count++;
+			spin_unlock_irqrestore(&n->list_lock, flags);
+			kmem_cache_free(user_hash_cache, hash_object);
+			return;
+		}
+	}
+	rb_link_node(&hash_object->rb_node, rb_parent, link);
+	rb_insert_color(&hash_object->rb_node, &s->user_hash_root);
+	spin_unlock_irqrestore(&n->list_lock, flags);
+}
+
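+/*
+ * debugfs read handler: each read() emits one record, a per-backtrace
+ * counter followed by the symbolized stack. *ppos tracks the NUMA node
+ * being walked; file->private_data caches the rb-tree cursor between
+ * reads.
+ */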
+static ssize_t backtrace_acc_print(struct file *file, char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	struct hash_object *object;
+	struct rb_node *rb;
+	unsigned long *entries;
+	unsigned int nr_entries;
+	char *kbuf;
+	int i, ret = 0;
+	unsigned long flags;
+	struct kmem_cache_node *n;
+	int node;
+	struct kmem_cache *s = file_inode(file)->i_private;
+
+	/* iterate from node 0; *ppos holds the node being walked */
+	node = *ppos;
+	if (node >= nr_node_ids)
+		return 0;
+
+	count = min_t(size_t, count, PAGE_SIZE);
+	kbuf = kmalloc(count, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+
+	/* skip nodes without a kmem_cache_node or without slabs */
+	n = get_node(s, node);
+	while (!n || !atomic_long_read(&n->nr_slabs)) {
+		if (++node >= nr_node_ids) {
+			kfree(kbuf);
+			return 0;
+		}
+		n = get_node(s, node);
+	}
+
+	/*
+	 * Resume from the cached cursor. XXX: the cursor may be stale
+	 * if tree nodes were erased since the previous read.
+	 */
+	rb = file->private_data ? (struct rb_node *)file->private_data :
+				  rb_first(&s->user_hash_root);
+	/* current node finished: emit a separator, move to the next node */
+	if (!rb)
+		goto done_node;
+
+	spin_lock_irqsave(&n->list_lock, flags);
+	object = rb_entry(rb, struct hash_object, rb_node);
+	/* skip entries with nothing to report */
+	while (!object->trace_hash || object->count <= 0) {
+		rb = rb_next(rb);
+		if (!rb) {
+			spin_unlock_irqrestore(&n->list_lock, flags);
+			goto done_node;
+		}
+		object = rb_entry(rb, struct hash_object, rb_node);
+	}
+
+	nr_entries = stack_depot_fetch(object->trace_hash, &entries);
+	ret += snprintf(kbuf + ret, count - ret, "count %d\n", object->count);
+	if (ret >= count)
+		goto err;
+	for (i = 0; i < nr_entries; i++) {
+		void *ptr = (void *)entries[i];
+
+		ret += snprintf(kbuf + ret, count - ret, "    [<%p>] %pS\n",
+				ptr, ptr);
+		if (ret >= count)
+			goto err;
+	}
+	file->private_data = rb_next(rb);
+	spin_unlock_irqrestore(&n->list_lock, flags);
+
+	if (copy_to_user(buf, kbuf, ret))
+		ret = -EFAULT;
+	kfree(kbuf);
+	return ret;
+
+done_node:
+	ret += snprintf(kbuf + ret, count - ret, "\n");
+	if (copy_to_user(buf, kbuf, ret))
+		ret = -EFAULT;
+	file->private_data = NULL;
+	*ppos = ++node;
+	kfree(kbuf);
+	return ret;
+
+err:
+	/* a single record did not fit into the user buffer */
+	spin_unlock_irqrestore(&n->list_lock, flags);
+	kfree(kbuf);
+	return -E2BIG;
+}
+
 static __always_inline void set_track(struct kmem_cache *s, void *object,
 				      enum track_item alloc, unsigned long addr)
 {
 	depot_stack_handle_t handle = set_track_prepare();
 
 	set_track_update(s, object, alloc, addr, handle);
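+	/*
+	 * Skip failed stack depot captures (handle == 0) and the cache
+	 * backing the hash nodes themselves; the pointer compare (rather
+	 * than a name compare) also covers a merged alias and prevents
+	 * recursive accounting.
+	 */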
+	if (handle && s != user_hash_cache)
+		store_user_hash(s, object, handle, alloc);
 }
 
 static void init_tracking(struct kmem_cache *s, void *object)
@@ -4918,8 +5059,10 @@  int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
 		return err;
 	}
 
-	if (s->flags & SLAB_STORE_USER)
+	if (s->flags & SLAB_STORE_USER) {
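+		/* fresh accounting tree per SLAB_STORE_USER cache */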
+		s->user_hash_root = RB_ROOT;
 		debugfs_slab_add(s);
+	}
 
 	return 0;
 }
@@ -6236,6 +6379,10 @@  static int slab_debug_trace_release(struct inode *inode, struct file *file)
 	.release = slab_debug_trace_release,
 };
 
+static const struct file_operations backtrace_accounting_operations = {
+	.read		= backtrace_acc_print,
+};
+
 static void debugfs_slab_add(struct kmem_cache *s)
 {
 	struct dentry *slab_cache_dir;
@@ -6250,6 +6397,9 @@  static void debugfs_slab_add(struct kmem_cache *s)
 
 	debugfs_create_file("free_traces", 0400,
 		slab_cache_dir, s, &slab_debugfs_fops);
+
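+	/* per-cache object counts keyed by backtrace, see backtrace_acc_print() */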
+	debugfs_create_file("backtrace_accounting", 0400, slab_cache_dir, s,
+			    &backtrace_accounting_operations);
 }
 
 void debugfs_slab_release(struct kmem_cache *s)
@@ -6261,6 +6411,9 @@  static int __init slab_debugfs_init(void)
 {
 	struct kmem_cache *s;
 
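+	/* backing cache for the struct hash_object accounting nodes */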
+	user_hash_cache = kmem_cache_create("user_hash_cache",
+			sizeof(struct hash_object),
+			0, 0, NULL);
 	slab_debugfs_root = debugfs_create_dir("slab", NULL);
 
 	list_for_each_entry(s, &slab_caches, list)