[RFC,v2,bpf-next,1/9] mm: Store build id in inode object
Commit Message
Storing build id in file's inode object for elf executable with build
id defined. The build id is stored when file is mmaped.
This is enabled with new config option CONFIG_INODE_BUILD_ID.
The build id is valid only when the file with given inode is mmap-ed.
We store either the build id itself or the error we hit during
the retrieval.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
fs/inode.c | 12 ++++++++++++
include/linux/buildid.h | 15 +++++++++++++++
include/linux/fs.h | 7 +++++++
lib/buildid.c | 40 ++++++++++++++++++++++++++++++++++++++++
mm/Kconfig | 8 ++++++++
mm/mmap.c | 23 +++++++++++++++++++++++
6 files changed, 105 insertions(+)
Comments
On Tue, 28 Feb 2023 10:31:58 +0100 Jiri Olsa <jolsa@kernel.org> wrote:
> Storing build id in file's inode object for elf executable with build
> id defined. The build id is stored when file is mmaped.
>
> This is enabled with new config option CONFIG_INODE_BUILD_ID.
>
> The build id is valid only when the file with given inode is mmap-ed.
>
> We store either the build id itself or the error we hit during
> the retrieval.
>
> ...
>
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -699,6 +700,12 @@ struct inode {
> struct fsverity_info *i_verity_info;
> #endif
>
> +#ifdef CONFIG_INODE_BUILD_ID
> + /* Initialized and valid for executable elf files when mmap-ed. */
> + struct build_id *i_build_id;
> + spinlock_t i_build_id_lock;
> +#endif
> +
Remember we can have squillions of inodes in memory. So that's one
costly spinlock!
AFAICT this lock could be removed if mmap_region() were to use an
atomic exchange on inode->i_build_id?
If not, can we use an existing lock? i_lock would be appropriate
(don't forget to update its comment).
Also, the code in mmap_region() runs build_id_free() inside the locked
region, which seems unnecessary.
On Tue, Feb 28, 2023 at 11:13:10AM -0800, Andrew Morton wrote:
> On Tue, 28 Feb 2023 10:31:58 +0100 Jiri Olsa <jolsa@kernel.org> wrote:
>
> > Storing build id in file's inode object for elf executable with build
> > id defined. The build id is stored when file is mmaped.
> >
> > This is enabled with new config option CONFIG_INODE_BUILD_ID.
> >
> > The build id is valid only when the file with given inode is mmap-ed.
> >
> > We store either the build id itself or the error we hit during
> > the retrieval.
> >
> > ...
> >
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -699,6 +700,12 @@ struct inode {
> > struct fsverity_info *i_verity_info;
> > #endif
> >
> > +#ifdef CONFIG_INODE_BUILD_ID
> > + /* Initialized and valid for executable elf files when mmap-ed. */
> > + struct build_id *i_build_id;
> > + spinlock_t i_build_id_lock;
> > +#endif
> > +
>
> Remember we can have squillions of inodes in memory. So that's one
> costly spinlock!
>
> AFAICT this lock could be removed if mmap_region() were to use an
> atomic exchange on inode->i_build_id?
right, that should work I'll check
>
> If not, can we use an existing lock? i_lock would be appropriate
> (don't forget to update its comment).
ok
>
> Also, the code in mmap_region() runs build_id_free() inside the locked
> region, which seems unnecessary.
>
ok, if the atomic exchange is doable, it'll take care of this
thanks,
jirka
@@ -22,6 +22,7 @@
#include <linux/list_lru.h>
#include <linux/iversion.h>
#include <trace/events/writeback.h>
+#include <linux/buildid.h>
#include "internal.h"
/*
@@ -228,6 +229,10 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
#endif
inode->i_flctx = NULL;
+#ifdef CONFIG_INODE_BUILD_ID
+ inode->i_build_id = NULL;
+ spin_lock_init(&inode->i_build_id_lock);
+#endif
if (unlikely(security_inode_alloc(inode)))
return -ENOMEM;
this_cpu_inc(nr_inodes);
@@ -296,6 +301,11 @@ void __destroy_inode(struct inode *inode)
if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
posix_acl_release(inode->i_default_acl);
#endif
+#ifdef CONFIG_INODE_BUILD_ID
+ build_id_free(inode->i_build_id);
+ inode->i_build_id = NULL;
+#endif
+
this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);
@@ -2242,6 +2252,8 @@ void __init inode_init(void)
SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
+ build_id_init();
+
/* Hash may have been set up in inode_init_early */
if (!hashdist)
return;
@@ -3,9 +3,15 @@
#define _LINUX_BUILDID_H
#include <linux/mm_types.h>
+#include <linux/slab.h>
#define BUILD_ID_SIZE_MAX 20
+struct build_id {
+ u32 sz;
+ char data[BUILD_ID_SIZE_MAX];
+};
+
int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
__u32 *size);
int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size);
@@ -17,4 +23,13 @@ void init_vmlinux_build_id(void);
static inline void init_vmlinux_build_id(void) { }
#endif
+#ifdef CONFIG_INODE_BUILD_ID
+void __init build_id_init(void);
+void build_id_free(struct build_id *bid);
+void vma_read_build_id(struct vm_area_struct *vma, struct build_id **bidp);
+#else
+static inline void __init build_id_init(void) { }
+static inline void build_id_free(struct build_id *bid) { }
+#endif /* CONFIG_INODE_BUILD_ID */
+
#endif
@@ -43,6 +43,7 @@
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>
#include <linux/slab.h>
+#include <linux/buildid.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -699,6 +700,12 @@ struct inode {
struct fsverity_info *i_verity_info;
#endif
+#ifdef CONFIG_INODE_BUILD_ID
+ /* Initialized and valid for executable elf files when mmap-ed. */
+ struct build_id *i_build_id;
+ spinlock_t i_build_id_lock;
+#endif
+
void *i_private; /* fs or device private pointer */
} __randomize_layout;
@@ -5,6 +5,7 @@
#include <linux/elf.h>
#include <linux/kernel.h>
#include <linux/pagemap.h>
+#include <linux/slab.h>
#define BUILD_ID 3
@@ -189,3 +190,42 @@ void __init init_vmlinux_build_id(void)
build_id_parse_buf(&__start_notes, vmlinux_build_id, size);
}
#endif
+
+#ifdef CONFIG_INODE_BUILD_ID
+
+/* SLAB cache for build_id structures */
+static struct kmem_cache *build_id_cachep;
+
+void vma_read_build_id(struct vm_area_struct *vma, struct build_id **bidp)
+{
+ struct build_id *bid = ERR_PTR(-ENOMEM);
+ int err;
+
+ if (!build_id_cachep)
+ goto out;
+ bid = kmem_cache_alloc(build_id_cachep, GFP_KERNEL);
+ if (!bid)
+ goto out;
+ err = build_id_parse(vma, bid->data, &bid->sz);
+ if (err) {
+ build_id_free(bid);
+ bid = ERR_PTR(err);
+ }
+out:
+ *bidp = bid;
+}
+
+void build_id_free(struct build_id *bid)
+{
+ if (IS_ERR_OR_NULL(bid))
+ return;
+ kmem_cache_free(build_id_cachep, bid);
+}
+
+void __init build_id_init(void)
+{
+ build_id_cachep = kmem_cache_create("build_id", sizeof(struct build_id), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL);
+}
+
+#endif /* CONFIG_INODE_BUILD_ID */
@@ -1183,6 +1183,14 @@ config LRU_GEN_STATS
This option has a per-memcg and per-node memory overhead.
# }
+config INODE_BUILD_ID
+ bool "Store build id in inode object"
+ default n
+ help
+ Store build id in iinode object for elf executable with build id
+ defined. The build id is stored when file for the given inode is
+ mmap-ed.
+
source "mm/damon/Kconfig"
endmenu
@@ -2530,6 +2530,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
pgoff_t vm_pgoff;
int error;
MA_STATE(mas, &mm->mm_mt, addr, end - 1);
+ struct build_id *bid = NULL;
/* Check against address space limit. */
if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
@@ -2626,6 +2627,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
if (error)
goto unmap_and_free_vma;
+#ifdef CONFIG_INODE_BUILD_ID
+ if (vma->vm_flags & VM_EXEC)
+ vma_read_build_id(vma, &bid);
+#endif
/*
* Expansion is handled above, merging is handled below.
* Drivers should not alter the address of the VMA.
@@ -2690,6 +2695,23 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
goto free_vma;
}
+#ifdef CONFIG_INODE_BUILD_ID
+ if (bid) {
+ struct inode *inode = file_inode(file);
+
+ spin_lock(&inode->i_build_id_lock);
+ /*
+ * If there's already valid build_id in inode, release it
+ * and use the new one.
+ */
+ if (inode->i_build_id)
+ build_id_free(inode->i_build_id);
+
+ inode->i_build_id = bid;
+ spin_unlock(&inode->i_build_id_lock);
+ }
+#endif
+
if (vma->vm_file)
i_mmap_lock_write(vma->vm_file->f_mapping);
@@ -2759,6 +2781,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
mapping_unmap_writable(file->f_mapping);
free_vma:
vm_area_free(vma);
+ build_id_free(bid);
unacct_error:
if (charged)
vm_unacct_memory(charged);