[3/5] objtool: Avoid O(bloody terrible) behaviour -- an ode to libelf
Commit Message
Due to how gelf_update_sym*() requires an Elf_Data pointer, and how
libelf keeps Elf_Data in a linked list per section,
elf_update_symbol() ends up having to iterate this list on each
update to find the correct Elf_Data for the index'ed symbol.
By allocating one Elf_Data per new symbol, the list grows per new
symbol, giving an effective O(n^2) insertion time. This is obviously
bloody terrible.
Therefore over-allocate the Elf_Data when an extention is needed.
Except it turns out libelf disregards Elf_Scn::sh_size in favour of
the sum of Elf_Data::d_size. IOW it will happily write out all the
unused space and fill it with:
0000000000000000 0 NOTYPE LOCAL DEFAULT UND
entries (aka zeros). Which obviously violates the STB_LOCAL placement
rule, and is a general pain in the backside for not being the desired
behaviour.
Manually fix-up the Elf_Data size to avoid this problem before calling
elf_update().
This significantly improves performance when adding a significant
number of symbols.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
tools/objtool/elf.c | 89 +++++++++++++++++++++++++++++++++---
tools/objtool/include/objtool/elf.h | 2
2 files changed, 84 insertions(+), 7 deletions(-)
Comments
On Fri, Oct 28, 2022 at 09:40:25PM +0200, Peter Zijlstra wrote:
> Due to how gelf_update_sym*() requires an Elf_Data pointer, and how
> libelf keeps Elf_Data in a linked list per section,
> elf_update_symbol() ends up having to iterate this list on each
> update to find the correct Elf_Data for the index'ed symbol.
>
> By allocating one Elf_Data per new symbol, the list grows per new
> symbol, giving an effective O(n^2) insertion time. This is obviously
> bloody terrible.
>
> Therefore over-allocate the Elf_Data when an extention is needed.
> Except it turns out libelf disregards Elf_Scn::sh_size in favour of
> the sum of Elf_Data::d_size. IOW it will happily write out all the
> unused space and fill it with:
>
> 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
>
> entries (aka zeros). Which obviously violates the STB_LOCAL placement
> rule, and is a general pain in the backside for not being the desired
> behaviour.
>
> Manually fix-up the Elf_Data size to avoid this problem before calling
> elf_update().
>
> This significantly improves performance when adding a significant
> number of symbols.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Instead of going through libelf to add each symbol, and
adjusting/shifting/reallocating the d_buf one symbol at a time, it would
probably be a lot easier (and faster) to just manually do all that at
the end, just before writing the file.
See for example what kpatch does:
https://github.com/dynup/kpatch/blob/master/kpatch-build/kpatch-elf.c#L725
On Wed, Nov 02, 2022 at 03:22:22PM -0700, Josh Poimboeuf wrote:
> On Fri, Oct 28, 2022 at 09:40:25PM +0200, Peter Zijlstra wrote:
> > Due to how gelf_update_sym*() requires an Elf_Data pointer, and how
> > libelf keeps Elf_Data in a linked list per section,
> > elf_update_symbol() ends up having to iterate this list on each
> > update to find the correct Elf_Data for the index'ed symbol.
> >
> > By allocating one Elf_Data per new symbol, the list grows per new
> > symbol, giving an effective O(n^2) insertion time. This is obviously
> > bloody terrible.
> >
> > Therefore over-allocate the Elf_Data when an extention is needed.
> > Except it turns out libelf disregards Elf_Scn::sh_size in favour of
> > the sum of Elf_Data::d_size. IOW it will happily write out all the
> > unused space and fill it with:
> >
> > 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
> >
> > entries (aka zeros). Which obviously violates the STB_LOCAL placement
> > rule, and is a general pain in the backside for not being the desired
> > behaviour.
> >
> > Manually fix-up the Elf_Data size to avoid this problem before calling
> > elf_update().
> >
> > This significantly improves performance when adding a significant
> > number of symbols.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>
> Instead of going through libelf to add each symbol, and
> adjusting/shifting/reallocating the d_buf one symbol at a time, it would
> probably be a lot easier (and faster) to just manually do all that at
> the end, just before writing the file.
Yeah, I've been >< close to doing that at least twice now. The only
consideration is that we then also must rewrite all relocs but I think
we end up doing that anyway.
I'll go do the patch to see what it looks like.
@@ -634,6 +634,12 @@ static int elf_update_symbol(struct elf
/* end-of-list */
if (!symtab_data) {
+ /*
+ * Over-allocate to avoid O(n^2) symbol creation
+ * behaviour. The down side is that libelf doesn't
+ * like this; see elf_truncate_section() for the fixup.
+ */
+ int num = max(1U, sym->idx/3);
void *buf;
if (idx) {
@@ -647,28 +653,34 @@ static int elf_update_symbol(struct elf
if (t)
shndx_data = elf_newdata(t);
- buf = calloc(1, entsize);
+ buf = calloc(num, entsize);
if (!buf) {
WARN("malloc");
return -1;
}
symtab_data->d_buf = buf;
- symtab_data->d_size = entsize;
+ symtab_data->d_size = num * entsize;
symtab_data->d_align = 1;
symtab_data->d_type = ELF_T_SYM;
- symtab->sh.sh_size += entsize;
symtab->changed = true;
+ symtab->truncate = true;
if (t) {
- shndx_data->d_buf = &sym->sec->idx;
- shndx_data->d_size = sizeof(Elf32_Word);
+ buf = calloc(num, sizeof(Elf32_Word));
+ if (!buf) {
+ WARN("malloc");
+ return -1;
+ }
+
+ shndx_data->d_buf = buf;
+ shndx_data->d_size = num * sizeof(Elf32_Word);
shndx_data->d_align = sizeof(Elf32_Word);
shndx_data->d_type = ELF_T_WORD;
- symtab_shndx->sh.sh_size += sizeof(Elf32_Word);
symtab_shndx->changed = true;
+ symtab_shndx->truncate = true;
}
break;
@@ -770,6 +782,14 @@ __elf_create_symbol(struct elf *elf, str
return NULL;
}
+ symtab->sh.sh_size += symtab->sh.sh_entsize;
+ symtab->changed = true;
+
+ if (symtab_shndx) {
+ symtab_shndx->sh.sh_size += sizeof(Elf32_Word);
+ symtab_shndx->changed = true;
+ }
+
return sym;
}
@@ -1286,6 +1306,60 @@ int elf_write_reloc(struct elf *elf, str
return 0;
}
+/*
+ * When Elf_Scn::sh_size is smaller than the combined Elf_Data::d_size
+ * do you:
+ *
+ * A) adhere to the section header and truncate the data, or
+ * B) ignore the section header and write out all the data you've got?
+ *
+ * Yes, libelf sucks and we need to manually truncate if we over-allocate data.
+ */
+static int elf_truncate_section(struct elf *elf, struct section *sec)
+{
+ u64 size = sec->sh.sh_size;
+ bool truncated = false;
+ Elf_Data *data = NULL;
+ Elf_Scn *s;
+
+ s = elf_getscn(elf->elf, sec->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ for (;;) {
+ /* get next data descriptor for the relevant section */
+ data = elf_getdata(s, data);
+
+ if (!data) {
+ if (size) {
+ WARN("end of section data but non-zero size left\n");
+ return -1;
+ }
+ return 0;
+ }
+
+ if (truncated) {
+ /* when we remove symbols */
+ WARN("truncated; but more data\n");
+ return -1;
+ }
+
+ if (!data->d_size) {
+ WARN("zero size data");
+ return -1;
+ }
+
+ if (data->d_size > size) {
+ truncated = true;
+ data->d_size = size;
+ }
+
+ size -= data->d_size;
+ }
+}
+
int elf_write(struct elf *elf)
{
struct section *sec;
@@ -1296,6 +1370,9 @@ int elf_write(struct elf *elf)
/* Update changed relocation sections and section headers: */
list_for_each_entry(sec, &elf->sections, list) {
+ if (sec->truncate)
+ elf_truncate_section(elf, sec);
+
if (sec->changed) {
s = elf_getscn(elf->elf, sec->idx);
if (!s) {
@@ -38,7 +38,7 @@ struct section {
Elf_Data *data;
char *name;
int idx;
- bool changed, text, rodata, noinstr, init;
+ bool changed, text, rodata, noinstr, init, truncate;
};
struct symbol {