Thanks for the review. Here is a new version.
---
bfd/coffcode.h | 448 +++++++++++++++++++++++-------
-----------------
bfd/libcoff-in.h | 11 ++
bfd/peicode.h | 21 +++
3 files changed, 256 insertions(+), 224 deletions(-)
diff --git a/bfd/coffcode.h b/bfd/coffcode.h
index 594f3e0457..ab11e957e9 100644
+ insert_flags (pe_data (abfd)->flags_hash, symname, isym.n_scnum,
+ sec_flags, (esym - esymstart) / bfd_coff_symesz (abfd));
+ if (htab_elements (pe_data (abfd)->flags_hash) == 0)
+ fill_flags_hash (abfd, hdr);
+
+ struct flags_entry *found
+ = find_flags (pe_data (abfd)->flags_hash, name, section->target_index);
+ found = find_flags (pe_data (abfd)->flags_hash,
target_name_underscore,
+ section->target_index);
+ free (target_name_underscore);
+#else
+ found = find_flags (pe_data (abfd)->flags_hash, target_name,
+ section->target_index);
+#endif
+ }
+ /* Is this the name we're looking for ? */
+ if (found != NULL)
+ {
+ insert_coff_comdat_info (abfd, section, found->name, found->symbol);
+ return sec_flags | found->sec_flags;
+ }
}
-
- breakloop:
- return sec_flags;
+ return sec_flags | SEC_LINK_ONCE;
}
diff --git a/bfd/libcoff-in.h b/bfd/libcoff-in.h
index a0d286d37f..cdd504605b 100644
--- a/bfd/libcoff-in.h
+++ b/bfd/libcoff-in.h
@@ -24,6 +24,7 @@
#include "bfdlink.h"
#include "coff-bfd.h"
+#include "hashtab.h"
#ifdef __cplusplus
extern "C" {
@@ -153,10 +154,20 @@ typedef struct pe_tdata
const char *style;
asection *sec;
} build_id;
+
+ htab_t flags_hash;
} pe_data_type;
#define pe_data(bfd) ((bfd)->tdata.pe_obj_data)
+struct flags_entry
+{
+ flagword sec_flags;
+ const char *name;
+ int target_index;
+ long symbol;
+};
+
/* Tdata for XCOFF files. */
struct xcoff_tdata
diff --git a/bfd/peicode.h b/bfd/peicode.h
index e2e2be65b5..df1e678b5a 100644
--- a/bfd/peicode.h
+++ b/bfd/peicode.h
@@ -255,6 +255,25 @@ coff_swap_scnhdr_in (bfd * abfd, void * ext, void * in)
#endif
}
+static hashval_t
+htab_hash_flags (const void *entry)
+{
+ const struct flags_entry *fe = entry;
+ hashval_t h = 0;
+ h = iterative_hash (fe->name, strlen (fe->name), h);
+ h = iterative_hash_object (fe->target_index, h);
+ return h;
+}
+
+static int
+htab_eq_flags (const void *e1, const void *e2)
+{
+ const struct flags_entry *fe1 = e1;
+ const struct flags_entry *fe2 = e2;
+ return strcmp (fe1->name, fe2->name) == 0
+ && fe1->target_index == fe2->target_index;
+}
+
static bool
pe_mkobject (bfd * abfd)
{
@@ -291,6 +310,8 @@ pe_mkobject (bfd * abfd)
pe->dos_message[14] = 0x24;
pe->dos_message[15] = 0x0;
+ pe->flags_hash = htab_create (10, htab_hash_flags, htab_eq_flags, NULL);
+
memset (& pe->pe_opthdr, 0, sizeof pe->pe_opthdr);
bfd_coff_long_section_names (abfd)
--
2.40.0.windows.1
@@ -6822,6 +6822,15 @@ struct bfd
/* For input BFDs, the build ID, if the object has one. */
const struct bfd_build_id *build_id;
+ htab_t flags_hash;
+};
+
+struct flags_entry
+{
+ flagword sec_flags;
+ const char *name;
+ int target_index;
+ long symbol;
};
static inline const char *
@@ -908,19 +908,45 @@ styp_to_sec_flags (bfd *abfd,
#else /* COFF_WITH_PE */
-static flagword
-handle_COMDAT (bfd * abfd,
- flagword sec_flags,
- void * hdr,
- const char *name,
- asection *section)
+static struct flags_entry *
+find_flags (htab_t flags_hash, const char *name, int target_index)
{
- struct internal_scnhdr *internal_s = (struct internal_scnhdr *) hdr;
- bfd_byte *esymstart, *esym, *esymend;
- int seen_state = 0;
- char *target_name = NULL;
+ struct flags_entry needle;
+ needle.target_index = target_index;
+ needle.name = name;
+
+ return htab_find (flags_hash, &needle);
+}
+
+static void
+insert_flags (htab_t flags_hash, const char *name, int target_index,
+ flagword sec_flags, long symbol)
+{
+ struct flags_entry newentry;
+ newentry.target_index = target_index;
+ newentry.name = name;
+ newentry.sec_flags = sec_flags;
+ newentry.symbol = symbol;
+
+ void **slot = htab_find_slot (flags_hash, &newentry, INSERT);
+ if (slot == NULL)
+ return;
+ if (*slot == NULL)
+ {
+ *slot = bfd_zmalloc (sizeof (struct flags_entry));
+ if (*slot != NULL)
+ memcpy (*slot, &newentry, sizeof (struct flags_entry));
+ return;
+ }
+ struct flags_entry *entry = *slot;
+ entry->symbol = symbol;
+}
- sec_flags |= SEC_LINK_ONCE;
+static void
+fill_flags_hash (bfd *abfd, void *hdr)
+{
+ struct internal_scnhdr *internal_s = (struct internal_scnhdr *)hdr;
+ bfd_byte *esymstart, *esym, *esymend;
/* Unfortunately, the PE format stores essential information in
the symbol table, of all places. We need to extract that
@@ -939,249 +965,223 @@ handle_COMDAT (bfd * abfd,
doesn't seem to be a need to, either, and it would at best be
rather messy. */
- if (! _bfd_coff_get_external_symbols (abfd))
- return sec_flags;
+ if (!_bfd_coff_get_external_symbols (abfd))
+ return;
- esymstart = esym = (bfd_byte *) obj_coff_external_syms (abfd);
+ esymstart = esym = (bfd_byte *)obj_coff_external_syms (abfd);
esymend = esym + obj_raw_syment_count (abfd) * bfd_coff_symesz (abfd);
- while (esym < esymend)
+ for (struct internal_syment isym; esym < esymend;
+ esym += (isym.n_numaux + 1) * bfd_coff_symesz (abfd))
{
- struct internal_syment isym;
char buf[SYMNMLEN + 1];
const char *symname;
+ flagword sec_flags = SEC_LINK_ONCE;
- bfd_coff_swap_sym_in (abfd, esym, & isym);
+ bfd_coff_swap_sym_in (abfd, esym, &isym);
BFD_ASSERT (sizeof (internal_s->s_name) <= SYMNMLEN);
- if (isym.n_scnum == section->target_index)
- {
- /* According to the MSVC documentation, the first
- TWO entries with the section # are both of
- interest to us. The first one is the "section
- symbol" (section name). The second is the comdat
- symbol name. Here, we've found the first
- qualifying entry; we distinguish it from the
- second with a state flag.
-
- In the case of gas-generated (at least until that
- is fixed) .o files, it isn't necessarily the
- second one. It may be some other later symbol.
-
- Since gas also doesn't follow MS conventions and
- emits the section similar to .text$<name>, where
- <something> is the name we're looking for, we
- distinguish the two as follows:
-
- If the section name is simply a section name (no
- $) we presume it's MS-generated, and look at
- precisely the second symbol for the comdat name.
- If the section name has a $, we assume it's
- gas-generated, and look for <something> (whatever
- follows the $) as the comdat symbol. */
-
- /* All 3 branches use this. */
- symname = _bfd_coff_internal_syment_name (abfd, &isym, buf);
-
- /* PR 17512 file: 078-11867-0.004 */
- if (symname == NULL)
- {
- _bfd_error_handler (_("%pB: unable to load COMDAT section name"),
- abfd);
- break;
- }
-
- switch (seen_state)
- {
- case 0:
- {
- /* The first time we've seen the symbol. */
- union internal_auxent aux;
-
- /* If it isn't the stuff we're expecting, die;
- The MS documentation is vague, but it
- appears that the second entry serves BOTH
- as the comdat symbol and the defining
- symbol record (either C_STAT or C_EXT,
- possibly with an aux entry with debug
- information if it's a function.) It
- appears the only way to find the second one
- is to count. (On Intel, they appear to be
- adjacent, but on Alpha, they have been
- found separated.)
-
- Here, we think we've found the first one,
- but there's some checking we can do to be
- sure. */
-
- if (! ((isym.n_sclass == C_STAT
- || isym.n_sclass == C_EXT)
- && BTYPE (isym.n_type) == T_NULL
- && isym.n_value == 0))
- {
- /* Malformed input files can trigger this test.
- cf PR 21781. */
- _bfd_error_handler (_("%pB: error: unexpected symbol '%s'
in COMDAT section"),
- abfd, symname);
- goto breakloop;
- }
-
- /* FIXME LATER: MSVC generates section names
- like .text for comdats. Gas generates
- names like .text$foo__Fv (in the case of a
- function). See comment above for more. */
+ /* According to the MSVC documentation, the first
+ TWO entries with the section # are both of
+ interest to us. The first one is the "section
+ symbol" (section name). The second is the comdat
+ symbol name. Here, we've found the first
+ qualifying entry; we distinguish it from the
+ second with a state flag.
+
+ In the case of gas-generated (at least until that
+ is fixed) .o files, it isn't necessarily the
+ second one. It may be some other later symbol.
+
+ Since gas also doesn't follow MS conventions and
+ emits the section similar to .text$<name>, where
+ <something> is the name we're looking for, we
+ distinguish the two as follows:
+
+ If the section name is simply a section name (no
+ $) we presume it's MS-generated, and look at
+ precisely the second symbol for the comdat name.
+ If the section name has a $, we assume it's
+ gas-generated, and look for <something> (whatever
+ follows the $) as the comdat symbol. */
+
+ /* All 3 branches use this. */
+ symname = _bfd_coff_internal_syment_name (abfd, &isym, buf);
+
+ /* PR 17512 file: 078-11867-0.004 */
+ if (symname == NULL)
+ {
+ _bfd_error_handler (_ ("%pB: unable to load COMDAT section name"),
+ abfd);
+ continue;
+ }
+
+ union internal_auxent aux;
+
+ /* If it isn't the stuff we're expecting, die;
+ The MS documentation is vague, but it
+ appears that the second entry serves BOTH
+ as the comdat symbol and the defining
+ symbol record (either C_STAT or C_EXT,
+ possibly with an aux entry with debug
+ information if it's a function.) It
+ appears the only way to find the second one
+ is to count. (On Intel, they appear to be
+ adjacent, but on Alpha, they have been
+ found separated.)
+
+ Here, we think we've found the first one,
+ but there's some checking we can do to be
+ sure. */
+
+ /* FIXME LATER: MSVC generates section names
+ like .text for comdats. Gas generates
+ names like .text$foo__Fv (in the case of a
+ function). See comment above for more. */
+
+ /* This is the section symbol. */
+ bfd_coff_swap_aux_in (abfd, (esym + bfd_coff_symesz (abfd)), isym.n_type,
+ isym.n_sclass, 0, isym.n_numaux, &aux);
+
+ /* FIXME: Microsoft uses NODUPLICATES and
+ ASSOCIATIVE, but gnu uses ANY and
+ SAME_SIZE. Unfortunately, gnu doesn't do
+ the comdat symbols right. So, until we can
+ fix it to do the right thing, we are
+ temporarily disabling comdats for the MS
+ types (they're used in DLLs and C++, but we
+ don't support *their* C++ libraries anyway
+ - DJ. */
+
+ /* Cygwin does not follow the MS style, and
+ uses ANY and SAME_SIZE where NODUPLICATES
+ and ASSOCIATIVE should be used. For
+ Interix, we just do the right thing up
+ front. */
+
+ switch (aux.x_scn.x_comdat)
+ {
+ case IMAGE_COMDAT_SELECT_NODUPLICATES:
+#ifdef STRICT_PE_FORMAT
+ sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY;
+#else
+ sec_flags &= ~SEC_LINK_ONCE;
+#endif
+ break;
- if (isym.n_sclass == C_STAT && strcmp (name, symname) != 0)
- /* xgettext:c-format */
- _bfd_error_handler (_("%pB: warning: COMDAT symbol '%s'"
- " does not match section name '%s'"),
- abfd, symname, name);
+ case IMAGE_COMDAT_SELECT_ANY:
+ sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+ break;
- seen_state = 1;
+ case IMAGE_COMDAT_SELECT_SAME_SIZE:
+ sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE;
+ break;
- /* PR 17512: file: e2cfe54f. */
- if (esym + bfd_coff_symesz (abfd) >= esymend)
- {
- /* xgettext:c-format */
- _bfd_error_handler (_("%pB: warning: no symbol for"
- " section '%s' found"),
- abfd, symname);
- break;
- }
- /* This is the section symbol. */
- bfd_coff_swap_aux_in (abfd, (esym + bfd_coff_symesz (abfd)),
- isym.n_type, isym.n_sclass,
- 0, isym.n_numaux, & aux);
+ case IMAGE_COMDAT_SELECT_EXACT_MATCH:
+ /* Not yet fully implemented ??? */
+ sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS;
+ break;
- target_name = strchr (name, '$');
- if (target_name != NULL)
- {
- /* Gas mode. */
- seen_state = 2;
- /* Skip the `$'. */
- target_name += 1;
- }
+ /* debug$S gets this case; other
+ implications ??? */
- /* FIXME: Microsoft uses NODUPLICATES and
- ASSOCIATIVE, but gnu uses ANY and
- SAME_SIZE. Unfortunately, gnu doesn't do
- the comdat symbols right. So, until we can
- fix it to do the right thing, we are
- temporarily disabling comdats for the MS
- types (they're used in DLLs and C++, but we
- don't support *their* C++ libraries anyway
- - DJ. */
-
- /* Cygwin does not follow the MS style, and
- uses ANY and SAME_SIZE where NODUPLICATES
- and ASSOCIATIVE should be used. For
- Interix, we just do the right thing up
- front. */
-
- switch (aux.x_scn.x_comdat)
- {
- case IMAGE_COMDAT_SELECT_NODUPLICATES:
+ /* There may be no symbol... we'll search
+ the whole table... Is this the right
+ place to play this game? Or should we do
+ it when reading it in. */
+ case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
#ifdef STRICT_PE_FORMAT
- sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY;
+ /* FIXME: This is not currently implemented. */
+ sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
#else
- sec_flags &= ~SEC_LINK_ONCE;
+ sec_flags &= ~SEC_LINK_ONCE;
#endif
- break;
+ break;
- case IMAGE_COMDAT_SELECT_ANY:
- sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
- break;
+ default: /* 0 means "no symbol" */
+ /* debug$F gets this case; other
+ implications ??? */
+ sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+ break;
+ }
- case IMAGE_COMDAT_SELECT_SAME_SIZE:
- sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE;
- break;
+ insert_flags (abfd->flags_hash, symname, isym.n_scnum, sec_flags,
+ (esym - esymstart) / bfd_coff_symesz (abfd));
+ }
- case IMAGE_COMDAT_SELECT_EXACT_MATCH:
- /* Not yet fully implemented ??? */
- sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS;
- break;
+ return;
+}
+
+static void
+insert_coff_comdat_info (bfd *abfd, asection *section, const char *symname,
+ long symbol)
+{
+ char *newname;
+ size_t amt;
- /* debug$S gets this case; other
- implications ??? */
+ /* This must the second symbol with the
+ section #. It is the actual symbol name.
+ Intel puts the two adjacent, but Alpha (at
+ least) spreads them out. */
- /* There may be no symbol... we'll search
- the whole table... Is this the right
- place to play this game? Or should we do
- it when reading it in. */
- case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
-#ifdef STRICT_PE_FORMAT
- /* FIXME: This is not currently implemented. */
- sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
-#else
- sec_flags &= ~SEC_LINK_ONCE;
-#endif
- break;
+ amt = sizeof (struct coff_comdat_info);
+ coff_section_data (abfd, section)->comdat
+ = (struct coff_comdat_info *)bfd_alloc (abfd, amt);
+ if (coff_section_data (abfd, section)->comdat == NULL)
+ abort ();
- default: /* 0 means "no symbol" */
- /* debug$F gets this case; other
- implications ??? */
- sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
- break;
- }
- }
- break;
+ coff_section_data (abfd, section)->comdat->symbol = symbol;
- case 2:
- /* Gas mode: the first matching on partial name. */
+ amt = strlen (symname) + 1;
+ newname = (char *)bfd_alloc (abfd, amt);
+ if (newname == NULL)
+ abort ();
+ strcpy (newname, symname);
+ coff_section_data (abfd, section)->comdat->name = newname;
+}
+
+static flagword
+handle_COMDAT (bfd *abfd, flagword sec_flags, void *hdr, const char *name,
+ asection *section)
+{
+ if (htab_elements (abfd->flags_hash) == 0)
+ fill_flags_hash (abfd, hdr);
+
+ struct flags_entry *found
+ = find_flags (abfd->flags_hash, name, section->target_index);
+ if (found != NULL)
+ {
+ const char *target_name = strchr (name, '$');
+ if (target_name != NULL)
+ {
+ target_name += 1;
#ifndef TARGET_UNDERSCORE
#define TARGET_UNDERSCORE 0
#endif
- /* Is this the name we're looking for ? */
- if (strcmp (target_name,
- symname + (TARGET_UNDERSCORE ? 1 : 0)) != 0)
- {
- /* Not the name we're looking for */
- esym += (isym.n_numaux + 1) * bfd_coff_symesz (abfd);
- continue;
- }
- /* Fall through. */
- case 1:
- /* MSVC mode: the lexically second symbol (or
- drop through from the above). */
- {
- char *newname;
- size_t amt;
-
- /* This must the second symbol with the
- section #. It is the actual symbol name.
- Intel puts the two adjacent, but Alpha (at
- least) spreads them out. */
-
- amt = sizeof (struct coff_comdat_info);
- coff_section_data (abfd, section)->comdat
- = (struct coff_comdat_info *) bfd_alloc (abfd, amt);
- if (coff_section_data (abfd, section)->comdat == NULL)
- abort ();
-
- coff_section_data (abfd, section)->comdat->symbol =
- (esym - esymstart) / bfd_coff_symesz (abfd);
-
- amt = strlen (symname) + 1;
- newname = (char *) bfd_alloc (abfd, amt);
- if (newname == NULL)
- abort ();
-
- strcpy (newname, symname);
- coff_section_data (abfd, section)->comdat->name
- = newname;
- }
-
- goto breakloop;
- }
- }
-
- esym += (isym.n_numaux + 1) * bfd_coff_symesz (abfd);
+#if TARGET_UNDERSCORE
+ char *target_name_underscore = bfd_zmalloc (strlen
(target_name) + 2);
+ if (target_name_underscore == NULL)
+ return sec_flags | SEC_LINK_ONCE;
+ strcpy (target_name_underscore, "_");
+ strcat (target_name_underscore, target_name);
+ found = find_flags (abfd->flags_hash, target_name_underscore,
+ section->target_index);
+ free (target_name_underscore);
+#else
+ found = find_flags (abfd->flags_hash, target_name,
+ section->target_index);
+#endif
+ }
+ /* Is this the name we're looking for ? */
+ if (found != NULL)
+ {
+ insert_coff_comdat_info (abfd, section, found->name, found->symbol);
+ return sec_flags | found->sec_flags;
+ }
}
-
- breakloop:
- return sec_flags;
+ return sec_flags | SEC_LINK_ONCE;
}
@@ -52,6 +52,25 @@ unsigned int bfd_use_reserved_id = 0;
/* fdopen is a loser -- we should use stdio exclusively. Unfortunately
if we do that we can't use fcntl. */
+static hashval_t
+htab_hash_flags (const void *entry)
+{
+ const struct flags_entry *fe = entry;
+ hashval_t h = 0;
+ h = iterative_hash (fe->name, strlen (fe->name), h);
+ h = iterative_hash_object (fe->target_index, h);
+ return h;
+}
+
+static int
+htab_eq_flags (const void *e1, const void *e2)
+{
+ const struct flags_entry *fe1 = e1;
+ const struct flags_entry *fe2 = e2;
+ return strcmp (fe1->name, fe2->name) == 0
+ && fe1->target_index == fe2->target_index;
+}
+
/* Return a new BFD. All BFD's are allocated through this routine. */
bfd *
@@ -90,6 +109,7 @@ _bfd_new_bfd (void)
}
nbfd->archive_plugin_fd = -1;
+ nbfd->flags_hash = htab_create (10, htab_hash_flags, htab_eq_flags, NULL);
return nbfd;
}