[1/8] x86: move insn mnemonics to a separate table

Message ID fa681978-5ee6-16ba-3f08-958f6d1cf805@suse.com
State Unresolved
Headers
Series [1/8] x86: move insn mnemonics to a separate table |

Checks

Context Check Description
snail/binutils-gdb-check warning Git am fail log

Commit Message

Jan Beulich Jan. 13, 2023, 11:06 a.m. UTC
  Using full pointers to reference the insn mnemonic strings is not very
efficient. With overall string size presently just slightly over 20k,
even a 16-bit value would suffice. Use "unsigned int" for now, as
there's no good use we could presently make of the otherwise saved 16
bits.

For 64-bit builds this reduces table size by 6.25% (prior to the recent
ISA extension additions it would have been 12.5%), with a similar effect
on cache occupation of table entries accessed. For PIE builds of gas
this also reduces the number of base relocations quite a bit (obviously
independent of bitness).
---
An alternative to introducing i386-mnem.h would of course be to put the
#define-s in i386-init.h. That would look like an abuse of the file to
me, but I'd be okay switching to such an approach.

As to further shrinking mnem_off (to 16 bits): I'm intending to drop
i386_opcode_modifier as a separate struct, embedding the fields directly
in insn_template. i386_opcode_modifier presently using only 6 bits from
its 3rd word will allow to shrink insn_template by another word then.
(This would also benefit readabilty of tc-i386*.c, as all the uses of
"opcode_modifier." would go away. This would additionally reduce the
apparent discrepancy between e.g. opcode_modifier.opcode_space and
opcode_modifier.opcode_prefix vs base_opcode and extension_opcode.)
  

Comments

Jan Beulich Jan. 13, 2023, 11:12 a.m. UTC | #1
On 13.01.2023 12:06, Jan Beulich via Binutils wrote:
> Using full pointers to reference the insn mnemonic strings is not very
> efficient. With overall string size presently just slightly over 20k,
> even a 16-bit value would suffice. Use "unsigned int" for now, as
> there's no good use we could presently make of the otherwise saved 16
> bits.
> 
> For 64-bit builds this reduces table size by 6.25% (prior to the recent
> ISA extension additions it would have been 12.5%), with a similar effect
> on cache occupation of table entries accessed. For PIE builds of gas
> this also reduces the number of base relocations quite a bit (obviously
> independent of bitness).
> ---
> An alternative to introducing i386-mnem.h would of course be to put the
> #define-s in i386-init.h. That would look like an abuse of the file to
> me, but I'd be okay switching to such an approach.
> 
> As to further shrinking mnem_off (to 16 bits): I'm intending to drop
> i386_opcode_modifier as a separate struct, embedding the fields directly
> in insn_template. i386_opcode_modifier presently using only 6 bits from
> its 3rd word will allow to shrink insn_template by another word then.
> (This would also benefit readabilty of tc-i386*.c, as all the uses of
> "opcode_modifier." would go away. This would additionally reduce the
> apparent discrepancy between e.g. opcode_modifier.opcode_space and
> opcode_modifier.opcode_prefix vs base_opcode and extension_opcode.)

And of course this is patch 2/8; I'm sorry for the typo.

Jan
  

Patch

--- a/gas/Makefile.am
+++ b/gas/Makefile.am
@@ -453,7 +453,7 @@  i386_tbl_deps = $(srcdir)/../opcodes/i38
 	$(srcdir)/../opcodes/i386-reg.tbl \
 	$(srcdir)/../opcodes/i386-gen.c $(srcdir)/../opcodes/i386-opc.h
 
-$(srcdir)/../opcodes/i386%init.h $(srcdir)/../opcodes/i386%tbl.h: @MAINT@ $(i386_tbl_deps)
+$(srcdir)/../opcodes/i386%init.h $(srcdir)/../opcodes/i386%tbl.h $(srcdir)/../opcodes/i386%mnem.h: @MAINT@ $(i386_tbl_deps)
 	@echo '"$@" is outdated wrt "$?"' >&2
 	@echo 'Please rebuild from the top level or in $(CURDIR)/../opcodes/' >&2
 	@false
--- a/gas/Makefile.in
+++ b/gas/Makefile.in
@@ -2070,7 +2070,7 @@  development.exp: $(BFDDIR)/development.s
 
 config/tc-i386.o: $(srcdir)/../opcodes/i386-init.h $(srcdir)/../opcodes/i386-tbl.h
 
-$(srcdir)/../opcodes/i386%init.h $(srcdir)/../opcodes/i386%tbl.h: @MAINT@ $(i386_tbl_deps)
+$(srcdir)/../opcodes/i386%init.h $(srcdir)/../opcodes/i386%tbl.h $(srcdir)/../opcodes/i386%mnem.h: @MAINT@ $(i386_tbl_deps)
 	@echo '"$@" is outdated wrt "$?"' >&2
 	@echo 'Please rebuild from the top level or in $(CURDIR)/../opcodes/' >&2
 	@false
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -34,6 +34,7 @@ 
 #include "sframe.h"
 #include "elf/x86-64.h"
 #include "opcodes/i386-init.h"
+#include "opcodes/i386-mnem.h"
 #include <limits.h>
 
 #ifndef INFER_ADDR_PREFIX
@@ -2428,7 +2429,7 @@  offset_in_range (offsetT val, int size)
 
 static INLINE const char *insn_name (const insn_template *t)
 {
-  return t->name;
+  return &i386_mnemonics[t->mnem_off];
 }
 
 enum PREFIX_GROUP
--- a/opcodes/Makefile.am
+++ b/opcodes/Makefile.am
@@ -523,7 +523,8 @@  MOSTLYCLEANFILES = aarch64-gen$(EXEEXT_F
 	z8kgen$(EXEEXT_FOR_BUILD) opc2c$(EXEEXT_FOR_BUILD)
 
 MAINTAINERCLEANFILES = $(srcdir)/aarch64-asm-2.c $(srcdir)/aarch64-dis-2.c \
-	$(srcdir)/aarch64-opc-2.c $(srcdir)/i386-tbl.h $(srcdir)/i386-init.h \
+	$(srcdir)/aarch64-opc-2.c \
+	$(srcdir)/i386-tbl.h $(srcdir)/i386-init.h $(srcdir)/i386-mnem.h \
 	$(srcdir)/ia64-asmtab.c $(srcdir)/z8k-opc.h \
 	$(srcdir)/msp430-decode.c \
 	$(srcdir)/rl78-decode.c \
@@ -552,16 +553,17 @@  i386-gen.o: i386-gen.c i386-opc.h $(srcd
 	config.h sysdep.h
 	$(COMPILE_FOR_BUILD) -c $(srcdir)/i386-gen.c
 
-# i386-gen will generate both headers in one go.  Use a pattern rule to properly
+# i386-gen will generate all headers in one go.  Use a pattern rule to properly
 # express this, with the inner dash ('-') arbitrarily chosen to be the stem.
-$(srcdir)/i386%tbl.h $(srcdir)/i386%init.h: @MAINT@ i386-gen$(EXEEXT_FOR_BUILD) i386-opc.tbl i386-reg.tbl i386-opc.h
+$(srcdir)/i386%tbl.h $(srcdir)/i386%init.h $(srcdir)/i386%mnem.h: \
+		@MAINT@ i386-gen$(EXEEXT_FOR_BUILD) i386-opc.tbl i386-reg.tbl i386-opc.h
 	$(AM_V_GEN)$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) - \
 		< $(srcdir)/i386-opc.tbl \
 		| ./i386-gen$(EXEEXT_FOR_BUILD) --srcdir $(srcdir)
 
-# While not really dependencies, specify i386-{init,tbl}.h here as well to
-# make sure they are re-generated as necessary.
-i386-dis.lo: $(srcdir)/i386-tbl.h $(srcdir)/i386-init.h
+# While not really dependencies, specify other generated i386-*.h here as well
+# to make sure they are re-generated as necessary.
+i386-dis.lo: $(srcdir)/i386-tbl.h $(srcdir)/i386-init.h $(srcdir)/i386-mnem.h
 
 ia64-gen$(EXEEXT_FOR_BUILD): ia64-gen.o $(BUILD_LIB_DEPS)
 	$(AM_V_CCLD)$(LINK_FOR_BUILD) ia64-gen.o $(BUILD_LIBS)
--- a/opcodes/Makefile.in
+++ b/opcodes/Makefile.in
@@ -758,7 +758,8 @@  MOSTLYCLEANFILES = aarch64-gen$(EXEEXT_F
 	z8kgen$(EXEEXT_FOR_BUILD) opc2c$(EXEEXT_FOR_BUILD)
 
 MAINTAINERCLEANFILES = $(srcdir)/aarch64-asm-2.c $(srcdir)/aarch64-dis-2.c \
-	$(srcdir)/aarch64-opc-2.c $(srcdir)/i386-tbl.h $(srcdir)/i386-init.h \
+	$(srcdir)/aarch64-opc-2.c \
+	$(srcdir)/i386-tbl.h $(srcdir)/i386-init.h $(srcdir)/i386-mnem.h \
 	$(srcdir)/ia64-asmtab.c $(srcdir)/z8k-opc.h \
 	$(srcdir)/msp430-decode.c \
 	$(srcdir)/rl78-decode.c \
@@ -1526,16 +1527,17 @@  i386-gen.o: i386-gen.c i386-opc.h $(srcd
 	config.h sysdep.h
 	$(COMPILE_FOR_BUILD) -c $(srcdir)/i386-gen.c
 
-# i386-gen will generate both headers in one go.  Use a pattern rule to properly
+# i386-gen will generate all headers in one go.  Use a pattern rule to properly
 # express this, with the inner dash ('-') arbitrarily chosen to be the stem.
-$(srcdir)/i386%tbl.h $(srcdir)/i386%init.h: @MAINT@ i386-gen$(EXEEXT_FOR_BUILD) i386-opc.tbl i386-reg.tbl i386-opc.h
+$(srcdir)/i386%tbl.h $(srcdir)/i386%init.h $(srcdir)/i386%mnem.h: \
+		@MAINT@ i386-gen$(EXEEXT_FOR_BUILD) i386-opc.tbl i386-reg.tbl i386-opc.h
 	$(AM_V_GEN)$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) - \
 		< $(srcdir)/i386-opc.tbl \
 		| ./i386-gen$(EXEEXT_FOR_BUILD) --srcdir $(srcdir)
 
-# While not really dependencies, specify i386-{init,tbl}.h here as well to
-# make sure they are re-generated as necessary.
-i386-dis.lo: $(srcdir)/i386-tbl.h $(srcdir)/i386-init.h
+# While not really dependencies, specify other generated i386-*.h here as well
+# to make sure they are re-generated as necessary.
+i386-dis.lo: $(srcdir)/i386-tbl.h $(srcdir)/i386-init.h $(srcdir)/i386-mnem.h
 
 ia64-gen$(EXEEXT_FOR_BUILD): ia64-gen.o $(BUILD_LIB_DEPS)
 	$(AM_V_CCLD)$(LINK_FOR_BUILD) ia64-gen.o $(BUILD_LIBS)
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -1146,12 +1146,26 @@  process_i386_operand_type (FILE *table,
 		       stage, indent);
 }
 
+static char *mkident (const char *mnem)
+{
+  char *ident = xstrdup (mnem), *p = ident;
+
+  do
+    {
+      if (!ISALNUM (*p))
+	*p = '_';
+    }
+  while (*++p);
+
+  return ident;
+}
+
 static void
 output_i386_opcode (FILE *table, const char *name, char *str,
 		    char *last, int lineno)
 {
   unsigned int i, length, prefix = 0, space = 0;
-  char *base_opcode, *extension_opcode, *end;
+  char *base_opcode, *extension_opcode, *end, *ident;
   char *cpu_flags, *opcode_modifier, *operand_types [MAX_OPERANDS];
   unsigned long long opcode;
 
@@ -1245,9 +1259,11 @@  output_i386_opcode (FILE *table, const c
     fail (_("%s:%d: %s: residual opcode (0x%0*llx) too large\n"),
 	  filename, lineno, name, 2 * length, opcode);
 
-  fprintf (table, "  { \"%s\", 0x%0*llx%s, %lu, %s,\n",
-	   name, 2 * (int)length, opcode, end, i,
+  ident = mkident (name);
+  fprintf (table, "  { MN_%s, 0x%0*llx%s, %lu, %s,\n",
+	   ident, 2 * (int)length, opcode, end, i,
 	   extension_opcode ? extension_opcode : "None");
+  free (ident);
 
   process_i386_opcode_modifier (table, opcode_modifier, space, prefix,
 				operand_types, lineno);
@@ -1565,7 +1581,7 @@  process_i386_opcodes (FILE *table)
 {
   FILE *fp;
   char buf[2048];
-  unsigned int i, j, nr;
+  unsigned int i, j, nr, offs;
   char *str, *p, *last, *name;
   htab_t opcode_hash_table;
   struct opcode_hash_entry **opcode_array = NULL;
@@ -1579,6 +1595,7 @@  process_i386_opcodes (FILE *table)
 					 opcode_hash_eq, NULL,
 					 xcalloc, free);
 
+  fprintf (table, "\n#include \"i386-mnem.h\"\n");
   fprintf (table, "\n/* i386 opcode table.  */\n\n");
   fprintf (table, "static const insn_template i386_optab[] =\n{\n");
 
@@ -1701,6 +1718,32 @@  process_i386_opcodes (FILE *table)
     }
 
   fprintf (table, "};\n");
+
+  /* Emit mnemonics and associated #define-s.  */
+  fp = fopen ("i386-mnem.h", "w");
+  if (fp == NULL)
+    fail (_("can't create i386-mnem.h, errno = %s\n"),
+	  xstrerror (errno));
+
+  process_copyright (fp);
+
+  fprintf (table, "\n/* i386 mnemonics table.  */\n\n");
+  fprintf (table, "const char i386_mnemonics[] =\n");
+  fprintf (fp, "\nextern const char i386_mnemonics[];\n\n");
+
+  for (offs = j = 0; j < i; j++)
+    {
+      name = opcode_array[j]->name;
+      fprintf (table, "  \"\\0\"\"%s\"\n", name);
+      str = mkident (name);
+      fprintf (fp, "#define MN_%s %#x\n", str, offs + 1);
+      free (str);
+      offs += strlen (name) + 1;
+    }
+
+  fprintf (table, ";\n");
+
+  fclose (fp);
 }
 
 static void
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -918,7 +918,7 @@  typedef union i386_operand_type
 typedef struct insn_template
 {
   /* instruction name sans width suffix ("mov" for movl insns) */
-  const char *name;
+  unsigned int mnem_off;
 
   /* Bitfield arrangement is such that individual fields can be easily
      extracted (in native builds at least) - either by at most a masking