[v3,1/2] x86: Cpu64 handling improvements

Message ID cf55192f-3dc7-7862-9434-75eab063768e@suse.com
State Unresolved
Headers
Series x86: insn template CPU specifier processing |

Checks

Context Check Description
snail/binutils-gdb-check warning Git am fail log

Commit Message

Jan Beulich Nov. 7, 2023, 1:07 p.m. UTC
  First of all we want to also accumulate its reverse dependencies, such
that we can use them in cpu_flags_match(). This is in particular in
preparation of APX additions, such that e.g. BMI VEX-encoding templates
can become combined VEX/EVEX ones.

Once we have the reverse dependencies, we can further leverage them to
omit explicit "&x64" from any insn templates dealing with 64-bit-mode-
only ISA extensions. Besides helping readability for several insn
templates we already have, this will also help with what is going to be
added for APX (as all of the new templates would otherwise need to have
"&x64").

Note that rather than leaving a meaningless CPU_64_FLAGS (which is
unused anyway), its emitting is now also suppressed.
---
The new "active" local variable is initialized based on just flag_code.
cpu_flags_match() being a relatively hot path may want us to consider
introducing yet another global instead, which would be updated whenever
a .code directive is processed.
---
v3: New.
  

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -804,6 +804,9 @@  static char *cpu_sub_arch_name = NULL;
 /* CPU feature flags.  */
 i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
 
+/* ISA extensions available in 64-bit mode only.  */
+static const i386_cpu_flags cpu_64_flags = CPU_ANY_64_FLAGS;
+
 /* If we have selected a cpu we are generating instructions for.  */
 static int cpu_arch_tune_set = 0;
 
@@ -1874,7 +1877,12 @@  cpu_flags_match (const insn_template *t)
   else
     {
       /* This instruction is available only on some archs.  */
-      i386_cpu_flags cpu = cpu_arch_flags;
+      i386_cpu_flags active, cpu;
+
+      if (flag_code != CODE_64BIT)
+	active = cpu_flags_and_not (cpu_arch_flags, cpu_64_flags);
+      else
+	active = cpu_arch_flags;
 
       /* Dual VEX/EVEX templates may need stripping of one of the flags.  */
       if (t->opcode_modifier.vex && t->opcode_modifier.evex)
@@ -1895,14 +1903,14 @@  cpu_flags_match (const insn_template *t)
 		{
 		  x.bitfield.cpuavx512f = 0;
 		  x.bitfield.cpuavx512vl = 0;
-		  if (x.bitfield.cpufma && !cpu.bitfield.cpufma)
+		  if (x.bitfield.cpufma && !active.bitfield.cpufma)
 		    x.bitfield.cpuavx = 0;
 		}
 	    }
 	}
 
       /* AVX512VL is no standalone feature - match it and then strip it.  */
-      if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
+      if (x.bitfield.cpuavx512vl && !active.bitfield.cpuavx512vl)
 	return match;
       x.bitfield.cpuavx512vl = 0;
 
@@ -1912,7 +1920,7 @@  cpu_flags_match (const insn_template *t)
       if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
 	x.bitfield.cpuavx2 = 0;
 
-      cpu = cpu_flags_and (x, cpu);
+      cpu = cpu_flags_and (x, active);
       if (!cpu_flags_all_zero (&cpu))
 	{
 	  if (t->cpu.bitfield.cpuavx && t->cpu.bitfield.cpuavx512f)
@@ -1921,7 +1929,7 @@  cpu_flags_match (const insn_template *t)
 		   ? cpu.bitfield.cpuavx512f
 		   : cpu.bitfield.cpuavx)
 		  && (!x.bitfield.cpufma || cpu.bitfield.cpufma
-		      || cpu_arch_flags.bitfield.cpuavx512f)
+		      || active.bitfield.cpuavx512f)
 		  && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
 		  && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
 		  && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -166,6 +166,10 @@  static const dependency isa_dependencies
     "AVX2" },
   { "AVX_NE_CONVERT",
     "AVX2" },
+  { "CX16",
+    "64" },
+  { "LKGS",
+    "64" },
   { "FRED",
     "LKGS" },
   { "AVX512F",
@@ -240,13 +244,13 @@  static const dependency isa_dependencies
   { "SNP",
     "SEV_ES" },
   { "RMPQUERY",
-    "SNP" },
+    "SNP|64" },
   { "TSX",
     "RTM|HLE" },
   { "TSXLDTRK",
     "RTM" },
   { "AMX_TILE",
-    "XSAVE" },
+    "XSAVE|64" },
   { "AMX_INT8",
     "AMX_TILE" },
   { "AMX_BF16",
@@ -259,6 +263,18 @@  static const dependency isa_dependencies
     "SSE2" },
   { "WIDEKL",
     "KL" },
+  { "PBNDKB",
+    "64" },
+  { "UINTR",
+    "64" },
+  { "PREFETCHI",
+    "64" },
+  { "CMPCCXADD",
+    "64" },
+  { "MSRLIST",
+    "64" },
+  { "USER_MSR",
+    "64" },
 };
 
 /* This array is populated as process_i386_initializers() walks cpu_flags[].  */
@@ -772,8 +788,10 @@  add_isa_dependencies (bitfield *flags, c
 	  }
 	free (deps);
 
-	/* ISA extensions with dependencies need CPU_ANY_*_FLAGS emitted.  */
-	if (reverse < ARRAY_SIZE (isa_reverse_deps[0]))
+	/* ISA extensions with dependencies need CPU_ANY_*_FLAGS emitted,
+	   unless the sole dependency is the "64-bit mode only" one.  */
+	if (reverse < ARRAY_SIZE (isa_reverse_deps[0])
+	    && strcmp (isa_dependencies[i].deps, "64"))
 	  isa_reverse_deps[reverse][reverse] = 1;
 
 	is_avx = orig_is_avx;
@@ -919,6 +937,15 @@  process_i386_cpu_flag (FILE *table, char
       size_t len = strlen (name);
       char *upper = xmalloc (len + 1);
 
+      /* Cpu64 is special: It specifies a mode dependency, not an ISA one.  Zap
+	 the flag from ISA initializer macros (and from CPU_ANY_64_FLAGS
+	 itself we only care about tracking its dependents.  Also don't emit the
+	 (otherwise all zero) CPU_64_FLAGS.  */
+      if (flag != NULL && reverse == Cpu64)
+	return;
+      if (is_isa || flag == NULL)
+	flags[Cpu64].value = 0;
+
       for (i = 0; i < len; ++i)
 	{
 	  /* Don't emit #define-s for auxiliary entries.  */
@@ -931,6 +958,14 @@  process_i386_cpu_flag (FILE *table, char
 	       flag != NULL ? "": "ANY_", upper);
       free (upper);
     }
+  else
+    {
+      /* Synthesize "64-bit mode only" dependencies from the dependencies we
+	 have accumulated.  */
+      for (i = 0; i < ARRAY_SIZE (isa_reverse_deps[0]); ++i)
+	if (flags[i].value && isa_reverse_deps[Cpu64][i])
+	  flags[Cpu64].value = 1;
+    }
 
   output_cpu_flags (table, flags, ARRAY_SIZE (flags), name != NULL,
 		    comma, indent, lineno);
@@ -2142,6 +2177,8 @@  main (int argc, char **argv)
   qsort (operand_types, ARRAY_SIZE (operand_types),
 	 sizeof (operand_types [0]), compare);
 
+  process_i386_initializers ();
+
   table = fopen ("i386-tbl.h", "w");
   if (table == NULL)
     fail ("can't create i386-tbl.h, errno = %s\n",
@@ -2151,7 +2188,6 @@  main (int argc, char **argv)
 
   process_i386_opcodes (table);
   process_i386_registers (table);
-  process_i386_initializers ();
 
   fclose (table);
 
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -158,6 +158,8 @@ 
 #define i287 287
 #define i387 387
 #define i687 687
+// Note: Don't add this one to any templates already specifying a 64-bit-mode-
+// only ISA extension: i386-gen takes care of adding such dependencies.
 #define x64 64
 
 ### MARKER ###
@@ -1273,7 +1275,7 @@  fisttpll, 0xdd/1, FISTTP, Modrm|NoSuf|AT
 
 // CMPXCHG16B instruction.
 
-cmpxchg16b, 0xfc7/1, CX16|x64, Modrm|NoSuf|Size64|LockPrefixOk, { Oword|Unspecified|BaseIndex }
+cmpxchg16b, 0xfc7/1, CX16, Modrm|NoSuf|Size64|LockPrefixOk, { Oword|Unspecified|BaseIndex }
 
 // MONITOR instructions.
 
@@ -3013,7 +3015,7 @@  pconfig, 0x0f01c5, PCONFIG, NoSuf, {}
 
 // PBNDKB instruction.
 
-pbndkb, 0x0f01c7, PBNDKB|x64, NoSuf, {}
+pbndkb, 0x0f01c7, PBNDKB, NoSuf, {}
 
 // PBNDKB instruction end.
 
@@ -3100,8 +3102,8 @@  rmpadjust, 0xf30f01fe, SNP|x64, AddrPref
 
 // RMPQUERY instruction
 
-rmpquery, 0xf30f01fd, RMPQUERY|x64, NoSuf, {}
-rmpquery, 0xf30f01fd, RMPQUERY|x64, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
+rmpquery, 0xf30f01fd, RMPQUERY, NoSuf, {}
+rmpquery, 0xf30f01fd, RMPQUERY, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
 
 // RMPQUERY instruction end
 
@@ -3126,26 +3128,26 @@  xresldtrk, 0xf20f01e9, TSXLDTRK, NoSuf,
 
 // AMX instructions.
 
-ldtilecfg, 0x49/0, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
-sttilecfg, 0x6649/0, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
+ldtilecfg, 0x49/0, AMX_TILE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
+sttilecfg, 0x6649/0, AMX_TILE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
 
-tcmmimfp16ps, 0x666c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tcmmrlfp16ps, 0x6c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tcmmimfp16ps, 0x666c, AMX_COMPLEX, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tcmmrlfp16ps, 0x6c, AMX_COMPLEX, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
 
-tdpbf16ps, 0xf35c, AMX_BF16|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpfp16ps, 0xf25c, AMX_FP16|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbssd, 0xf25e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbuud, 0x5e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbusd, 0x665e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbsud, 0xf35e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-
-tileloadd, 0xf24b, AMX_TILE|x64, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
-tileloaddt1, 0x664b, AMX_TILE|x64, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
-tilestored, 0xf34b, AMX_TILE|x64, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { RegTMM, Unspecified|BaseIndex }
+tdpbf16ps, 0xf35c, AMX_BF16, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpfp16ps, 0xf25c, AMX_FP16, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbssd, 0xf25e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbuud, 0x5e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbusd, 0x665e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbsud, 0xf35e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+
+tileloadd, 0xf24b, AMX_TILE, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
+tileloaddt1, 0x664b, AMX_TILE, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
+tilestored, 0xf34b, AMX_TILE, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { RegTMM, Unspecified|BaseIndex }
 
-tilerelease, 0x49c0, AMX_TILE|x64, Vex128|Space0F38|VexW0|NoSuf, {}
+tilerelease, 0x49c0, AMX_TILE, Vex128|Space0F38|VexW0|NoSuf, {}
 
-tilezero, 0xf249, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM }
+tilezero, 0xf249, AMX_TILE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM }
 
 // AMX instructions end.
 
@@ -3176,11 +3178,11 @@  seamcall, 0x660f01cf, TDX|x64, NoSuf, {}
 
 // UINTR instructions.
 
-uiret, 0xf30f01ec, UINTR|x64, NoSuf, {}
-clui, 0xf30f01ee, UINTR|x64, NoSuf, {}
-stui, 0xf30f01ef, UINTR|x64, NoSuf, {}
-testui, 0xf30f01ed, UINTR|x64, NoSuf, {}
-senduipi, 0xf30fc7/6, UINTR|x64, Modrm|NoSuf|NoRex64, { Reg64 }
+uiret, 0xf30f01ec, UINTR, NoSuf, {}
+clui, 0xf30f01ee, UINTR, NoSuf, {}
+stui, 0xf30f01ef, UINTR, NoSuf, {}
+testui, 0xf30f01ed, UINTR, NoSuf, {}
+senduipi, 0xf30fc7/6, UINTR, Modrm|NoSuf|NoRex64, { Reg64 }
 
 // UINTR instructions end.
 
@@ -3302,14 +3304,14 @@  vrsqrtsh, 0x664f, AVX512_FP16, Modrm|EVe
 
 // PREFETCHI instructions.
 
-prefetchit0, 0xf18/7, PREFETCHI|x64, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
-prefetchit1, 0xf18/6, PREFETCHI|x64, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
+prefetchit0, 0xf18/7, PREFETCHI, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
+prefetchit1, 0xf18/6, PREFETCHI, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
 
 // PREFETCHI instructions end.
 
 // CMPCCXADD instructions.
 
-cmp<cc>xadd, 0x66e<cc:opc>, CMPCCXADD|x64, Modrm|Vex|Space0F38|VexVVVV|SwapSources|CheckOperandSize|NoSuf, { Reg32|Reg64, Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
+cmp<cc>xadd, 0x66e<cc:opc>, CMPCCXADD, Modrm|Vex|Space0F38|VexVVVV|SwapSources|CheckOperandSize|NoSuf, { Reg32|Reg64, Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
 
 // CMPCCXADD instructions end.
 
@@ -3321,8 +3323,8 @@  wrmsrns, 0x0f01c6, WRMSRNS, NoSuf, {}
 
 // MSRLIST instructions.
 
-rdmsrlist, 0xf20f01c6, MSRLIST|x64, NoSuf, {}
-wrmsrlist, 0xf30f01c6, MSRLIST|x64, NoSuf, {}
+rdmsrlist, 0xf20f01c6, MSRLIST, NoSuf, {}
+wrmsrlist, 0xf30f01c6, MSRLIST, NoSuf, {}
 
 // MSRLIST instructions end.
 
@@ -3337,23 +3339,23 @@  axor, 0xf30f38fc, RAO_INT, Modrm|IgnoreS
 
 // LKGS instruction.
 
-lkgs, 0xf20f00/6, LKGS|x64, Modrm|IgnoreSize|No_bSuf|No_sSuf|NoRex64, { Reg16|Reg32|Reg64 }
-lkgs, 0xf20f00/6, LKGS|x64, Modrm|IgnoreSize|No_bSuf|No_lSuf|No_sSuf|No_qSuf, { Word|Unspecified|BaseIndex }
+lkgs, 0xf20f00/6, LKGS, Modrm|IgnoreSize|No_bSuf|No_sSuf|NoRex64, { Reg16|Reg32|Reg64 }
+lkgs, 0xf20f00/6, LKGS, Modrm|IgnoreSize|No_bSuf|No_lSuf|No_sSuf|No_qSuf, { Word|Unspecified|BaseIndex }
 
 // LKGS instruction end.
 
 // FRED instructions.
 
-erets, 0xf20f01ca, FRED|x64, NoSuf, {}
-eretu, 0xf30f01ca, FRED|x64, NoSuf, {}
+erets, 0xf20f01ca, FRED, NoSuf, {}
+eretu, 0xf30f01ca, FRED, NoSuf, {}
 
 // FRED instructions end.
 
 // USER_MSR instructions.
 
-urdmsr, 0xf20f38f8, USER_MSR|x64, RegMem|NoSuf|NoRex64, { Reg64, Reg64 }
-urdmsr, 0xf2f8/0, USER_MSR|x64, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Imm32, Reg64 }
-uwrmsr, 0xf30f38f8, USER_MSR|x64, Modrm|NoSuf|NoRex64, { Reg64, Reg64 }
-uwrmsr, 0xf3f8/0, USER_MSR|x64, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Reg64, Imm32 }
+urdmsr, 0xf20f38f8, USER_MSR, RegMem|NoSuf|NoRex64, { Reg64, Reg64 }
+urdmsr, 0xf2f8/0, USER_MSR, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Imm32, Reg64 }
+uwrmsr, 0xf30f38f8, USER_MSR, Modrm|NoSuf|NoRex64, { Reg64, Reg64 }
+uwrmsr, 0xf3f8/0, USER_MSR, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Reg64, Imm32 }
 
 // USER_MSR instructions end.