[v2] x86/APX: optimize MOVBE

Message ID 2831b090-2787-4b5c-b5ab-2197bed110dd@suse.com
State Unresolved
Headers
Series [v2] x86/APX: optimize MOVBE |

Checks

Context Check Description
snail/binutils-gdb-check warning Git am fail log

Commit Message

Jan Beulich Jan. 19, 2024, 10:51 a.m. UTC
  With identical source and destination registers, MOVBE can be covered by
the NDD-to-legacy conversion logic as well, even though in this case the
original insn doesn't use an NDD encoding. The size savings are even better
here, because the replacement (BSWAP) has no ModR/M byte.
  

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7858,10 +7858,11 @@  match_template (char mnem_suffix)
       if (optimize
 	  && !i.no_optimize
 	  && i.vec_encoding != vex_encoding_evex
-	  && t + 1 < current_templates.end
-	  && !t[1].opcode_modifier.evex
-	  && t[1].opcode_space <= SPACE_0F38
-	  && t->opcode_modifier.vexvvvv == VexVVVV_DST
+	  && ((t + 1 < current_templates.end
+	       && !t[1].opcode_modifier.evex
+	       && t[1].opcode_space <= SPACE_0F38
+	       && t->opcode_modifier.vexvvvv == VexVVVV_DST)
+	      || t->mnem_off == MN_movbe)
 	  && (i.types[i.operands - 1].bitfield.dword
 	      || i.types[i.operands - 1].bitfield.qword))
 	{
@@ -7898,6 +7899,12 @@  match_template (char mnem_suffix)
 		  --i.operands;
 		  --i.reg_operands;
 
+		  if (t->mnem_off == MN_movbe)
+		    {
+		      gas_assert (t[1].mnem_off == MN_bswap);
+		      ++current_templates.end;
+		    }
+
 		  specific_error = progress (internal_error);
 		  continue;
 		}
--- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
@@ -118,6 +118,9 @@  Disassembly of section .text:
 \s*[a-f0-9]+:\s*67 0f 4d 90 90 90 90 90 	cmovge -0x6f6f6f70\(%eax\),%edx
 \s*[a-f0-9]+:\s*67 0f 4e 90 90 90 90 90 	cmovle -0x6f6f6f70\(%eax\),%edx
 \s*[a-f0-9]+:\s*67 0f 4f 90 90 90 90 90 	cmovg  -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*62 f4 7d 08 60 c0    	movbe  %ax,%ax
+\s*[a-f0-9]+:\s*49 0f c8             	bswap  %r8
+\s*[a-f0-9]+:\s*d5 98 c8             	bswap  %r16
 \s*[a-f0-9]+:\s*66 0f 38 f6 c3       	adcx   %ebx,%eax
 \s*[a-f0-9]+:\s*66 0f 38 f6 c3       	adcx   %ebx,%eax
 \s*[a-f0-9]+:\s*62 f4 fd 18 66 c3    	adcx   %rbx,%rax,%rax
--- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
@@ -111,6 +111,9 @@  cmovl  0x90909090(%eax),%edx,%edx
 cmovge 0x90909090(%eax),%edx,%edx
 cmovle 0x90909090(%eax),%edx,%edx
 cmovg  0x90909090(%eax),%edx,%edx
+movbe  %ax,%ax
+movbe  %r8,%r8
+movbe  %r16,%r16
 adcx   %ebx,%eax,%eax
 adcx   %eax,%ebx,%eax
 adcx   %rbx,%rax,%rax
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -210,6 +210,9 @@  mov, 0xf24, i386&No64, D|RegMem|IgnoreSi
 // Move after swapping the bytes
 movbe, 0x0f38f0, Movbe, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 movbe, 0x60, Movbe&APX_F, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+// This needs to live here for easy EVEX -> REX2 conversion, which wants to
+// restart with the next sequential template.
+bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 }
 
 // Move with sign extend.
 movsb, 0xfbe, i386, Modrm|No_bSuf|No_sSuf, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
@@ -980,9 +983,8 @@  rex.wrxb, 0x4f, x64, NoSuf|IsPrefix, {}
 
 {<pseudopfx>}, PSEUDO_PREFIX/Prefix_<pseudopfx:ident>, <pseudopfx:cpu>, NoSuf|IsPrefix, {}
 
-// 486 extensions.
+// 486 extensions (BSWAP moved elsewhere).
 
-bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 }
 xadd, 0xfc0, i486, W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 cmpxchg, 0xfb0, i486, W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 invd, 0xf08, i486, NoSuf, {}