[x86_64] Allow any immediate constant in *cmp<dwi>_doubleword splitter.

Message ID 014501d8a8cd$43413aa0$c9c3afe0$@nextmovesoftware.com
State New, archived
Headers
Series [x86_64] Allow any immediate constant in *cmp<dwi>_doubleword splitter. |

Commit Message

Roger Sayle Aug. 5, 2022, 1:14 p.m. UTC
  This patch tweaks i386.md's *cmp<dwi>_doubleword splitter's predicate to
allow general_operand, not just x86_64_hilo_general_operand, to improve
code generation.  As a general rule, i386.md's _doubleword splitters should
be post-reload splitters that require integer immediate operands to be
x86_64_hilo_int_operand, so that each part is a valid word mode immediate
constant.  As an exception to this rule, doubleword patterns that must be
split before reload, because they require additional scratch registers,
can take advantage of the fact that new pseudos can still be created at
that point: they can accept any immediate constant, and call force_reg on
the high and/or low parts that are not suitable word mode immediate operands.

The benefit is shown in the new cmpti3.c test case below.

__int128 x;
int foo()
{
    __int128 t = 0x1234567890abcdefLL;
    return x == t;
}

where GCC with -O2 currently generates:

        movabsq $1311768467294899695, %rax
        xorl    %edx, %edx
        xorq    x(%rip), %rax
        xorq    x+8(%rip), %rdx
        orq     %rdx, %rax
        sete    %al
        movzbl  %al, %eax
        ret

but with this patch now generates:

        movabsq $1311768467294899695, %rax
        xorq    x(%rip), %rax
        orq     x+8(%rip), %rax
        sete    %al
        movzbl  %al, %eax
        ret

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  The first two new test cases aren't affected
by this patch, but as I had them in my directory, it seemed reasonable to
increase the testsuite's coverage of TImode comparison code generation.
Ok for mainline?

2022-08-05  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/i386/i386.md (*cmp<dwi>_doubleword): Change predicate
        from x86_64_hilo_general_operand to general_operand.  Call
        force_reg on parts that are not x86_64_immediate_operand.

gcc/testsuite/ChangeLog
        * gcc.target/i386/cmpti1.c: New test case.
        * gcc.target/i386/cmpti2.c: Likewise.
        * gcc.target/i386/cmpti3.c: Likewise.

Thanks in advance,
Roger
--
  

Comments

Uros Bizjak Aug. 6, 2022, 9:51 a.m. UTC | #1
On Fri, Aug 5, 2022 at 3:14 PM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
> This patch tweaks i386.md's *cmp<dwi>_doubleword splitter's predicate to
> allow general_operand, not just x86_64_hilo_general_operand, to improve
> code generation.  As a general rule, i386.md's _doubleword splitters should
> be post-reload splitters that require integer immediate operands to be
> x86_64_hilo_int_operand, so that each part is a valid word mode immediate
> constant.  As an exception to this rule, doubleword patterns that must be
> split before reload, because they require additional scratch registers,
> can take advantage of this ability to create new pseudos, to accept
> any immediate constant, and call force_reg on the high and/or low parts
> if they are not suitable immediate operands in word mode.
>
> The benefit is shown in the new cmpti3.c test case below.
>
> __int128 x;
> int foo()
> {
>     __int128 t = 0x1234567890abcdefLL;
>     return x == t;
> }
>
> where GCC with -O2 currently generates:
>
>         movabsq $1311768467294899695, %rax
>         xorl    %edx, %edx
>         xorq    x(%rip), %rax
>         xorq    x+8(%rip), %rdx
>         orq     %rdx, %rax
>         sete    %al
>         movzbl  %al, %eax
>         ret
>
> but with this patch now generates:
>
>         movabsq $1311768467294899695, %rax
>         xorq    x(%rip), %rax
>         orq     x+8(%rip), %rax
>         sete    %al
>         movzbl  %al, %eax
>         ret
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32},
> with no new failures.  The first two new test cases aren't affected
> by this patch, but as I had them in my directory, it seemed reasonable to
> increase the testsuite's coverage of TImode comparison code generation.
> Ok for mainline?
>
> 2022-08-05  Roger Sayle  <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
>         * config/i386/i386.md (*cmp<dwi>_doubleword): Change predicate
>         for x86_64_hilo_general_operand to general operand.  Call
>         force_reg on parts that are not x86_64_immediate_operand.
>
> gcc/testsuite/ChangeLog
>         * gcc.target/i386/cmpti1.c: New test case.
>         * gcc.target/i386/cmpti2.c: Likewise.
>         * gcc.target/i386/cmpti3.c: Likewise.

OK.

Thanks,
Uros.
  

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 298e4b3..fd30c57 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1510,7 +1510,7 @@ 
 (define_insn_and_split "*cmp<dwi>_doubleword"
   [(set (reg:CCZ FLAGS_REG)
 	(compare:CCZ (match_operand:<DWI> 0 "nonimmediate_operand")
-		     (match_operand:<DWI> 1 "x86_64_hilo_general_operand")))]
+		     (match_operand:<DWI> 1 "general_operand")))]
   "ix86_pre_reload_split ()"
   "#"
   "&& 1"
@@ -1544,7 +1544,12 @@ 
   else if (operands[0] == constm1_rtx)
     emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[1]));
   else
-    emit_insn (gen_xor<mode>3 (operands[4], operands[0], operands[1]));
+    {
+      if (CONST_SCALAR_INT_P (operands[1])
+	  && !x86_64_immediate_operand (operands[1], <MODE>mode))
+	operands[1] = force_reg (<MODE>mode, operands[1]);
+      emit_insn (gen_xor<mode>3 (operands[4], operands[0], operands[1]));
+    }
 
   if (operands[3] == const0_rtx)
     operands[5] = operands[2];
@@ -1558,7 +1563,12 @@ 
       else if (operands[2] == constm1_rtx)
 	emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[3]));
       else
-	emit_insn (gen_xor<mode>3 (operands[5], operands[2], operands[3]));
+	{
+	  if (CONST_SCALAR_INT_P (operands[3])
+	      && !x86_64_immediate_operand (operands[3], <MODE>mode))
+	    operands[3] = force_reg (<MODE>mode, operands[3]);
+	  emit_insn (gen_xor<mode>3 (operands[5], operands[2], operands[3]));
+	}
     }
 })
 
diff --git a/gcc/testsuite/gcc.target/i386/cmpti1.c b/gcc/testsuite/gcc.target/i386/cmpti1.c
new file mode 100644
index 0000000..1c5f121
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/cmpti1.c
@@ -0,0 +1,8 @@ 
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+int eq(__int128 x, __int128 y) { return x == y; }
+int ne(__int128 x, __int128 y) { return x != y; }
+/* { dg-final { scan-assembler-times "xorq" 4 } } */
+/* { dg-final { scan-assembler-times "setne" 1 } } */
+/* { dg-final { scan-assembler-times "sete" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/cmpti2.c b/gcc/testsuite/gcc.target/i386/cmpti2.c
new file mode 100644
index 0000000..ad95729
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/cmpti2.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+
+__int128 x;
+__int128 y;
+
+int eq() { return x == y; }
+int ne() { return x != y; }
+
+/* { dg-final { scan-assembler-times "xorq" 4 } } */
+/* { dg-final { scan-assembler-times "setne" 1 } } */
+/* { dg-final { scan-assembler-times "sete" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/cmpti3.c b/gcc/testsuite/gcc.target/i386/cmpti3.c
new file mode 100644
index 0000000..302efd2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/cmpti3.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+
+__int128 x;
+int foo()
+{
+    __int128 t = 0x1234567890abcdefLL;
+    return x == t;
+}
+
+/* { dg-final { scan-assembler-times "movabsq" 1 } } */
+/* { dg-final { scan-assembler-times "xorq" 1 } } */
+/* { dg-final { scan-assembler-not "xorl" } } */