[x86] Improve ix86_expand_int_movcc to allow condition (mask) sharing.

Message ID 00c101d91e98$0fc74480$2f55cd80$@nextmovesoftware.com
State Accepted
Headers
Series [x86] Improve ix86_expand_int_movcc to allow condition (mask) sharing.

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Roger Sayle Jan. 2, 2023, 10:50 a.m. UTC
  This patch modifies the way that ix86_expand_int_movcc generates RTL,
to allow the condition mask to be shared/reused between multiple
conditional move sequences.  Such redundancy is common when RTL
if-conversion transforms non-trivial basic blocks.

As a motivating example, consider the new test case:

int a, b, c, d;
int foo(int x)
{
    if (x == 0) {
        a = 3;
        b = 1;
        c = 4;
        d = 1;
    } else {
        a = 5;
        b = 9;
        c = 2;
        d = 7;
    }
    return x;
}

This is currently compiled, with -O2, to:

foo:    cmpl    $1, %edi
        movl    %edi, %eax
        sbbl    %edi, %edi
        andl    $-2, %edi
        addl    $5, %edi
        cmpl    $1, %eax
        sbbl    %esi, %esi
        movl    %edi, a(%rip)
        andl    $-8, %esi
        addl    $9, %esi
        cmpl    $1, %eax
        sbbl    %ecx, %ecx
        movl    %esi, b(%rip)
        andl    $2, %ecx
        addl    $2, %ecx
        cmpl    $1, %eax
        sbbl    %edx, %edx
        movl    %ecx, c(%rip)
        andl    $-6, %edx
        addl    $7, %edx
        movl    %edx, d(%rip)
        ret

Notice that the if-then-else blocks have been if-converted into four
conditional move sequences/assignments, each consisting of cmpl, sbbl,
andl and addl.  However, as the conditions are the same, the cmpl and
sbbl instructions used to generate the mask could be shared by CSE.

This patch enables that, so we now generate:

foo:    cmpl    $1, %edi
        movl    %edi, %eax
        sbbl    %edx, %edx
        movl    %edx, %edi
        movl    %edx, %esi
        movl    %edx, %ecx
        andl    $-6, %edx
        andl    $-2, %edi
        andl    $-8, %esi
        andl    $2, %ecx
        addl    $7, %edx
        addl    $5, %edi
        addl    $9, %esi
        addl    $2, %ecx
        movl    %edx, d(%rip)
        movl    %edi, a(%rip)
        movl    %esi, b(%rip)
        movl    %ecx, c(%rip)
        ret

Notice that the code now contains only a single cmpl and a single sbbl,
with their result being shared (via movl).

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2023-01-02  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* config/i386/i386-expand.cc (ix86_expand_int_movcc): Rewrite
	RTL expansion to allow condition (mask) to be shared/reused,
	by avoiding overwriting pseudos and adding REG_EQUAL notes.

gcc/testsuite/ChangeLog
	* gcc.target/i386/cmov10.c: New test case.


Thanks in advance,
Roger
--
  

Comments

Uros Bizjak Jan. 3, 2023, 9:14 a.m. UTC | #1
On Mon, Jan 2, 2023 at 11:50 AM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
> This patch modifies the way that ix86_expand_int_movcc generates RTL,
> to allow the condition mask to be shared/reused between multiple
> conditional move sequences.  Such redundancy is common when RTL
> if-conversion transforms non-trivial basic blocks.
>
> As a motivating example, consider the new test case:
>
> int a, b, c, d;
> int foo(int x)
> {
>     if (x == 0) {
>         a = 3;
>         b = 1;
>         c = 4;
>         d = 1;
>     } else {
>         a = 5;
>         b = 9;
>         c = 2;
>         d = 7;
>     }
>     return x;
> }
>
> This is currently compiled, with -O2, to:
>
> foo:    cmpl    $1, %edi
>         movl    %edi, %eax
>         sbbl    %edi, %edi
>         andl    $-2, %edi
>         addl    $5, %edi
>         cmpl    $1, %eax
>         sbbl    %esi, %esi
>         movl    %edi, a(%rip)
>         andl    $-8, %esi
>         addl    $9, %esi
>         cmpl    $1, %eax
>         sbbl    %ecx, %ecx
>         movl    %esi, b(%rip)
>         andl    $2, %ecx
>         addl    $2, %ecx
>         cmpl    $1, %eax
>         sbbl    %edx, %edx
>         movl    %ecx, c(%rip)
>         andl    $-6, %edx
>         addl    $7, %edx
>         movl    %edx, d(%rip)
>         ret
>
> Notice that the if-then-else blocks have been if-converted into four
> conditional move sequences/assignments, each consisting of cmpl, sbbl,
> andl and addl.  However, as the conditions are the same, the cmpl and
> sbbl instructions used to generate the mask could be shared by CSE.
>
> This patch enables that, so we now generate:
>
> foo:    cmpl    $1, %edi
>         movl    %edi, %eax
>         sbbl    %edx, %edx
>         movl    %edx, %edi
>         movl    %edx, %esi
>         movl    %edx, %ecx
>         andl    $-6, %edx
>         andl    $-2, %edi
>         andl    $-8, %esi
>         andl    $2, %ecx
>         addl    $7, %edx
>         addl    $5, %edi
>         addl    $9, %esi
>         addl    $2, %ecx
>         movl    %edx, d(%rip)
>         movl    %edi, a(%rip)
>         movl    %esi, b(%rip)
>         movl    %ecx, c(%rip)
>         ret
>
> Notice, the code now contains only a single cmpl and a single sbbl,
> with their result being shared (via movl).
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32},
> with no new failures.  Ok for mainline?
>
>
> 2023-01-02  Roger Sayle  <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
>         * config/i386/i386-expand.cc (ix86_expand_int_movcc): Rewrite
>         RTL expansion to allow condition (mask) to be shared/reused,
>         by avoiding overwriting pseudos and adding REG_EQUAL notes.
>
> gcc/testsuite/ChangeLog
>         * gcc.target/i386/cmov10.c: New test case.

OK.

Thanks,
Uros.

>
>
> Thanks in advance,
> Roger
> --
>
  

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 3eddbc9..4fd7c3c 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3284,8 +3284,8 @@  ix86_expand_int_movcc (rtx operands[])
 	  || negate_cc_compare_p
 	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
 	{
-	  /* Detect overlap between destination and compare sources.  */
-	  rtx tmp = out;
+	  /* Place comparison result in its own pseudo.  */
+	  rtx tmp = gen_reg_rtx (mode);
 
 	  if (negate_cc_compare_p)
 	    {
@@ -3295,7 +3295,6 @@  ix86_expand_int_movcc (rtx operands[])
 		emit_insn (gen_x86_negsi_ccc (gen_reg_rtx (SImode),
 					      gen_lowpart (SImode, op0)));
 
-	      tmp = gen_reg_rtx (mode);
 	      if (mode == DImode)
 		emit_insn (gen_x86_movdicc_0_m1_neg (tmp));
 	      else
@@ -3337,9 +3336,6 @@  ix86_expand_int_movcc (rtx operands[])
 		}
 	      diff = ct - cf;
 
-	      if (reg_overlap_mentioned_p (out, compare_op))
-		tmp = gen_reg_rtx (mode);
-
 	      if (mode == DImode)
 		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
 	      else
@@ -3358,6 +3354,11 @@  ix86_expand_int_movcc (rtx operands[])
 	      tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
 	    }
 
+	  /* Add a REG_EQUAL note to allow condition to be shared.  */
+	  rtx note = gen_rtx_fmt_ee (code, mode, op0, op1);
+	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
+			       gen_rtx_NEG (mode, note));
+
 	  if (diff == 1)
 	    {
 	      /*
@@ -3368,9 +3369,8 @@  ix86_expand_int_movcc (rtx operands[])
 	       * Size 5 - 8.
 	       */
 	      if (ct)
-		tmp = expand_simple_binop (mode, PLUS,
-					   tmp, GEN_INT (ct),
-					   copy_rtx (tmp), 1, OPTAB_DIRECT);
+		tmp = expand_simple_binop (mode, PLUS, tmp, GEN_INT (ct),
+					   NULL_RTX, 1, OPTAB_DIRECT);
 	    }
 	  else if (cf == -1)
 	    {
@@ -3381,9 +3381,8 @@  ix86_expand_int_movcc (rtx operands[])
 	       *
 	       * Size 8.
 	       */
-	      tmp = expand_simple_binop (mode, IOR,
-					 tmp, GEN_INT (ct),
-					 copy_rtx (tmp), 1, OPTAB_DIRECT);
+	      tmp = expand_simple_binop (mode, IOR, tmp, GEN_INT (ct),
+					 NULL_RTX, 1, OPTAB_DIRECT);
 	    }
 	  else if (diff == -1 && ct)
 	    {
@@ -3395,11 +3394,10 @@  ix86_expand_int_movcc (rtx operands[])
 	       *
 	       * Size 8 - 11.
 	       */
-	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
+	      tmp = expand_simple_unop (mode, NOT, tmp, NULL_RTX, 1);
 	      if (cf)
-		tmp = expand_simple_binop (mode, PLUS,
-					   copy_rtx (tmp), GEN_INT (cf),
-					   copy_rtx (tmp), 1, OPTAB_DIRECT);
+		tmp = expand_simple_binop (mode, PLUS, tmp, GEN_INT (cf),
+					   NULL_RTX, 1, OPTAB_DIRECT);
 	    }
 	  else
 	    {
@@ -3417,22 +3415,18 @@  ix86_expand_int_movcc (rtx operands[])
 		{
 		  cf = ct;
 		  ct = 0;
-		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
+		  tmp = expand_simple_unop (mode, NOT, tmp, NULL_RTX, 1);
 		}
 
-	      tmp = expand_simple_binop (mode, AND,
-					 copy_rtx (tmp),
+	      tmp = expand_simple_binop (mode, AND, tmp,
 					 gen_int_mode (cf - ct, mode),
-					 copy_rtx (tmp), 1, OPTAB_DIRECT);
+					 NULL_RTX, 1, OPTAB_DIRECT);
 	      if (ct)
-		tmp = expand_simple_binop (mode, PLUS,
-					   copy_rtx (tmp), GEN_INT (ct),
-					   copy_rtx (tmp), 1, OPTAB_DIRECT);
+		tmp = expand_simple_binop (mode, PLUS, tmp, GEN_INT (ct),
+					   NULL_RTX, 1, OPTAB_DIRECT);
 	    }
 
-	  if (!rtx_equal_p (tmp, out))
-	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
-
+	  emit_move_insn (out, tmp);
 	  return true;
 	}
 
diff --git a/gcc/testsuite/gcc.target/i386/cmov10.c b/gcc/testsuite/gcc.target/i386/cmov10.c
new file mode 100644
index 0000000..142b4d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/cmov10.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+int a, b, c, d;
+
+int foo(int x)
+{
+    if (x == 0) {
+        a = 3;
+        b = 1;
+        c = 4;
+        d = 1;
+    } else {
+        a = 5;
+        b = 9;
+        c = 2;
+        d = 7;
+    }
+    return x;
+}
+/* { dg-final { scan-assembler-times "cmpl" 1 } } */
+/* { dg-final { scan-assembler-times "sbbl" 1 } } */