Generating all-ones zmm needs dep-breaking pxor before ternlog (PR target/110438)

Message ID 8f73371d732237ed54ede44b7bd88624@ispras.ru
State Accepted
Headers
Series Generating all-ones zmm needs dep-breaking pxor before ternlog (PR target/110438) |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Yan Simonaytes via Gcc-patches July 4, 2023, 6:25 p.m. UTC
  PR target/110438 requests emitting a dependency-breaking PXOR before the
VPTERNLOG that generates an all-ones vector. This patch implements that.
  

Comments

Hongtao Liu July 5, 2023, 1:26 a.m. UTC | #1
On Wed, Jul 5, 2023 at 2:25 AM simonaytes.yan--- via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> PR target/110438 requests to emit PXOR before VPTERNLOG. This patch
> implements that.

I prefer using UNSPEC_INSN_FALSE_DEP like we did for lzcnt/tzcnt/popcnt.

i.e.
18866 ; False dependency happens when destination is only updated by tzcnt,
18867 ; lzcnt or popcnt.  There is no false dependency when destination is
18868 ; also used in source.
18869 (define_insn "*popcountsi2_zext_falsedep"
18870   [(set (match_operand:DI 0 "register_operand" "=r")
18871         (and:DI
18872           (subreg:DI
18873             (popcount:SI
18874               (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
18875           (const_int 63)))
18876    (unspec [(match_operand:DI 2 "register_operand" "0")]
18877            UNSPEC_INSN_FALSE_DEP)
18878    (clobber (reg:CC FLAGS_REG))]
18879   "TARGET_POPCNT && TARGET_64BIT"


BTW, I also posted a patch for this issue at
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623523.html
  

Patch

From 815779936d0ca213b4c9ec798ed6acf8179fc2e7 Mon Sep 17 00:00:00 2001
From: Yan Simonaytes <simonaytes.yan@ispras.ru>
Date: Tue, 4 Jul 2023 21:11:04 +0300
Subject: [PATCH] Generating all-ones zmm needs dep-breaking pxor before
 ternlog

	PR target/110438

gcc/ChangeLog:

	* config/i386/i386.cc (standard_sse_constant_opcode): Emit PXOR
	before VPTERNLOG.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr110438-1.c: New test.
        * gcc.target/i386/pr110438-2.c: New test.
        * gcc.target/i386/pr110438-3.c: New test.
---
 gcc/config/i386/i386.cc                    | 23 +++++++++++++++++++---
 gcc/testsuite/gcc.target/i386/pr110438-1.c | 12 +++++++++++
 gcc/testsuite/gcc.target/i386/pr110438-2.c | 12 +++++++++++
 gcc/testsuite/gcc.target/i386/pr110438-3.c | 12 +++++++++++
 4 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110438-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110438-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110438-3.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 8989985700a..89e0072caa1 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5329,6 +5329,13 @@  standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
 	case MODE_V8DF:
 	case MODE_V16SF:
 	  gcc_assert (TARGET_AVX512F);
+	  if (optimize_insn_for_speed_p ())
+	    {
+	      if (TARGET_AVX512VL)
+		output_asm_insn ("vpxor\t%x0, %x0, %x0", operands);
+	      else
+		output_asm_insn ("vpxor\t%g0, %g0, %g0", operands);
+	    }
 	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
 
 	case MODE_OI:
@@ -5344,10 +5351,20 @@  standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
 	    return (TARGET_AVX
 		    ? "vpcmpeqd\t%0, %0, %0"
 		    : "pcmpeqd\t%0, %0");
-	  else if (TARGET_AVX512VL)
-	    return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
 	  else
-	    return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
+	    { 
+	      if (optimize_insn_for_speed_p ())
+		{
+		  if (TARGET_AVX512VL)
+		    output_asm_insn ("vpxor\t%x0, %x0, %x0", operands);
+		  else
+		    output_asm_insn ("vpxor\t%g0, %g0, %g0", operands);
+		}
+	      if (TARGET_AVX512VL)
+		return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
+	      else
+		return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
+	    }
 
 	default:
 	  gcc_unreachable ();
diff --git a/gcc/testsuite/gcc.target/i386/pr110438-1.c b/gcc/testsuite/gcc.target/i386/pr110438-1.c
new file mode 100644
index 00000000000..0c5f4470e9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110438-1.c
@@ -0,0 +1,12 @@ 
+/*  PR target/110438 generating all-ones zmm needs dep-breaking pxor before ternlog */
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+
+typedef int v64 __attribute__((vector_size(64)));
+
+v64 g(void)
+{
+	return (v64){0} - 1;
+}
+
+/* { dg-final { scan-assembler "vpxor\t%z" } }*/
diff --git a/gcc/testsuite/gcc.target/i386/pr110438-2.c b/gcc/testsuite/gcc.target/i386/pr110438-2.c
new file mode 100644
index 00000000000..14770a972e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110438-2.c
@@ -0,0 +1,12 @@ 
+/*  PR target/110438 generating all-ones zmm needs dep-breaking pxor before ternlog */
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+
+typedef int v64 __attribute__((vector_size(64)));
+
+v64 g(void)
+{
+	return (v64){0} - 1;
+}
+
+/* { dg-final { scan-assembler "vpxor\t%x" } }*/
diff --git a/gcc/testsuite/gcc.target/i386/pr110438-3.c b/gcc/testsuite/gcc.target/i386/pr110438-3.c
new file mode 100644
index 00000000000..fb07997839c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110438-3.c
@@ -0,0 +1,12 @@ 
+/*  PR target/110438 generating all-ones zmm needs dep-breaking pxor before ternlog */
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -Os" } */
+
+typedef int v64 __attribute__((vector_size(64)));
+
+v64 g(void)
+{
+	return (v64){0} - 1;
+}
+
+/* { dg-final { scan-assembler-not "vpxor" } }*/
-- 
2.34.1