rs6000: load high and low part of 128bit vector independently [PR110040]

Message ID 1a62a215-98e2-4e3b-9059-681189157d0a@linux.vnet.ibm.com
State Accepted
Headers
Series rs6000: load high and low part of 128bit vector independently [PR110040] |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

jeevitha Feb. 26, 2024, 5:43 a.m. UTC
  Hi All,

The following patch has been bootstrapped and regtested on powerpc64le-linux.

PR110040 exposes an issue concerning moves from vector registers to GPRs.
There are two moves, one for upper 64 bits and the other for the lower
64 bits.  In the problematic test case, we are only interested in storing
the lower 64 bits.  However, the instruction for copying the upper 64 bits
is still emitted and is dead code.  This patch adds a splitter that splits
apart the two move instructions so that DCE can remove the dead code after
splitting.

2024-02-26  Jeevitha Palanisamy  <jeevitha@linux.ibm.com>

gcc/
	PR target/110040
	* config/rs6000/vsx.md (split pattern for V1TI to DI move): Defined.

gcc/testsuite/
	PR target/110040
	* gcc.target/powerpc/pr110040-1.c: New testcase.
	* gcc.target/powerpc/pr110040-2.c: New testcase.
  

Patch

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 6111cc90eb7..78457f8fb14 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6706,3 +6706,19 @@ 
   "vmsumcud %0,%1,%2,%3"
   [(set_attr "type" "veccomplex")]
 )
+
+(define_split
+  [(set (match_operand:V1TI 0 "int_reg_operand")
+       (match_operand:V1TI 1 "vsx_register_operand"))]
+  "reload_completed
+   && TARGET_DIRECT_MOVE_64BIT"
+   [(pc)]
+{
+  rtx op0 = gen_rtx_REG (DImode, REGNO (operands[0]));
+  rtx op1 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
+  rtx op2 = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
+  rtx op3 = gen_rtx_REG (V2DImode, REGNO (operands[1]));
+  emit_insn (gen_vsx_extract_v2di (op0, op1, GEN_INT (0)));
+  emit_insn (gen_vsx_extract_v2di (op2, op3, GEN_INT (1)));
+  DONE;
+})
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-1.c b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
new file mode 100644
index 00000000000..fb3bd254636
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110040-1.c
@@ -0,0 +1,14 @@ 
+/* PR target/110040 */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
+
+#include <altivec.h>
+
+void
+foo (signed long *dst, vector signed __int128 src)
+{
+  *dst = (signed long) src[0];
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110040-2.c b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
new file mode 100644
index 00000000000..f3aa22be4e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110040-2.c
@@ -0,0 +1,13 @@ 
+/* PR target/110040 */
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
+
+#include <altivec.h>
+
+void
+foo (signed int *dst, vector signed __int128 src)
+{
+  __builtin_vec_xst_trunc (src, 0, dst);
+}