amdgcn: Add instruction pattern for conditional shift operations

Message ID b1efd091-471f-ed79-ad14-64946f2e5565@codesourcery.com
State Accepted
Headers
Series amdgcn: Add instruction pattern for conditional shift operations

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Paul-Antoine Arras Feb. 1, 2023, 3:35 p.m. UTC
  This patch introduces an instruction pattern for conditional shift 
operations (cond_{ashl|ashr|lshr}) in the GCN machine description.
Tested on GCN3 Fiji gfx803.

OK to commit?
  

Comments

Andrew Stubbs Feb. 2, 2023, 11:27 a.m. UTC | #1
On 01/02/2023 15:35, Paul-Antoine Arras wrote:
> This patch introduces an instruction pattern for conditional shift 
> operations (cond_{ashl|ashr|lshr}) in the GCN machine description.
> Tested on GCN3 Fiji gfx803.
> 
> OK to commit?

The changelog will need to be wrapped to 80 columns.

OK otherwise.

Andrew
  

Patch

From e9c974670e8d37f725098eea97e22be5e2e9fd21 Mon Sep 17 00:00:00 2001
From: Paul-Antoine Arras <pa@codesourcery.com>
Date: Wed, 1 Feb 2023 16:13:23 +0100
Subject: [PATCH] amdgcn: Add instruction pattern for conditional shift
 operations

gcc/ChangeLog:

	* config/gcn/gcn-valu.md (cond_<expander><mode>): Add
	cond_{ashl|ashr|lshr}.

gcc/testsuite/ChangeLog:

	* gcc.target/gcn/cond_shift_3.c: New test.
	* gcc.target/gcn/cond_shift_3_run.c: New test.
	* gcc.target/gcn/cond_shift_4.c: New test.
	* gcc.target/gcn/cond_shift_4_run.c: New test.
	* gcc.target/gcn/cond_shift_8.c: New test.
	* gcc.target/gcn/cond_shift_8_run.c: New test.
	* gcc.target/gcn/cond_shift_9.c: New test.
	* gcc.target/gcn/cond_shift_9_run.c: New test.
---
 gcc/config/gcn/gcn-valu.md                    | 23 +++++++++++
 gcc/testsuite/gcc.target/gcn/cond_shift_3.c   | 37 ++++++++++++++++++
 .../gcc.target/gcn/cond_shift_3_run.c         | 27 +++++++++++++
 gcc/testsuite/gcc.target/gcn/cond_shift_4.c   | 38 +++++++++++++++++++
 .../gcc.target/gcn/cond_shift_4_run.c         | 27 +++++++++++++
 gcc/testsuite/gcc.target/gcn/cond_shift_8.c   | 35 +++++++++++++++++
 .../gcc.target/gcn/cond_shift_8_run.c         | 28 ++++++++++++++
 gcc/testsuite/gcc.target/gcn/cond_shift_9.c   | 36 ++++++++++++++++++
 .../gcc.target/gcn/cond_shift_9_run.c         | 28 ++++++++++++++
 9 files changed, 279 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_3.c
 create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_3_run.c
 create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_4.c
 create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_4_run.c
 create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_8.c
 create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_8_run.c
 create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_9.c
 create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_9_run.c

diff --git gcc/config/gcn/gcn-valu.md gcc/config/gcn/gcn-valu.md
index 44b04c222f7..47d9d87d58a 100644
--- gcc/config/gcn/gcn-valu.md
+++ gcc/config/gcn/gcn-valu.md
@@ -3489,6 +3489,29 @@  (define_expand "cond_<expander><mode>"
     DONE;
   })
 
+(define_code_iterator cond_shiftop [ashift lshiftrt ashiftrt])
+
+(define_expand "cond_<expander><mode>"
+  [(match_operand:V_INT_noHI 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (cond_shiftop:V_INT_noHI
+     (match_operand:V_INT_noHI 2 "gcn_alu_operand")
+     (match_operand:V_INT_noHI 3 "gcn_alu_operand"))
+   (match_operand:V_INT_noHI 4 "register_operand")]
+  ""
+  {
+    operands[1] = force_reg (DImode, operands[1]);
+    operands[2] = force_reg (<MODE>mode, operands[2]);
+
+    rtx shiftby = gen_reg_rtx (<VnSI>mode);
+    convert_move (shiftby, operands[3], 0);
+
+    emit_insn (gen_v<expander><mode>3_exec (operands[0], operands[2],
+                                            shiftby, operands[4],
+                                            operands[1]));
+    DONE;
+  })
+
 ;; }}}
 ;; {{{ Vector reductions
 
diff --git gcc/testsuite/gcc.target/gcn/cond_shift_3.c gcc/testsuite/gcc.target/gcn/cond_shift_3.c
new file mode 100644
index 00000000000..983386c1464
--- /dev/null
+++ gcc/testsuite/gcc.target/gcn/cond_shift_3.c
@@ -0,0 +1,37 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -dp" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP)                                               \
+  void __attribute__ ((noipa))                                                 \
+  test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a,                \
+			TYPE *__restrict b, int n)                             \
+  {                                                                            \
+    for (int i = 0; i < n; ++i)                                                \
+      r[i] = a[i] > 20 ? b[i] OP 3 : 72;                                       \
+  }
+
+#define TEST_TYPE(T, TYPE)                                                     \
+  T (TYPE, shl, <<)                                                            \
+  T (TYPE, shr, >>)
+
+#define TEST_ALL(T)                                                            \
+  TEST_TYPE (T, int32_t)                                                       \
+  TEST_TYPE (T, uint32_t)                                                      \
+  TEST_TYPE (T, int64_t)                                                       \
+  TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tv_lshlrev_b32\tv[0-9]+, 3, v[0-9]+} 10 } } */
+/* { dg-final { scan-assembler-times {\tv_ashrrev_i32\tv[0-9]+, 3, v[0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {vashlv64si3_exec} 18 } } */
+/* { dg-final { scan-assembler-times {vashrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vashlv64di3_exec} 2 } } */
+/* { dg-final { scan-assembler-times {vashrv64di3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64di3_exec} 1 } } */
+
+/* { dg-final { scan-assembler-not {v_cndmask_b32} } } */
+/* { dg-final { scan-assembler-not {movv64di_exec/2} } } */
diff --git gcc/testsuite/gcc.target/gcn/cond_shift_3_run.c gcc/testsuite/gcc.target/gcn/cond_shift_3_run.c
new file mode 100644
index 00000000000..8f89918e8ac
--- /dev/null
+++ gcc/testsuite/gcc.target/gcn/cond_shift_3_run.c
@@ -0,0 +1,27 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_3.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP)				\
+  {								\
+    TYPE r[N], a[N], b[N];					\
+    for (int i = 0; i < N; ++i)					\
+      {								\
+	a[i] = (i & 1 ? i : 3 * i);				\
+	b[i] = (i >> 4) << (i & 15);				\
+	asm volatile ("" ::: "memory");				\
+      }								\
+    test_##TYPE##_##NAME (r, a, b, N);				\
+    for (int i = 0; i < N; ++i)					\
+      if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 72))		\
+	__builtin_abort ();					\
+  }
+
+int main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git gcc/testsuite/gcc.target/gcn/cond_shift_4.c gcc/testsuite/gcc.target/gcn/cond_shift_4.c
new file mode 100644
index 00000000000..c610363d9df
--- /dev/null
+++ gcc/testsuite/gcc.target/gcn/cond_shift_4.c
@@ -0,0 +1,38 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -dp" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP)					\
+  void __attribute__ ((noipa))						\
+  test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a,		\
+			TYPE *__restrict b, int n)			\
+  {									\
+    for (int i = 0; i < n; ++i)						\
+      r[i] = a[i] > 20 ? b[i] OP 3 : 0;					\
+  }
+
+#define TEST_TYPE(T, TYPE) \
+  T (TYPE, shl, <<) \
+  T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+  TEST_TYPE (T, int32_t) \
+  TEST_TYPE (T, uint32_t) \
+  TEST_TYPE (T, int64_t) \
+  TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tv_lshlrev_b32\tv[0-9]+, 3, v[0-9]+} 10 } } */
+/* { dg-final { scan-assembler-times {\tv_ashrrev_i32\tv[0-9]+, 3, v[0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {vashlv64si3_exec} 18 } } */
+/* { dg-final { scan-assembler-times {vashrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vashlv64di3_exec} 2 } } */
+/* { dg-final { scan-assembler-times {vashrv64di3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64di3_exec} 1 } } */
+
+/* { dg-final { scan-assembler-not {v_cndmask_b32} } } */
+/* { dg-final { scan-assembler-not {movv64di_exec/2} } } */
+
diff --git gcc/testsuite/gcc.target/gcn/cond_shift_4_run.c gcc/testsuite/gcc.target/gcn/cond_shift_4_run.c
new file mode 100644
index 00000000000..6017d68e820
--- /dev/null
+++ gcc/testsuite/gcc.target/gcn/cond_shift_4_run.c
@@ -0,0 +1,27 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_4.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP)				\
+  {								\
+    TYPE r[N], a[N], b[N];					\
+    for (int i = 0; i < N; ++i)					\
+      {								\
+	a[i] = (i & 1 ? i : 3 * i);				\
+	b[i] = (i >> 4) << (i & 15);				\
+	asm volatile ("" ::: "memory");				\
+      }								\
+    test_##TYPE##_##NAME (r, a, b, N);				\
+    for (int i = 0; i < N; ++i)					\
+      if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 0))		\
+	__builtin_abort ();					\
+  }
+
+int main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git gcc/testsuite/gcc.target/gcn/cond_shift_8.c gcc/testsuite/gcc.target/gcn/cond_shift_8.c
new file mode 100644
index 00000000000..0749e2e5e53
--- /dev/null
+++ gcc/testsuite/gcc.target/gcn/cond_shift_8.c
@@ -0,0 +1,35 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -dp" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP)					\
+  void __attribute__ ((noipa))						\
+  test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a,		\
+			TYPE *__restrict b, TYPE *__restrict c, int n)	\
+  {									\
+    for (int i = 0; i < n; ++i)						\
+      r[i] = a[i] > 20 ? b[i] OP c[i] : 91;				\
+  }
+
+#define TEST_TYPE(T, TYPE) \
+  T (TYPE, shl, <<) \
+  T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+  TEST_TYPE (T, int32_t) \
+  TEST_TYPE (T, uint32_t) \
+  TEST_TYPE (T, int64_t) \
+  TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {vashlv64si3_exec} 18 } } */
+/* { dg-final { scan-assembler-times {vashrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vashlv64di3_exec} 2 } } */
+/* { dg-final { scan-assembler-times {vashrv64di3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64di3_exec} 1 } } */
+
+/* { dg-final { scan-assembler-not {movv64si_exec/0} } } */
+/* { dg-final { scan-assembler-not {movv64di_exec/0} } } */
diff --git gcc/testsuite/gcc.target/gcn/cond_shift_8_run.c gcc/testsuite/gcc.target/gcn/cond_shift_8_run.c
new file mode 100644
index 00000000000..13da0197569
--- /dev/null
+++ gcc/testsuite/gcc.target/gcn/cond_shift_8_run.c
@@ -0,0 +1,28 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_8.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP)				\
+  {								\
+    TYPE r[N], a[N], b[N], c[N];				\
+    for (int i = 0; i < N; ++i)					\
+      {								\
+	a[i] = (i & 1 ? i : 3 * i);				\
+	b[i] = (i >> 4) << (i & 15);				\
+	c[i] = ~i & 7;						\
+	asm volatile ("" ::: "memory");				\
+      }								\
+    test_##TYPE##_##NAME (r, a, b, c, N);			\
+    for (int i = 0; i < N; ++i)					\
+      if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 91))	\
+	__builtin_abort ();					\
+  }
+
+int main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git gcc/testsuite/gcc.target/gcn/cond_shift_9.c gcc/testsuite/gcc.target/gcn/cond_shift_9.c
new file mode 100644
index 00000000000..61aba27504e
--- /dev/null
+++ gcc/testsuite/gcc.target/gcn/cond_shift_9.c
@@ -0,0 +1,36 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -dp" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP)					\
+  void __attribute__ ((noipa))						\
+  test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a,		\
+			TYPE *__restrict b, TYPE *__restrict c, int n)	\
+  {									\
+    for (int i = 0; i < n; ++i)						\
+      r[i] = a[i] > 20 ? b[i] OP c[i] : 0;				\
+  }
+
+#define TEST_TYPE(T, TYPE) \
+  T (TYPE, shl, <<) \
+  T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+  TEST_TYPE (T, int32_t) \
+  TEST_TYPE (T, uint32_t) \
+  TEST_TYPE (T, int64_t) \
+  TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {vashlv64si3_exec} 18 } } */
+/* { dg-final { scan-assembler-times {vashrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vashlv64di3_exec} 2 } } */
+/* { dg-final { scan-assembler-times {vashrv64di3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64di3_exec} 1 } } */
+
+/* { dg-final { scan-assembler-not {v_cndmask_b32} } } */
+/* { dg-final { scan-assembler-not {movv64si_exec/2} } } */
+/* { dg-final { scan-assembler-not {movv64di_exec/1} } } */
diff --git gcc/testsuite/gcc.target/gcn/cond_shift_9_run.c gcc/testsuite/gcc.target/gcn/cond_shift_9_run.c
new file mode 100644
index 00000000000..de8e010bdab
--- /dev/null
+++ gcc/testsuite/gcc.target/gcn/cond_shift_9_run.c
@@ -0,0 +1,28 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_9.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP)				\
+  {								\
+    TYPE r[N], a[N], b[N], c[N];				\
+    for (int i = 0; i < N; ++i)					\
+      {								\
+	a[i] = (i & 1 ? i : 3 * i);				\
+	b[i] = (i >> 4) << (i & 15);				\
+	c[i] = ~i & 7;						\
+	asm volatile ("" ::: "memory");				\
+      }								\
+    test_##TYPE##_##NAME (r, a, b, c, N);			\
+    for (int i = 0; i < N; ++i)					\
+      if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 0))	\
+	__builtin_abort ();					\
+  }
+
+int main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
-- 
2.39.1