From e9c974670e8d37f725098eea97e22be5e2e9fd21 Mon Sep 17 00:00:00 2001
From: Paul-Antoine Arras <pa@codesourcery.com>
Date: Wed, 1 Feb 2023 16:13:23 +0100
Subject: [PATCH] amdgcn: Add instruction pattern for conditional shift
operations
gcc/ChangeLog:
* config/gcn/gcn-valu.md (cond_shiftop): New code iterator.
(cond_<expander><mode>): Add cond_{ashl|ashr|lshr} expanders.
gcc/testsuite/ChangeLog:
* gcc.target/gcn/cond_shift_3.c: New test.
* gcc.target/gcn/cond_shift_3_run.c: New test.
* gcc.target/gcn/cond_shift_4.c: New test.
* gcc.target/gcn/cond_shift_4_run.c: New test.
* gcc.target/gcn/cond_shift_8.c: New test.
* gcc.target/gcn/cond_shift_8_run.c: New test.
* gcc.target/gcn/cond_shift_9.c: New test.
* gcc.target/gcn/cond_shift_9_run.c: New test.
---
gcc/config/gcn/gcn-valu.md | 23 +++++++++++
gcc/testsuite/gcc.target/gcn/cond_shift_3.c | 37 ++++++++++++++++++
.../gcc.target/gcn/cond_shift_3_run.c | 27 +++++++++++++
gcc/testsuite/gcc.target/gcn/cond_shift_4.c | 38 +++++++++++++++++++
.../gcc.target/gcn/cond_shift_4_run.c | 27 +++++++++++++
gcc/testsuite/gcc.target/gcn/cond_shift_8.c | 35 +++++++++++++++++
.../gcc.target/gcn/cond_shift_8_run.c | 28 ++++++++++++++
gcc/testsuite/gcc.target/gcn/cond_shift_9.c | 36 ++++++++++++++++++
.../gcc.target/gcn/cond_shift_9_run.c | 28 ++++++++++++++
9 files changed, 279 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_3.c
create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_3_run.c
create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_4.c
create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_4_run.c
create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_8.c
create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_8_run.c
create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_9.c
create mode 100644 gcc/testsuite/gcc.target/gcn/cond_shift_9_run.c
@@ -3489,6 +3489,29 @@ (define_expand "cond_<expander><mode>"
DONE;
})
+(define_code_iterator cond_shiftop [ashift lshiftrt ashiftrt])
+;; Conditional shifts; by the cond_ optab convention op0 = op1 ? op2 <shift> op3 : op4.
+(define_expand "cond_<expander><mode>"
+ [(match_operand:V_INT_noHI 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (cond_shiftop:V_INT_noHI
+ (match_operand:V_INT_noHI 2 "gcn_alu_operand")
+ (match_operand:V_INT_noHI 3 "gcn_alu_operand"))
+ (match_operand:V_INT_noHI 4 "register_operand")]
+ ""
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ /* The shift amount is handed on in <VnSI>mode; convert operand 3 (unsignedp = 0).  */
+ rtx shiftby = gen_reg_rtx (<VnSI>mode);
+ convert_move (shiftby, operands[3], 0);
+
+ emit_insn (gen_v<expander><mode>3_exec (operands[0], operands[2],
+ shiftby, operands[4],
+ operands[1]));
+ DONE;
+ })
+
;; }}}
;; {{{ Vector reductions
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -dp" } */
+/* Conditional shift by the constant 3; unselected elements are set to 72.  */
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP 3 : 72; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+/* -dp annotates each insn with its pattern name and alternative (e.g. movv64di_exec/2).  */
+/* { dg-final { scan-assembler-times {\tv_lshlrev_b32\tv[0-9]+, 3, v[0-9]+} 10 } } */
+/* { dg-final { scan-assembler-times {\tv_ashrrev_i32\tv[0-9]+, 3, v[0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {vashlv64si3_exec} 18 } } */
+/* { dg-final { scan-assembler-times {vashrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vashlv64di3_exec} 2 } } */
+/* { dg-final { scan-assembler-times {vashrv64di3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64di3_exec} 1 } } */
+
+/* { dg-final { scan-assembler-not {v_cndmask_b32} } } */
+/* { dg-final { scan-assembler-not {movv64di_exec/2} } } */
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_3.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 72)) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -dp" } */
+/* Conditional shift by the constant 3; unselected elements are set to 0.  */
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP 3 : 0; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+/* -dp annotates each insn with its pattern name and alternative (e.g. movv64di_exec/2).  */
+/* { dg-final { scan-assembler-times {\tv_lshlrev_b32\tv[0-9]+, 3, v[0-9]+} 10 } } */
+/* { dg-final { scan-assembler-times {\tv_ashrrev_i32\tv[0-9]+, 3, v[0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {vashlv64si3_exec} 18 } } */
+/* { dg-final { scan-assembler-times {vashrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vashlv64di3_exec} 2 } } */
+/* { dg-final { scan-assembler-times {vashrv64di3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64di3_exec} 1 } } */
+
+/* { dg-final { scan-assembler-not {v_cndmask_b32} } } */
+/* { dg-final { scan-assembler-not {movv64di_exec/2} } } */
+
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_4.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 0)) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -dp" } */
+/* Conditional shift by a per-element amount c[i]; unselected elements are set to 91.  */
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, TYPE *__restrict c, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP c[i] : 91; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+/* -dp annotates each insn with its pattern name and alternative (e.g. movv64si_exec/0).  */
+/* { dg-final { scan-assembler-times {vashlv64si3_exec} 18 } } */
+/* { dg-final { scan-assembler-times {vashrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vashlv64di3_exec} 2 } } */
+/* { dg-final { scan-assembler-times {vashrv64di3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64di3_exec} 1 } } */
+
+/* { dg-final { scan-assembler-not {movv64si_exec/0} } } */
+/* { dg-final { scan-assembler-not {movv64di_exec/0} } } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_8.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N], c[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ c[i] = ~i & 7; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, c, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 91)) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -dp" } */
+/* Conditional shift by a per-element amount c[i]; unselected elements are set to 0.  */
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, TYPE *__restrict c, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP c[i] : 0; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+/* -dp annotates each insn with its pattern name and alternative (e.g. movv64si_exec/2).  */
+/* { dg-final { scan-assembler-times {vashlv64si3_exec} 18 } } */
+/* { dg-final { scan-assembler-times {vashrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vashlv64di3_exec} 2 } } */
+/* { dg-final { scan-assembler-times {vashrv64di3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64si3_exec} 1 } } */
+/* { dg-final { scan-assembler-times {vlshrv64di3_exec} 1 } } */
+
+/* { dg-final { scan-assembler-not {v_cndmask_b32} } } */
+/* { dg-final { scan-assembler-not {movv64si_exec/2} } } */
+/* { dg-final { scan-assembler-not {movv64di_exec/1} } } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_9.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N], c[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ c[i] = ~i & 7; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, c, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 0)) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
--
2.39.1