RISC-V: Disallow poly (1,1) VLA SLP interleave vectorization
Checks
Commit Message
This patch fixes all of the following ICEs on zve64d:
FAIL: gcc.dg/vect/pr71259.c -flto -ffat-lto-objects (internal compiler error: in SET_TYPE_VECTOR_SUBPARTS, at tree.h:4248)
FAIL: gcc.dg/vect/pr71259.c -flto -ffat-lto-objects (test for excess errors)
FAIL: gcc.dg/vect/vect-alias-check-14.c (internal compiler error: in SET_TYPE_VECTOR_SUBPARTS, at tree.h:4248)
FAIL: gcc.dg/vect/vect-alias-check-14.c (test for excess errors)
FAIL: gcc.dg/vect/vect-alias-check-14.c -flto -ffat-lto-objects (internal compiler error: in SET_TYPE_VECTOR_SUBPARTS, at tree.h:4248)
FAIL: gcc.dg/vect/vect-alias-check-14.c -flto -ffat-lto-objects (test for excess errors)
FAIL: gcc.dg/vect/vect-alias-check-9.c (internal compiler error: in SET_TYPE_VECTOR_SUBPARTS, at tree.h:4248)
FAIL: gcc.dg/vect/vect-alias-check-9.c (test for excess errors)
FAIL: gcc.dg/vect/vect-alias-check-9.c -flto -ffat-lto-objects (internal compiler error: in SET_TYPE_VECTOR_SUBPARTS, at tree.h:4248)
FAIL: gcc.dg/vect/vect-alias-check-9.c -flto -ffat-lto-objects (test for excess errors)
FAIL: gcc.dg/vect/vect-cond-arith-6.c (internal compiler error: in SET_TYPE_VECTOR_SUBPARTS, at tree.h:4248)
FAIL: gcc.dg/vect/vect-cond-arith-6.c (test for excess errors)
FAIL: gcc.dg/vect/vect-cond-arith-6.c -flto -ffat-lto-objects (internal compiler error: in SET_TYPE_VECTOR_SUBPARTS, at tree.h:4248)
FAIL: gcc.dg/vect/vect-cond-arith-6.c -flto -ffat-lto-objects (test for excess errors)
FAIL: gcc.dg/vect/vect-gather-5.c (internal compiler error: in SET_TYPE_VECTOR_SUBPARTS, at tree.h:4248)
FAIL: gcc.dg/vect/vect-gather-5.c (test for excess errors)
FAIL: gcc.dg/vect/vect-gather-5.c -flto -ffat-lto-objects (internal compiler error: in SET_TYPE_VECTOR_SUBPARTS, at tree.h:4248)
FAIL: gcc.dg/vect/vect-gather-5.c -flto -ffat-lto-objects (test for excess errors)
Vectors of poly size (1, 1) must not be used for VLA SLP interleave vectorization, because VLA SLP interleaving assumes that the VF holds at least 2 elements,
whereas a poly size (1, 1) vector may hold only 1 element.
PR target/112694
gcc/ChangeLog:
* config/riscv/riscv-v.cc (expand_vec_perm_const): Disallow poly size (1, 1) VLA SLP.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/pr112694-2.c: New test.
* gcc.target/riscv/rvv/autovec/pr112694-3.c: New test.
---
gcc/config/riscv/riscv-v.cc | 9 +++++
.../gcc.target/riscv/rvv/autovec/pr112694-2.c | 35 ++++++++++++++++++
.../gcc.target/riscv/rvv/autovec/pr112694-3.c | 37 +++++++++++++++++++
3 files changed, 81 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112694-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112694-3.c
@@ -3364,6 +3364,15 @@ expand_vec_perm_const (machine_mode vmode, machine_mode op_mode, rtx target,
mask to do the iteration loop control. Just disable it directly. */
if (GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL)
return false;
+ /* FIXME: Explicitly disable VLA interleave SLP vectorization when we
+ may encounter an ICE for poly size (1, 1) vectors in the loop vectorizer.
+ Ideally, the middle-end loop vectorizer should be able to disable it
+ itself.  We can remove this code once the middle-end is able
+ to disable VLA SLP vectorization for a poly size (1, 1) VF.  */
+ if (!BYTES_PER_RISCV_VECTOR.is_constant ()
+ && maybe_lt (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL,
+ poly_int64 (16, 16)))
+ return false;
struct expand_vec_perm_d d;
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zve64d_zvfh_zfh -mabi=lp64d -O3 -fno-vect-cost-model" } */
+/* b[] holds the indices used to read a[]; g1 accumulates into c[].  */
+long a[100], b[100], c[100];
+/* Two-lane loop: c[i] and c[i + 1] get a[b[..]] plus different constants.  */
+void g1 ()
+{
+ for (int i = 0; i < 100; i += 2)
+ {
+ c[i] += a[b[i]] + 1;
+ c[i + 1] += a[b[i + 1]] + 2;
+ }
+}
+/* Sum a[b[..]] over index pairs, adding the i + 1 element before the i one.  */
+long g2 ()
+{
+ long res = 0;
+ for (int i = 0; i < 100; i += 2)
+ {
+ res += a[b[i + 1]];
+ res += a[b[i]];
+ }
+ return res;
+}
+/* Like g2, but adds the i element before the i + 1 one.  */
+long g3 ()
+{
+ long res = 0;
+ for (int i = 0; i < 100; i += 2)
+ {
+ res += a[b[i]];
+ res += a[b[i + 1]];
+ }
+ return res;
+}
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zve64d_zvfh_zfh -mabi=lp64d -fdiagnostics-plain-output -flto -ffat-lto-objects -ftree-vectorize -fno-tree-loop-distribute-patterns -fno-vect-cost-model -fno-common -O3" } */
+/* N evaluates to 512 * 11 / 64 + 4 = 92, the loop bound used below.  */
+#define VECTOR_BITS 512
+#define N (VECTOR_BITS * 11 / 64 + 4)
+/* The only operation instantiated through FOR_EACH_OP.  */
+#define add(A, B) ((A) + (B))
+/* Define f_OP: a[i] = OP (b[i], x) if b[i] is below 100 (even i) or 70 (odd i), else b[i].  */
+#define DEF(OP) \
+ void __attribute__ ((noipa)) \
+ f_##OP (double *restrict a, double *restrict b, double x) \
+ { \
+ for (int i = 0; i < N; i += 2) \
+ { \
+ a[i] = b[i] < 100 ? OP (b[i], x) : b[i]; \
+ a[i + 1] = b[i + 1] < 70 ? OP (b[i + 1], x) : b[i + 1]; \
+ } \
+ }
+/* Call f_OP, then recompute each a[i] in a novector loop and abort on mismatch; unused here.  */
+#define TEST(OP) \
+ { \
+ f_##OP (a, b, 10); \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) \
+ { \
+ int bval = (i % 17) * 10; \
+ int truev = OP (bval, 10); \
+ if (a[i] != (bval < (i & 1 ? 70 : 100) ? truev : bval)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+/* Expand T once per operation under test; currently only add.  */
+#define FOR_EACH_OP(T) \
+ T (add) \
+
+FOR_EACH_OP (DEF)