[V2] RISC-V: Support const vector expansion with step vector with base != 0
Checks
Commit Message
Currently, we are able to generate step vector with base == 0:
{ 0, 0, 2, 2, 4, 4, ... }
ASM:
vid
vand
However, we generate wrong code for a step vector with base != 0:
{ 1, 1, 3, 3, 5, 5, ... }
Before this patch, such a case fails at run time.
After this patch, the testcase passes and the step vector is generated with the following asm:
vid
vand
vadd
gcc/ChangeLog:
* config/riscv/riscv-v.cc (expand_const_vector): Fix stepped vector with base != 0.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/partial/slp-17.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-18.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-19.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-17.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-18.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-19.c: New test.
---
gcc/config/riscv/riscv-v.cc | 14 +++-
.../riscv/rvv/autovec/partial/slp-17.c | 34 ++++++++
.../riscv/rvv/autovec/partial/slp-18.c | 26 ++++++
.../riscv/rvv/autovec/partial/slp-19.c | 26 ++++++
.../riscv/rvv/autovec/partial/slp_run-17.c | 84 +++++++++++++++++++
.../riscv/rvv/autovec/partial/slp_run-18.c | 69 +++++++++++++++
.../riscv/rvv/autovec/partial/slp_run-19.c | 69 +++++++++++++++
7 files changed, 320 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-17.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-18.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-19.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-17.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-18.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-19.c
Comments
Ping. This patch is a simple fix. OK for trunk?
juzhe.zhong@rivai.ai
From: Juzhe-Zhong
Date: 2023-06-26 20:18
To: gcc-patches
CC: kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
Subject: [PATCH V2] RISC-V: Support const vector expansion with step vector with base != 0
Currently, we are able to generate step vector with base == 0:
{ 0, 0, 2, 2, 4, 4, ... }
ASM:
vid
vand
However, we generate wrong code for a step vector with base != 0:
{ 1, 1, 3, 3, 5, 5, ... }
Before this patch, such a case fails at run time.
After this patch, the testcase passes and the step vector is generated with the following asm:
vid
vand
vadd
gcc/ChangeLog:
* config/riscv/riscv-v.cc (expand_const_vector): Fix stepped vector with base != 0.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/partial/slp-17.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-18.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-19.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-17.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-18.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-19.c: New test.
---
gcc/config/riscv/riscv-v.cc | 14 +++-
.../riscv/rvv/autovec/partial/slp-17.c | 34 ++++++++
.../riscv/rvv/autovec/partial/slp-18.c | 26 ++++++
.../riscv/rvv/autovec/partial/slp-19.c | 26 ++++++
.../riscv/rvv/autovec/partial/slp_run-17.c | 84 +++++++++++++++++++
.../riscv/rvv/autovec/partial/slp_run-18.c | 69 +++++++++++++++
.../riscv/rvv/autovec/partial/slp_run-19.c | 69 +++++++++++++++
7 files changed, 320 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-17.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-18.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-19.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-17.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-18.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-19.c
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 5518394be1e..cd3422bf711 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1258,7 +1258,6 @@ expand_const_vector (rtx target, rtx src)
}
emit_move_insn (target, tmp);
}
- return;
}
else if (CONST_VECTOR_STEPPED_P (src))
{
@@ -1287,9 +1286,20 @@ expand_const_vector (rtx target, rtx src)
*/
rtx imm
= gen_int_mode (-builder.npatterns (), builder.inner_mode ());
- rtx and_ops[] = {target, vid, imm};
+ rtx tmp = gen_reg_rtx (builder.mode ());
+ rtx and_ops[] = {tmp, vid, imm};
icode = code_for_pred_scalar (AND, builder.mode ());
emit_vlmax_insn (icode, RVV_BINOP, and_ops);
+ HOST_WIDE_INT init_val = INTVAL (builder.elt (0));
+ if (init_val == 0)
+ emit_move_insn (target, tmp);
+ else
+ {
+ rtx dup = gen_const_vector_dup (builder.mode (), init_val);
+ rtx add_ops[] = {target, tmp, dup};
+ icode = code_for_pred (PLUS, builder.mode ());
+ emit_vlmax_insn (icode, RVV_BINOP, add_ops);
+ }
}
else
{
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-17.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-17.c
new file mode 100644
index 00000000000..2f2c3d11c2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-17.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void
+f (uint8_t *restrict a, uint8_t *restrict b,
+ uint8_t *restrict c, uint8_t *restrict d,
+ int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 8] = c[i * 8] + d[i * 8];
+ a[i * 8 + 1] = c[i * 8] + d[i * 8 + 1];
+ a[i * 8 + 2] = c[i * 8 + 2] + d[i * 8 + 2];
+ a[i * 8 + 3] = c[i * 8 + 2] + d[i * 8 + 3];
+ a[i * 8 + 4] = c[i * 8 + 4] + d[i * 8 + 4];
+ a[i * 8 + 5] = c[i * 8 + 4] + d[i * 8 + 5];
+ a[i * 8 + 6] = c[i * 8 + 6] + d[i * 8 + 6];
+ a[i * 8 + 7] = c[i * 8 + 6] + d[i * 8 + 7];
+ b[i * 8] = c[i * 8 + 1] + d[i * 8];
+ b[i * 8 + 1] = c[i * 8 + 1] + d[i * 8 + 1];
+ b[i * 8 + 2] = c[i * 8 + 3] + d[i * 8 + 2];
+ b[i * 8 + 3] = c[i * 8 + 3] + d[i * 8 + 3];
+ b[i * 8 + 4] = c[i * 8 + 5] + d[i * 8 + 4];
+ b[i * 8 + 5] = c[i * 8 + 5] + d[i * 8 + 5];
+ b[i * 8 + 6] = c[i * 8 + 7] + d[i * 8 + 6];
+ b[i * 8 + 7] = c[i * 8 + 7] + d[i * 8 + 7];
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 2 "optimized" } } */
+/* { dg-final { scan-assembler {\tvid\.v} } } */
+/* { dg-final { scan-assembler-not {\tvmul} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-18.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-18.c
new file mode 100644
index 00000000000..72103314b1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-18.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void
+f (float *restrict a, float *restrict b,
+ float *restrict c, float *restrict d,
+ int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 4] = c[i * 4] + d[i * 4];
+ a[i * 4 + 1] = c[i * 4] + d[i * 4 + 1];
+ a[i * 4 + 2] = c[i * 4 + 2] + d[i * 4 + 2];
+ a[i * 4 + 3] = c[i * 4 + 2] + d[i * 4 + 3];
+ b[i * 4] = c[i * 4 + 1] + d[i * 4];
+ b[i * 4 + 1] = c[i * 4 + 1] + d[i * 4 + 1];
+ b[i * 4 + 2] = c[i * 4 + 3] + d[i * 4 + 2];
+ b[i * 4 + 3] = c[i * 4 + 3] + d[i * 4 + 3];
+ }
+}
+
+/* { dg-final { scan-tree-dump "\.VEC_PERM" "optimized" } } */
+/* { dg-final { scan-assembler {\tvid\.v} } } */
+/* { dg-final { scan-assembler-not {\tvmul} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-19.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-19.c
new file mode 100644
index 00000000000..41ce0fc5767
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-19.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void
+f (float *restrict a, float *restrict b,
+ float *restrict c, float *restrict d,
+ int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 4] = c[i * 4] + d[i * 4];
+ a[i * 4 + 1] = c[i * 4] + d[i * 4 + 1];
+ a[i * 4 + 2] = c[i * 4 + 2] + d[i * 4 + 2];
+ a[i * 4 + 3] = c[i * 4 + 3] + d[i * 4 + 3];
+ b[i * 4] = c[i * 4 + 2] + d[i * 4];
+ b[i * 4 + 1] = c[i * 4 + 1] + d[i * 4 + 1];
+ b[i * 4 + 2] = c[i * 4 + 3] + d[i * 4 + 2];
+ b[i * 4 + 3] = c[i * 4 + 3] + d[i * 4 + 3];
+ }
+}
+
+/* { dg-final { scan-tree-dump "\.VEC_PERM" "optimized" } } */
+/* { dg-final { scan-assembler {\tvid\.v} } } */
+/* { dg-final { scan-assembler-not {\tvmul} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-17.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-17.c
new file mode 100644
index 00000000000..224db4e3173
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-17.c
@@ -0,0 +1,84 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-17.c"
+
+#define LIMIT 256
+void __attribute__ ((optimize (0)))
+f_golden (uint8_t *restrict a, uint8_t *restrict b, uint8_t *restrict c,
+ uint8_t *restrict d, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 8] = c[i * 8] + d[i * 8];
+ a[i * 8 + 1] = c[i * 8] + d[i * 8 + 1];
+ a[i * 8 + 2] = c[i * 8 + 2] + d[i * 8 + 2];
+ a[i * 8 + 3] = c[i * 8 + 2] + d[i * 8 + 3];
+ a[i * 8 + 4] = c[i * 8 + 4] + d[i * 8 + 4];
+ a[i * 8 + 5] = c[i * 8 + 4] + d[i * 8 + 5];
+ a[i * 8 + 6] = c[i * 8 + 6] + d[i * 8 + 6];
+ a[i * 8 + 7] = c[i * 8 + 6] + d[i * 8 + 7];
+ b[i * 8] = c[i * 8 + 1] + d[i * 8];
+ b[i * 8 + 1] = c[i * 8 + 1] + d[i * 8 + 1];
+ b[i * 8 + 2] = c[i * 8 + 3] + d[i * 8 + 2];
+ b[i * 8 + 3] = c[i * 8 + 3] + d[i * 8 + 3];
+ b[i * 8 + 4] = c[i * 8 + 5] + d[i * 8 + 4];
+ b[i * 8 + 5] = c[i * 8 + 5] + d[i * 8 + 5];
+ b[i * 8 + 6] = c[i * 8 + 7] + d[i * 8 + 6];
+ b[i * 8 + 7] = c[i * 8 + 7] + d[i * 8 + 7];
+ }
+}
+
+int
+main (void)
+{
+#define RUN(NUM) \
+ uint8_t a_##NUM[NUM * 8 + 8] = {0}; \
+ uint8_t a_golden_##NUM[NUM * 8 + 8] = {0}; \
+ uint8_t b_##NUM[NUM * 8 + 8] = {0}; \
+ uint8_t b_golden_##NUM[NUM * 8 + 8] = {0}; \
+ uint8_t c_##NUM[NUM * 8 + 8] = {0}; \
+ uint8_t d_##NUM[NUM * 8 + 8] = {0}; \
+ for (int i = 0; i < NUM * 8 + 8; i++) \
+ { \
+ if (i % NUM == 0) \
+ c_##NUM[i] = (i + NUM) % LIMIT; \
+ else \
+ c_##NUM[i] = (i * 3) % LIMIT; \
+ if (i % 2 == 0) \
+ d_##NUM[i] = i % LIMIT; \
+ else \
+ d_##NUM[i] = (i * 7) % LIMIT; \
+ } \
+ f (a_##NUM, b_##NUM, c_##NUM, d_##NUM, NUM); \
+ f_golden (a_golden_##NUM, b_golden_##NUM, c_##NUM, d_##NUM, NUM); \
+ for (int i = 0; i < NUM * 8 + 8; i++) \
+ { \
+ if (a_##NUM[i] != a_golden_##NUM[i]) \
+ __builtin_abort (); \
+ if (b_##NUM[i] != b_golden_##NUM[i]) \
+ __builtin_abort (); \
+ }
+
+ RUN (3);
+ RUN (5);
+ RUN (15);
+ RUN (16);
+ RUN (17);
+ RUN (31);
+ RUN (32);
+ RUN (33);
+ RUN (63);
+ RUN (64);
+ RUN (65);
+ RUN (127);
+ RUN (128);
+ RUN (129);
+ RUN (239);
+ RUN (359);
+ RUN (498);
+ RUN (799);
+ RUN (977);
+ RUN (5789);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-18.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-18.c
new file mode 100644
index 00000000000..7d22e1fd88e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-18.c
@@ -0,0 +1,69 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-18.c"
+
+void __attribute__ ((optimize (0)))
+f_golden (float *restrict a, float *restrict b, float *restrict c,
+ float *restrict d, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 4] = c[i * 4] + d[i * 4];
+ a[i * 4 + 1] = c[i * 4] + d[i * 4 + 1];
+ a[i * 4 + 2] = c[i * 4 + 2] + d[i * 4 + 2];
+ a[i * 4 + 3] = c[i * 4 + 2] + d[i * 4 + 3];
+ b[i * 4] = c[i * 4 + 1] + d[i * 4];
+ b[i * 4 + 1] = c[i * 4 + 1] + d[i * 4 + 1];
+ b[i * 4 + 2] = c[i * 4 + 3] + d[i * 4 + 2];
+ b[i * 4 + 3] = c[i * 4 + 3] + d[i * 4 + 3];
+ }
+}
+
+int
+main (void)
+{
+#define RUN(NUM) \
+ float a_##NUM[NUM * 4 + 4] = {0}; \
+ float a_golden_##NUM[NUM * 4 + 4] = {0}; \
+ float b_##NUM[NUM * 4 + 4] = {0}; \
+ float b_golden_##NUM[NUM * 4 + 4] = {0}; \
+ float c_##NUM[NUM * 4 + 4] = {0}; \
+ float d_##NUM[NUM * 4 + 4] = {0}; \
+ for (int i = 0; i < NUM * 4 + 4; i++) \
+ { \
+ c_##NUM[i] = i * 3.789 - 987.135; \
+ d_##NUM[i] = i * -13.789 + 1987.135; \
+ } \
+ f (a_##NUM, b_##NUM, c_##NUM, d_##NUM, NUM); \
+ f_golden (a_golden_##NUM, b_golden_##NUM, c_##NUM, d_##NUM, NUM); \
+ for (int i = 0; i < NUM * 4 + 4; i++) \
+ { \
+ if (a_##NUM[i] != a_golden_##NUM[i]) \
+ __builtin_abort (); \
+ if (b_##NUM[i] != b_golden_##NUM[i]) \
+ __builtin_abort (); \
+ }
+
+ RUN (3);
+ RUN (5);
+ RUN (15);
+ RUN (16);
+ RUN (17);
+ RUN (31);
+ RUN (32);
+ RUN (33);
+ RUN (63);
+ RUN (64);
+ RUN (65);
+ RUN (127);
+ RUN (128);
+ RUN (129);
+ RUN (239);
+ RUN (359);
+ RUN (498);
+ RUN (799);
+ RUN (977);
+ RUN (5789);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-19.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-19.c
new file mode 100644
index 00000000000..5cd7156e3d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp_run-19.c
@@ -0,0 +1,69 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-19.c"
+
+void __attribute__ ((optimize (0)))
+f_golden (float *restrict a, float *restrict b, float *restrict c,
+ float *restrict d, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 4] = c[i * 4] + d[i * 4];
+ a[i * 4 + 1] = c[i * 4] + d[i * 4 + 1];
+ a[i * 4 + 2] = c[i * 4 + 2] + d[i * 4 + 2];
+ a[i * 4 + 3] = c[i * 4 + 3] + d[i * 4 + 3];
+ b[i * 4] = c[i * 4 + 2] + d[i * 4];
+ b[i * 4 + 1] = c[i * 4 + 1] + d[i * 4 + 1];
+ b[i * 4 + 2] = c[i * 4 + 3] + d[i * 4 + 2];
+ b[i * 4 + 3] = c[i * 4 + 3] + d[i * 4 + 3];
+ }
+}
+
+int
+main (void)
+{
+#define RUN(NUM) \
+ float a_##NUM[NUM * 4 + 4] = {0}; \
+ float a_golden_##NUM[NUM * 4 + 4] = {0}; \
+ float b_##NUM[NUM * 4 + 4] = {0}; \
+ float b_golden_##NUM[NUM * 4 + 4] = {0}; \
+ float c_##NUM[NUM * 4 + 4] = {0}; \
+ float d_##NUM[NUM * 4 + 4] = {0}; \
+ for (int i = 0; i < NUM * 4 + 4; i++) \
+ { \
+ c_##NUM[i] = i * 3.789 - 987.135; \
+ d_##NUM[i] = i * -13.789 + 1987.135; \
+ } \
+ f (a_##NUM, b_##NUM, c_##NUM, d_##NUM, NUM); \
+ f_golden (a_golden_##NUM, b_golden_##NUM, c_##NUM, d_##NUM, NUM); \
+ for (int i = 0; i < NUM * 4 + 4; i++) \
+ { \
+ if (a_##NUM[i] != a_golden_##NUM[i]) \
+ __builtin_abort (); \
+ if (b_##NUM[i] != b_golden_##NUM[i]) \
+ __builtin_abort (); \
+ }
+
+ RUN (3);
+ RUN (5);
+ RUN (15);
+ RUN (16);
+ RUN (17);
+ RUN (31);
+ RUN (32);
+ RUN (33);
+ RUN (63);
+ RUN (64);
+ RUN (65);
+ RUN (127);
+ RUN (128);
+ RUN (129);
+ RUN (239);
+ RUN (359);
+ RUN (498);
+ RUN (799);
+ RUN (977);
+ RUN (5789);
+ return 0;
+}
--
2.36.1
On 6/26/23 06:18, Juzhe-Zhong wrote:
> Currently, we are able to generate step vector with base == 0:
> { 0, 0, 2, 2, 4, 4, ... }
>
> ASM:
>
> vid
> vand
>
> However, we do wrong for step vector with base != 0:
> { 1, 1, 3, 3, 5, 5, ... }
>
> Before this patch, such case will run fail.
>
> After this patch, we are able to pass the testcase and generate the step vector with asm:
>
> vid
> vand
> vadd
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-v.cc (expand_const_vector): Fix stepped vector with base != 0.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/partial/slp-17.c: New test.
> * gcc.target/riscv/rvv/autovec/partial/slp-18.c: New test.
> * gcc.target/riscv/rvv/autovec/partial/slp-19.c: New test.
> * gcc.target/riscv/rvv/autovec/partial/slp_run-17.c: New test.
> * gcc.target/riscv/rvv/autovec/partial/slp_run-18.c: New test.
> * gcc.target/riscv/rvv/autovec/partial/slp_run-19.c: New test.
>
>
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 5518394be1e..cd3422bf711 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -1258,7 +1258,6 @@ expand_const_vector (rtx target, rtx src)
> }
> emit_move_insn (target, tmp);
> }
> - return;
> }
> else if (CONST_VECTOR_STEPPED_P (src))
> {
Was removal of the "return" intentional here? I'm not real familiar
with this code, but it doesn't look related to the case you're trying to
fix.
The rest of the code looks quite sensible.
Jeff
On 6/26/23 17:36, juzhe.zhong wrote:
> Yes. I found the “return” is redundant so I removed it.
OK. Just wanted to be sure.
OK for the trunk.
jeff
Committed, thanks Jeff.
Pan
-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Jeff Law via Gcc-patches
Sent: Tuesday, June 27, 2023 7:50 AM
To: juzhe.zhong <juzhe.zhong@rivai.ai>
Cc: gcc-patches@gcc.gnu.org; kito.cheng@gmail.com; kito.cheng@sifive.com; palmer@dabbelt.com; palmer@rivosinc.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH V2] RISC-V: Support const vector expansion with step vector with base != 0
On 6/26/23 17:36, juzhe.zhong wrote:
> Yes. I found the “return” is redundant so I removed it.
OK. Just wanted to be sure.
OK for the trunk.
jeff
@@ -1258,7 +1258,6 @@ expand_const_vector (rtx target, rtx src)
}
emit_move_insn (target, tmp);
}
- return;
}
else if (CONST_VECTOR_STEPPED_P (src))
{
@@ -1287,9 +1286,20 @@ expand_const_vector (rtx target, rtx src)
*/
rtx imm
= gen_int_mode (-builder.npatterns (), builder.inner_mode ());
- rtx and_ops[] = {target, vid, imm};
+ rtx tmp = gen_reg_rtx (builder.mode ());
+ rtx and_ops[] = {tmp, vid, imm};
icode = code_for_pred_scalar (AND, builder.mode ());
emit_vlmax_insn (icode, RVV_BINOP, and_ops);
+ HOST_WIDE_INT init_val = INTVAL (builder.elt (0));
+ if (init_val == 0)
+ emit_move_insn (target, tmp);
+ else
+ {
+ rtx dup = gen_const_vector_dup (builder.mode (), init_val);
+ rtx add_ops[] = {target, tmp, dup};
+ icode = code_for_pred (PLUS, builder.mode ());
+ emit_vlmax_insn (icode, RVV_BINOP, add_ops);
+ }
}
else
{
new file mode 100644
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void
+f (uint8_t *restrict a, uint8_t *restrict b,
+ uint8_t *restrict c, uint8_t *restrict d,
+ int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 8] = c[i * 8] + d[i * 8];
+ a[i * 8 + 1] = c[i * 8] + d[i * 8 + 1];
+ a[i * 8 + 2] = c[i * 8 + 2] + d[i * 8 + 2];
+ a[i * 8 + 3] = c[i * 8 + 2] + d[i * 8 + 3];
+ a[i * 8 + 4] = c[i * 8 + 4] + d[i * 8 + 4];
+ a[i * 8 + 5] = c[i * 8 + 4] + d[i * 8 + 5];
+ a[i * 8 + 6] = c[i * 8 + 6] + d[i * 8 + 6];
+ a[i * 8 + 7] = c[i * 8 + 6] + d[i * 8 + 7];
+ b[i * 8] = c[i * 8 + 1] + d[i * 8];
+ b[i * 8 + 1] = c[i * 8 + 1] + d[i * 8 + 1];
+ b[i * 8 + 2] = c[i * 8 + 3] + d[i * 8 + 2];
+ b[i * 8 + 3] = c[i * 8 + 3] + d[i * 8 + 3];
+ b[i * 8 + 4] = c[i * 8 + 5] + d[i * 8 + 4];
+ b[i * 8 + 5] = c[i * 8 + 5] + d[i * 8 + 5];
+ b[i * 8 + 6] = c[i * 8 + 7] + d[i * 8 + 6];
+ b[i * 8 + 7] = c[i * 8 + 7] + d[i * 8 + 7];
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 2 "optimized" } } */
+/* { dg-final { scan-assembler {\tvid\.v} } } */
+/* { dg-final { scan-assembler-not {\tvmul} } } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void
+f (float *restrict a, float *restrict b,
+ float *restrict c, float *restrict d,
+ int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 4] = c[i * 4] + d[i * 4];
+ a[i * 4 + 1] = c[i * 4] + d[i * 4 + 1];
+ a[i * 4 + 2] = c[i * 4 + 2] + d[i * 4 + 2];
+ a[i * 4 + 3] = c[i * 4 + 2] + d[i * 4 + 3];
+ b[i * 4] = c[i * 4 + 1] + d[i * 4];
+ b[i * 4 + 1] = c[i * 4 + 1] + d[i * 4 + 1];
+ b[i * 4 + 2] = c[i * 4 + 3] + d[i * 4 + 2];
+ b[i * 4 + 3] = c[i * 4 + 3] + d[i * 4 + 3];
+ }
+}
+
+/* { dg-final { scan-tree-dump "\.VEC_PERM" "optimized" } } */
+/* { dg-final { scan-assembler {\tvid\.v} } } */
+/* { dg-final { scan-assembler-not {\tvmul} } } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+void
+f (float *restrict a, float *restrict b,
+ float *restrict c, float *restrict d,
+ int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 4] = c[i * 4] + d[i * 4];
+ a[i * 4 + 1] = c[i * 4] + d[i * 4 + 1];
+ a[i * 4 + 2] = c[i * 4 + 2] + d[i * 4 + 2];
+ a[i * 4 + 3] = c[i * 4 + 3] + d[i * 4 + 3];
+ b[i * 4] = c[i * 4 + 2] + d[i * 4];
+ b[i * 4 + 1] = c[i * 4 + 1] + d[i * 4 + 1];
+ b[i * 4 + 2] = c[i * 4 + 3] + d[i * 4 + 2];
+ b[i * 4 + 3] = c[i * 4 + 3] + d[i * 4 + 3];
+ }
+}
+
+/* { dg-final { scan-tree-dump "\.VEC_PERM" "optimized" } } */
+/* { dg-final { scan-assembler {\tvid\.v} } } */
+/* { dg-final { scan-assembler-not {\tvmul} } } */
new file mode 100644
@@ -0,0 +1,84 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-17.c"
+
+#define LIMIT 256
+void __attribute__ ((optimize (0)))
+f_golden (uint8_t *restrict a, uint8_t *restrict b, uint8_t *restrict c,
+ uint8_t *restrict d, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 8] = c[i * 8] + d[i * 8];
+ a[i * 8 + 1] = c[i * 8] + d[i * 8 + 1];
+ a[i * 8 + 2] = c[i * 8 + 2] + d[i * 8 + 2];
+ a[i * 8 + 3] = c[i * 8 + 2] + d[i * 8 + 3];
+ a[i * 8 + 4] = c[i * 8 + 4] + d[i * 8 + 4];
+ a[i * 8 + 5] = c[i * 8 + 4] + d[i * 8 + 5];
+ a[i * 8 + 6] = c[i * 8 + 6] + d[i * 8 + 6];
+ a[i * 8 + 7] = c[i * 8 + 6] + d[i * 8 + 7];
+ b[i * 8] = c[i * 8 + 1] + d[i * 8];
+ b[i * 8 + 1] = c[i * 8 + 1] + d[i * 8 + 1];
+ b[i * 8 + 2] = c[i * 8 + 3] + d[i * 8 + 2];
+ b[i * 8 + 3] = c[i * 8 + 3] + d[i * 8 + 3];
+ b[i * 8 + 4] = c[i * 8 + 5] + d[i * 8 + 4];
+ b[i * 8 + 5] = c[i * 8 + 5] + d[i * 8 + 5];
+ b[i * 8 + 6] = c[i * 8 + 7] + d[i * 8 + 6];
+ b[i * 8 + 7] = c[i * 8 + 7] + d[i * 8 + 7];
+ }
+}
+
+int
+main (void)
+{
+#define RUN(NUM) \
+ uint8_t a_##NUM[NUM * 8 + 8] = {0}; \
+ uint8_t a_golden_##NUM[NUM * 8 + 8] = {0}; \
+ uint8_t b_##NUM[NUM * 8 + 8] = {0}; \
+ uint8_t b_golden_##NUM[NUM * 8 + 8] = {0}; \
+ uint8_t c_##NUM[NUM * 8 + 8] = {0}; \
+ uint8_t d_##NUM[NUM * 8 + 8] = {0}; \
+ for (int i = 0; i < NUM * 8 + 8; i++) \
+ { \
+ if (i % NUM == 0) \
+ c_##NUM[i] = (i + NUM) % LIMIT; \
+ else \
+ c_##NUM[i] = (i * 3) % LIMIT; \
+ if (i % 2 == 0) \
+ d_##NUM[i] = i % LIMIT; \
+ else \
+ d_##NUM[i] = (i * 7) % LIMIT; \
+ } \
+ f (a_##NUM, b_##NUM, c_##NUM, d_##NUM, NUM); \
+ f_golden (a_golden_##NUM, b_golden_##NUM, c_##NUM, d_##NUM, NUM); \
+ for (int i = 0; i < NUM * 8 + 8; i++) \
+ { \
+ if (a_##NUM[i] != a_golden_##NUM[i]) \
+ __builtin_abort (); \
+ if (b_##NUM[i] != b_golden_##NUM[i]) \
+ __builtin_abort (); \
+ }
+
+ RUN (3);
+ RUN (5);
+ RUN (15);
+ RUN (16);
+ RUN (17);
+ RUN (31);
+ RUN (32);
+ RUN (33);
+ RUN (63);
+ RUN (64);
+ RUN (65);
+ RUN (127);
+ RUN (128);
+ RUN (129);
+ RUN (239);
+ RUN (359);
+ RUN (498);
+ RUN (799);
+ RUN (977);
+ RUN (5789);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,69 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-18.c"
+
+void __attribute__ ((optimize (0)))
+f_golden (float *restrict a, float *restrict b, float *restrict c,
+ float *restrict d, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 4] = c[i * 4] + d[i * 4];
+ a[i * 4 + 1] = c[i * 4] + d[i * 4 + 1];
+ a[i * 4 + 2] = c[i * 4 + 2] + d[i * 4 + 2];
+ a[i * 4 + 3] = c[i * 4 + 2] + d[i * 4 + 3];
+ b[i * 4] = c[i * 4 + 1] + d[i * 4];
+ b[i * 4 + 1] = c[i * 4 + 1] + d[i * 4 + 1];
+ b[i * 4 + 2] = c[i * 4 + 3] + d[i * 4 + 2];
+ b[i * 4 + 3] = c[i * 4 + 3] + d[i * 4 + 3];
+ }
+}
+
+int
+main (void)
+{
+#define RUN(NUM) \
+ float a_##NUM[NUM * 4 + 4] = {0}; \
+ float a_golden_##NUM[NUM * 4 + 4] = {0}; \
+ float b_##NUM[NUM * 4 + 4] = {0}; \
+ float b_golden_##NUM[NUM * 4 + 4] = {0}; \
+ float c_##NUM[NUM * 4 + 4] = {0}; \
+ float d_##NUM[NUM * 4 + 4] = {0}; \
+ for (int i = 0; i < NUM * 4 + 4; i++) \
+ { \
+ c_##NUM[i] = i * 3.789 - 987.135; \
+ d_##NUM[i] = i * -13.789 + 1987.135; \
+ } \
+ f (a_##NUM, b_##NUM, c_##NUM, d_##NUM, NUM); \
+ f_golden (a_golden_##NUM, b_golden_##NUM, c_##NUM, d_##NUM, NUM); \
+ for (int i = 0; i < NUM * 4 + 4; i++) \
+ { \
+ if (a_##NUM[i] != a_golden_##NUM[i]) \
+ __builtin_abort (); \
+ if (b_##NUM[i] != b_golden_##NUM[i]) \
+ __builtin_abort (); \
+ }
+
+ RUN (3);
+ RUN (5);
+ RUN (15);
+ RUN (16);
+ RUN (17);
+ RUN (31);
+ RUN (32);
+ RUN (33);
+ RUN (63);
+ RUN (64);
+ RUN (65);
+ RUN (127);
+ RUN (128);
+ RUN (129);
+ RUN (239);
+ RUN (359);
+ RUN (498);
+ RUN (799);
+ RUN (977);
+ RUN (5789);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,69 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable" } */
+
+#include "slp-19.c"
+
+void __attribute__ ((optimize (0)))
+f_golden (float *restrict a, float *restrict b, float *restrict c,
+ float *restrict d, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ a[i * 4] = c[i * 4] + d[i * 4];
+ a[i * 4 + 1] = c[i * 4] + d[i * 4 + 1];
+ a[i * 4 + 2] = c[i * 4 + 2] + d[i * 4 + 2];
+ a[i * 4 + 3] = c[i * 4 + 3] + d[i * 4 + 3];
+ b[i * 4] = c[i * 4 + 2] + d[i * 4];
+ b[i * 4 + 1] = c[i * 4 + 1] + d[i * 4 + 1];
+ b[i * 4 + 2] = c[i * 4 + 3] + d[i * 4 + 2];
+ b[i * 4 + 3] = c[i * 4 + 3] + d[i * 4 + 3];
+ }
+}
+
+int
+main (void)
+{
+#define RUN(NUM) \
+ float a_##NUM[NUM * 4 + 4] = {0}; \
+ float a_golden_##NUM[NUM * 4 + 4] = {0}; \
+ float b_##NUM[NUM * 4 + 4] = {0}; \
+ float b_golden_##NUM[NUM * 4 + 4] = {0}; \
+ float c_##NUM[NUM * 4 + 4] = {0}; \
+ float d_##NUM[NUM * 4 + 4] = {0}; \
+ for (int i = 0; i < NUM * 4 + 4; i++) \
+ { \
+ c_##NUM[i] = i * 3.789 - 987.135; \
+ d_##NUM[i] = i * -13.789 + 1987.135; \
+ } \
+ f (a_##NUM, b_##NUM, c_##NUM, d_##NUM, NUM); \
+ f_golden (a_golden_##NUM, b_golden_##NUM, c_##NUM, d_##NUM, NUM); \
+ for (int i = 0; i < NUM * 4 + 4; i++) \
+ { \
+ if (a_##NUM[i] != a_golden_##NUM[i]) \
+ __builtin_abort (); \
+ if (b_##NUM[i] != b_golden_##NUM[i]) \
+ __builtin_abort (); \
+ }
+
+ RUN (3);
+ RUN (5);
+ RUN (15);
+ RUN (16);
+ RUN (17);
+ RUN (31);
+ RUN (32);
+ RUN (33);
+ RUN (63);
+ RUN (64);
+ RUN (65);
+ RUN (127);
+ RUN (128);
+ RUN (129);
+ RUN (239);
+ RUN (359);
+ RUN (498);
+ RUN (799);
+ RUN (977);
+ RUN (5789);
+ return 0;
+}