RISC-V: Expand VLS mode to scalar mode move[PR111391]
Checks
Commit Message
This patch fixes PR111391: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111391
PR target/111391
gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_legitimize_move): Expand VLS to scalar move.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/partial/slp-9.c: Adapt test.
* gcc.target/riscv/rvv/autovec/pr111391.c: New test.
---
gcc/config/riscv/riscv.cc | 29 +++++++++++++++++++
.../riscv/rvv/autovec/partial/slp-9.c | 1 -
.../gcc.target/riscv/rvv/autovec/pr111391.c | 28 ++++++++++++++++++
3 files changed, 57 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111391.c
Comments
I just realized that this patch causes some unexpected ICE FAILs in the GCC regression tests.
Now, V2:
https://gcc.gnu.org/pipermail/gcc-patches/2023-September/630194.html
has fully passed the regression tests.
juzhe.zhong@rivai.ai
From: Juzhe-Zhong
Date: 2023-09-13 21:01
To: gcc-patches
CC: kito.cheng; kito.cheng; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
Subject: [PATCH] RISC-V: Expand VLS mode to scalar mode move[PR111391]
This patch fixes PR111391: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111391
PR target/111391
gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_legitimize_move): Expand VLS to scalar move.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/partial/slp-9.c: Adapt test.
* gcc.target/riscv/rvv/autovec/pr111391.c: New test.
---
gcc/config/riscv/riscv.cc | 29 +++++++++++++++++++
.../riscv/rvv/autovec/partial/slp-9.c | 1 -
.../gcc.target/riscv/rvv/autovec/pr111391.c | 28 ++++++++++++++++++
3 files changed, 57 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111391.c
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 9d04ddd69e0..b7daad7cbb5 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -2513,6 +2513,35 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
}
return true;
}
+ /* Expand
+ (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
+ Expand this data movement instead of simply forbidding it, since
+ we can improve the code generation for the following scenario
+ by RVV auto-vectorization:
+ (set (reg:V8QI 149) (vec_duplicate:V8QI (reg:QI)))
+ (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
+ Since RVV mode and scalar mode are in different REG_CLASS,
+ we need to explicitly move data from V_REGS to GR_REGS by scalar move. */
+ if (SUBREG_P (src) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (src))))
+ {
+ rtx subreg = force_reg (GET_MODE (SUBREG_REG (src)), SUBREG_REG (src));
+ machine_mode imode = GET_MODE_INNER (GET_MODE (subreg));
+ unsigned int ratio = GET_MODE_SIZE (mode).to_constant ()
+ / GET_MODE_SIZE (imode).to_constant ();
+ poly_int64 nunits = GET_MODE_NUNITS (GET_MODE (subreg));
+ nunits = exact_div (nunits, ratio);
+ scalar_mode smode = as_a<scalar_mode> (mode);
+ machine_mode vmode
+ = riscv_vector::get_vector_mode (smode, nunits).require ();
+ rtx tmp = gen_reg_rtx (mode);
+ rtx index
+ = gen_int_mode (exact_div (SUBREG_BYTE (src), GET_MODE_SIZE (smode)),
+ Pmode);
+ emit_insn (gen_vec_extract (vmode, vmode, tmp,
+ gen_lowpart (vmode, subreg), index));
+ emit_move_insn (dest, tmp);
+ return true;
+ }
/* Expand
(set (reg:QI target) (mem:QI (address)))
to
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-9.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-9.c
index 5fba27c7a35..7c42438c9d9 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-9.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-9.c
@@ -29,4 +29,3 @@
TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {viota.m} 2 } } */
-/* { dg-final { scan-assembler-not {vmv\.v\.i} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111391.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111391.c
new file mode 100644
index 00000000000..a7f64c937c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111391.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -Wno-int-conversion -Wno-implicit-function -Wno-incompatible-pointer-types -Wno-implicit-function-declaration -Ofast -ftree-vectorize" } */
+
+int d ();
+typedef struct
+{
+ int b;
+} c;
+int
+e (char *f, long g)
+{
+ f += g;
+ while (g--)
+ *--f = d;
+}
+
+int
+d (c * f)
+{
+ while (h ())
+ switch (f->b)
+ case 'Q':
+ {
+ long a;
+ e (&a, sizeof (a));
+ i (a);
+ }
+}
--
2.36.3
@@ -2513,6 +2513,35 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
}
return true;
}
+ /* Expand
+ (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
+ Expand this data movement instead of simply forbidding it, since
+ we can improve the code generation for the following scenario
+ by RVV auto-vectorization:
+ (set (reg:V8QI 149) (vec_duplicate:V8QI (reg:QI)))
+ (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
+ Since RVV mode and scalar mode are in different REG_CLASS,
+ we need to explicitly move data from V_REGS to GR_REGS by scalar move. */
+ if (SUBREG_P (src) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (src))))
+ {
+ rtx subreg = force_reg (GET_MODE (SUBREG_REG (src)), SUBREG_REG (src));
+ machine_mode imode = GET_MODE_INNER (GET_MODE (subreg));
+ unsigned int ratio = GET_MODE_SIZE (mode).to_constant ()
+ / GET_MODE_SIZE (imode).to_constant ();
+ poly_int64 nunits = GET_MODE_NUNITS (GET_MODE (subreg));
+ nunits = exact_div (nunits, ratio);
+ scalar_mode smode = as_a<scalar_mode> (mode);
+ machine_mode vmode
+ = riscv_vector::get_vector_mode (smode, nunits).require ();
+ rtx tmp = gen_reg_rtx (mode);
+ rtx index
+ = gen_int_mode (exact_div (SUBREG_BYTE (src), GET_MODE_SIZE (smode)),
+ Pmode);
+ emit_insn (gen_vec_extract (vmode, vmode, tmp,
+ gen_lowpart (vmode, subreg), index));
+ emit_move_insn (dest, tmp);
+ return true;
+ }
/* Expand
(set (reg:QI target) (mem:QI (address)))
to
@@ -29,4 +29,3 @@
TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {viota.m} 2 } } */
-/* { dg-final { scan-assembler-not {vmv\.v\.i} } } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -Wno-int-conversion -Wno-implicit-function -Wno-incompatible-pointer-types -Wno-implicit-function-declaration -Ofast -ftree-vectorize" } */
+
+int d ();
+typedef struct
+{
+ int b;
+} c;
+int
+e (char *f, long g)
+{
+ f += g;
+ while (g--)
+ *--f = d;
+}
+
+int
+d (c * f)
+{
+ while (h ())
+ switch (f->b)
+ case 'Q':
+ {
+ long a;
+ e (&a, sizeof (a));
+ i (a);
+ }
+}