RISC-V: Enable select_vl for RVV auto-vectorization
Checks
Commit Message
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Consider the following example:
void vec_add(int32_t *restrict c, int32_t *restrict a, int32_t *restrict b,
int N) {
for (long i = 0; i < N; i++) {
c[i] = a[i] + b[i];
}
}
After this patch:
vec_add:
ble a3,zero,.L5
.L3:
vsetvli a5,a3,e32,m1,ta,ma
vle32.v v2,0(a1)
vle32.v v1,0(a2)
vsetvli a6,zero,e32,m1,ta,ma ===> redundant vsetvl.
slli a4,a5,2
vadd.vv v1,v1,v2
sub a3,a3,a5
vsetvli zero,a5,e32,m1,ta,ma ===> redundant vsetvl.
vse32.v v1,0(a0)
add a1,a1,a4
add a2,a2,a4
add a0,a0,a4
bne a3,zero,.L3
.L5:
ret
We get close-to-optimal codegen, but with some redundant vsetvls.
This is not a big issue, and it can easily be addressed in the RISC-V backend.
I am going to add a standalone pass, "AVL propagation" (avlprop), to address
this issue.
gcc/ChangeLog:
* config/riscv/autovec.md (select_vl<mode>): New pattern.
* config/riscv/riscv-protos.h (expand_select_vl): New function.
* config/riscv/riscv-v.cc (expand_select_vl): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/ternop/ternop-2.c: Adapt test.
* gcc.target/riscv/rvv/autovec/ternop/ternop-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/partial/select_vl-1.c: New test.
---
gcc/config/riscv/autovec.md | 14 ++++++++++
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-v.cc | 12 +++++++++
.../riscv/rvv/autovec/partial/select_vl-1.c | 26 +++++++++++++++++++
.../riscv/rvv/autovec/ternop/ternop-2.c | 2 +-
.../riscv/rvv/autovec/ternop/ternop-5.c | 2 +-
6 files changed, 55 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
Comments
LGTM, thanks for this!!!!
On Sat, Jun 10, 2023 at 8:42 AM <juzhe.zhong@rivai.ai> wrote:
>
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> Consider this following example:
> void vec_add(int32_t *restrict c, int32_t *restrict a, int32_t *restrict b,
> int N) {
> for (long i = 0; i < N; i++) {
> c[i] = a[i] + b[i];
> }
> }
>
> After this patch:
> vec_add:
> ble a3,zero,.L5
> .L3:
> vsetvli a5,a3,e32,m1,ta,ma
> vle32.v v2,0(a1)
> vle32.v v1,0(a2)
> vsetvli a6,zero,e32,m1,ta,ma ===> redundant vsetvl.
> slli a4,a5,2
> vadd.vv v1,v1,v2
> sub a3,a3,a5
> vsetvli zero,a5,e32,m1,ta,ma ===> redundant vsetvl.
> vse32.v v1,0(a0)
> add a1,a1,a4
> add a2,a2,a4
> add a0,a0,a4
> bne a3,zero,.L3
> .L5:
> ret
>
> We can get close-to-optimal codegen but with some redundant vsetvls.
> This is not the big issue which will be easily addressed in RISC-V backend.
>
> I am going to add a standalone PASS "AVL propagation" (avlprop) to addresse
> such issue.
>
> gcc/ChangeLog:
>
> * config/riscv/autovec.md (select_vl<mode>): New pattern.
> * config/riscv/riscv-protos.h (expand_select_vl): New function.
> * config/riscv/riscv-v.cc (expand_select_vl): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/ternop/ternop-2.c: Adapt test.
> * gcc.target/riscv/rvv/autovec/ternop/ternop-5.c: Ditto.
> * gcc.target/riscv/rvv/autovec/partial/select_vl-1.c: New test.
>
> ---
> gcc/config/riscv/autovec.md | 14 ++++++++++
> gcc/config/riscv/riscv-protos.h | 1 +
> gcc/config/riscv/riscv-v.cc | 12 +++++++++
> .../riscv/rvv/autovec/partial/select_vl-1.c | 26 +++++++++++++++++++
> .../riscv/rvv/autovec/ternop/ternop-2.c | 2 +-
> .../riscv/rvv/autovec/ternop/ternop-5.c | 2 +-
> 6 files changed, 55 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
>
> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> index 9f4492db23c..b7070099f29 100644
> --- a/gcc/config/riscv/autovec.md
> +++ b/gcc/config/riscv/autovec.md
> @@ -626,3 +626,17 @@
> }
> [(set_attr "type" "vimuladd")
> (set_attr "mode" "<MODE>")])
> +
> +;; =========================================================================
> +;; == SELECT_VL
> +;; =========================================================================
> +
> +(define_expand "select_vl<mode>"
> + [(match_operand:P 0 "register_operand")
> + (match_operand:P 1 "vector_length_operand")
> + (match_operand:P 2 "")]
> + "TARGET_VECTOR"
> +{
> + riscv_vector::expand_select_vl (operands);
> + DONE;
> +})
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index 66c1f535d60..6db3a46c682 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -246,6 +246,7 @@ void expand_vec_series (rtx, rtx, rtx);
> void expand_vec_init (rtx, rtx);
> void expand_vcond (rtx *);
> void expand_vec_perm (rtx, rtx, rtx, rtx);
> +void expand_select_vl (rtx *);
> /* Rounding mode bitfield for fixed point VXRM. */
> enum vxrm_field_enum
> {
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 477a22cd2b0..e1b85a5af91 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -2447,4 +2447,16 @@ expand_vec_perm_const (machine_mode vmode, machine_mode op_mode, rtx target,
> return ret;
> }
>
> +/* Generate no side effects vsetvl to get the vector length. */
> +void
> +expand_select_vl (rtx *ops)
> +{
> + poly_int64 nunits = rtx_to_poly_int64 (ops[2]);
> + /* We arbitrary picked QImode as inner scalar mode to get vector mode.
> + since vsetvl only demand ratio. We let VSETVL PASS to optimize it. */
> + scalar_int_mode mode = QImode;
> + machine_mode rvv_mode = get_vector_mode (mode, nunits).require ();
> + emit_insn (gen_no_side_effects_vsetvl_rtx (rvv_mode, ops[0], ops[1]));
> +}
> +
> } // namespace riscv_vector
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
> new file mode 100644
> index 00000000000..74bbf40ee9f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fno-vect-cost-model -fno-tree-loop-distribute-patterns -fdump-tree-optimized-details" } */
> +
> +#include <stdint-gcc.h>
> +
> +#define TEST_TYPE(TYPE) \
> + __attribute__ ((noipa)) void select_vl_##TYPE (TYPE *__restrict dst, \
> + TYPE *__restrict a, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = a[i]; \
> + }
> +
> +#define TEST_ALL() \
> + TEST_TYPE (int8_t) \
> + TEST_TYPE (uint8_t) \
> + TEST_TYPE (int16_t) \
> + TEST_TYPE (uint16_t) \
> + TEST_TYPE (int32_t) \
> + TEST_TYPE (uint32_t) \
> + TEST_TYPE (int64_t) \
> + TEST_TYPE (uint64_t) \
> + TEST_TYPE (float) \
> + TEST_TYPE (double)
> +
> +TEST_ALL ()
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-2.c
> index 89eeaf6315f..e52e07ddd09 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-2.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-2.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns" } */
>
> #include <stdint-gcc.h>
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-5.c
> index a9a7198feb4..49c85efbf3a 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-5.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-5.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns" } */
>
> #include <stdint-gcc.h>
>
> --
> 2.36.3
>
Committed, thanks Kito.
Pan
-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Kito Cheng via Gcc-patches
Sent: Saturday, June 10, 2023 11:03 AM
To: juzhe.zhong@rivai.ai
Cc: gcc-patches@gcc.gnu.org; kito.cheng@sifive.com; palmer@rivosinc.com; rdapp.gcc@gmail.com; jeffreyalaw@gmail.com
Subject: Re: [PATCH] RISC-V: Enable select_vl for RVV auto-vectorization
LGTM, thanks for this!!!!
On Sat, Jun 10, 2023 at 8:42 AM <juzhe.zhong@rivai.ai> wrote:
>
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> Consider this following example:
> void vec_add(int32_t *restrict c, int32_t *restrict a, int32_t *restrict b,
> int N) {
> for (long i = 0; i < N; i++) {
> c[i] = a[i] + b[i];
> }
> }
>
> After this patch:
> vec_add:
> ble a3,zero,.L5
> .L3:
> vsetvli a5,a3,e32,m1,ta,ma
> vle32.v v2,0(a1)
> vle32.v v1,0(a2)
> vsetvli a6,zero,e32,m1,ta,ma ===> redundant vsetvl.
> slli a4,a5,2
> vadd.vv v1,v1,v2
> sub a3,a3,a5
> vsetvli zero,a5,e32,m1,ta,ma ===> redundant vsetvl.
> vse32.v v1,0(a0)
> add a1,a1,a4
> add a2,a2,a4
> add a0,a0,a4
> bne a3,zero,.L3
> .L5:
> ret
>
> We can get close-to-optimal codegen but with some redundant vsetvls.
> This is not the big issue which will be easily addressed in RISC-V backend.
>
> I am going to add a standalone PASS "AVL propagation" (avlprop) to
> addresse such issue.
>
> gcc/ChangeLog:
>
> * config/riscv/autovec.md (select_vl<mode>): New pattern.
> * config/riscv/riscv-protos.h (expand_select_vl): New function.
> * config/riscv/riscv-v.cc (expand_select_vl): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/ternop/ternop-2.c: Adapt test.
> * gcc.target/riscv/rvv/autovec/ternop/ternop-5.c: Ditto.
> * gcc.target/riscv/rvv/autovec/partial/select_vl-1.c: New test.
>
> ---
> gcc/config/riscv/autovec.md | 14 ++++++++++
> gcc/config/riscv/riscv-protos.h | 1 +
> gcc/config/riscv/riscv-v.cc | 12 +++++++++
> .../riscv/rvv/autovec/partial/select_vl-1.c | 26 +++++++++++++++++++
> .../riscv/rvv/autovec/ternop/ternop-2.c | 2 +-
> .../riscv/rvv/autovec/ternop/ternop-5.c | 2 +-
> 6 files changed, 55 insertions(+), 2 deletions(-) create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
>
> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> index 9f4492db23c..b7070099f29 100644
> --- a/gcc/config/riscv/autovec.md
> +++ b/gcc/config/riscv/autovec.md
> @@ -626,3 +626,17 @@
> }
> [(set_attr "type" "vimuladd")
> (set_attr "mode" "<MODE>")])
> +
> +;;
> +=====================================================================
> +====
> +;; == SELECT_VL
> +;;
> +=====================================================================
> +====
> +
> +(define_expand "select_vl<mode>"
> + [(match_operand:P 0 "register_operand")
> + (match_operand:P 1 "vector_length_operand")
> + (match_operand:P 2 "")]
> + "TARGET_VECTOR"
> +{
> + riscv_vector::expand_select_vl (operands);
> + DONE;
> +})
> diff --git a/gcc/config/riscv/riscv-protos.h
> b/gcc/config/riscv/riscv-protos.h index 66c1f535d60..6db3a46c682
> 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -246,6 +246,7 @@ void expand_vec_series (rtx, rtx, rtx); void
> expand_vec_init (rtx, rtx); void expand_vcond (rtx *); void
> expand_vec_perm (rtx, rtx, rtx, rtx);
> +void expand_select_vl (rtx *);
> /* Rounding mode bitfield for fixed point VXRM. */ enum
> vxrm_field_enum { diff --git a/gcc/config/riscv/riscv-v.cc
> b/gcc/config/riscv/riscv-v.cc index 477a22cd2b0..e1b85a5af91 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -2447,4 +2447,16 @@ expand_vec_perm_const (machine_mode vmode, machine_mode op_mode, rtx target,
> return ret;
> }
>
> +/* Generate no side effects vsetvl to get the vector length. */ void
> +expand_select_vl (rtx *ops) {
> + poly_int64 nunits = rtx_to_poly_int64 (ops[2]);
> + /* We arbitrary picked QImode as inner scalar mode to get vector mode.
> + since vsetvl only demand ratio. We let VSETVL PASS to optimize
> +it. */
> + scalar_int_mode mode = QImode;
> + machine_mode rvv_mode = get_vector_mode (mode, nunits).require ();
> + emit_insn (gen_no_side_effects_vsetvl_rtx (rvv_mode, ops[0],
> +ops[1])); }
> +
> } // namespace riscv_vector
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
> new file mode 100644
> index 00000000000..74bbf40ee9f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param
> +riscv-autovec-preference=scalable -fno-vect-cost-model
> +-fno-tree-loop-distribute-patterns -fdump-tree-optimized-details" }
> +*/
> +
> +#include <stdint-gcc.h>
> +
> +#define TEST_TYPE(TYPE) \
> + __attribute__ ((noipa)) void select_vl_##TYPE (TYPE *__restrict dst, \
> + TYPE *__restrict a, int n) \
> + { \
> + for (int i = 0; i < n; i++) \
> + dst[i] = a[i]; \
> + }
> +
> +#define TEST_ALL() \
> + TEST_TYPE (int8_t) \
> + TEST_TYPE (uint8_t) \
> + TEST_TYPE (int16_t) \
> + TEST_TYPE (uint16_t) \
> + TEST_TYPE (int32_t) \
> + TEST_TYPE (uint32_t) \
> + TEST_TYPE (int64_t) \
> + TEST_TYPE (uint64_t) \
> + TEST_TYPE (float) \
> + TEST_TYPE (double)
> +
> +TEST_ALL ()
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-2.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-2.c
> index 89eeaf6315f..e52e07ddd09 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-2.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-2.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d
> --param=riscv-autovec-preference=scalable" } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d
> +--param=riscv-autovec-preference=scalable -fno-schedule-insns" } */
>
> #include <stdint-gcc.h>
>
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-5.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-5.c
> index a9a7198feb4..49c85efbf3a 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-5.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-5.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d
> --param=riscv-autovec-preference=scalable" } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d
> +--param=riscv-autovec-preference=scalable -fno-schedule-insns" } */
>
> #include <stdint-gcc.h>
>
> --
> 2.36.3
>
@@ -626,3 +626,17 @@
}
[(set_attr "type" "vimuladd")
(set_attr "mode" "<MODE>")])
+
+;; =========================================================================
+;; == SELECT_VL
+;; =========================================================================
+
+(define_expand "select_vl<mode>"
+ [(match_operand:P 0 "register_operand")
+ (match_operand:P 1 "vector_length_operand")
+ (match_operand:P 2 "")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_select_vl (operands);
+ DONE;
+})
@@ -246,6 +246,7 @@ void expand_vec_series (rtx, rtx, rtx);
void expand_vec_init (rtx, rtx);
void expand_vcond (rtx *);
void expand_vec_perm (rtx, rtx, rtx, rtx);
+void expand_select_vl (rtx *);
/* Rounding mode bitfield for fixed point VXRM. */
enum vxrm_field_enum
{
@@ -2447,4 +2447,16 @@ expand_vec_perm_const (machine_mode vmode, machine_mode op_mode, rtx target,
return ret;
}
+/* Generate no side effects vsetvl to get the vector length. */
+void
+expand_select_vl (rtx *ops)
+{
+ poly_int64 nunits = rtx_to_poly_int64 (ops[2]);
+ /* We arbitrarily pick QImode as the inner scalar mode to get the vector mode,
+ since vsetvl only demands the ratio. We let the VSETVL pass optimize it. */
+ scalar_int_mode mode = QImode;
+ machine_mode rvv_mode = get_vector_mode (mode, nunits).require ();
+ emit_insn (gen_no_side_effects_vsetvl_rtx (rvv_mode, ops[0], ops[1]));
+}
+
} // namespace riscv_vector
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fno-vect-cost-model -fno-tree-loop-distribute-patterns -fdump-tree-optimized-details" } */
+
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE) \
+ __attribute__ ((noipa)) void select_vl_##TYPE (TYPE *__restrict dst, \
+ TYPE *__restrict a, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = a[i]; \
+ }
+
+#define TEST_ALL() \
+ TEST_TYPE (int8_t) \
+ TEST_TYPE (uint8_t) \
+ TEST_TYPE (int16_t) \
+ TEST_TYPE (uint16_t) \
+ TEST_TYPE (int32_t) \
+ TEST_TYPE (uint32_t) \
+ TEST_TYPE (int64_t) \
+ TEST_TYPE (uint64_t) \
+ TEST_TYPE (float) \
+ TEST_TYPE (double)
+
+TEST_ALL ()
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns" } */
#include <stdint-gcc.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns" } */
#include <stdint-gcc.h>