RISC-V: Optimize vsetvl AVL for VLS VLMAX auto-vectorization
Checks
Commit Message
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
This patch optimizes the AVL for VLS auto-vectorization.
Consider such case:
typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
__attribute__ ((noipa)) void
f_vnx2qi (int8_t a, int8_t b, int8_t *out)
{
vnx2qi v = {a, b};
*(vnx2qi *) out = v;
}
Before this patch:
f_vnx2qi:
vsetvli a5,zero,e8,mf8,ta,ma
vmv.v.x v1,a0
vslide1down.vx v1,v1,a1
vse8.v v1,0(a2)
ret
After this patch:
f_vnx2qi:
vsetivli zero,2,e8,mf8,ta,ma
vmv.v.x v1,a0
vslide1down.vx v1,v1,a1
vse8.v v1,0(a2)
ret
gcc/ChangeLog:
* config/riscv/riscv-protos.h (emit_vlmax_vsetvl): Change argument type.
* config/riscv/riscv-v.cc (emit_vlmax_vsetvl): Optimize AVL for vlmax VLS.
(emit_vlmax_reg_op): Ditto.
* config/riscv/vector.md: Adapt argument.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/vf_avl-1.c: New test.
---
gcc/config/riscv/riscv-protos.h | 2 +-
gcc/config/riscv/riscv-v.cc | 25 +++++++++++++++----
gcc/config/riscv/vector.md | 4 +--
.../gcc.target/riscv/rvv/base/vf_avl-1.c | 15 +++++++++++
4 files changed, 38 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c
Comments
Hmmm, here is an alternative approach for this:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index b8dc333f54e1..c88056024e7d 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -50,6 +50,21 @@ using namespace riscv_vector;
namespace riscv_vector {
+/* Return true if vlmax is constant value instead and can be used
+ in vsetivli. */
+static bool
+const_vlmax_p (machine_mode mode)
+{
+ poly_uint64 nunits = GET_MODE_NUNITS (mode);
+
+ /* Only allowed in VLS-VLMAX mode. */
+ if (!nunits.is_constant ())
+ return false;
+
+ /* vsetivli can only hold 0~31. */
+ return (IN_RANGE (nunits.to_constant (), 0, 31));
+}
+
template <int MAX_OPERANDS> class insn_expander
{
public:
@@ -101,15 +116,26 @@ public:
void set_len_and_policy (rtx len, bool force_vlmax = false)
{
- bool vlmax_p = force_vlmax;
+ bool vlmax_p = force_vlmax || !len;
gcc_assert (has_dest);
- if (!len)
+ if (vlmax_p)
{
- vlmax_p = true;
- len = gen_reg_rtx (Pmode);
- emit_vlmax_vsetvl (dest_mode, len);
- }
+ if (const_vlmax_p (dest_mode))
+ {
+ /* Optimize VLS-VLMAX code gen, we can use vsetivli instead of
+ vsetvli to obtain the value of vlmax. */
+ poly_uint64 nunits = GET_MODE_NUNITS (dest_mode);
+ len = gen_int_mode (nunits, Pmode);
+ /* It has become NONVLMAX now. */
+ vlmax_p = false;
+ }
+ else if (!len)
+ {
+ len = gen_reg_rtx (Pmode);
+ emit_vlmax_vsetvl (dest_mode, len);
+ }
+ }
add_input_operand (len, Pmode);
Hi kito,
Per an offline sync with Juzhe, I can help with the rest of this patch. I will try the suggested approach and keep you posted.
Pan
-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Kito Cheng via Gcc-patches
Sent: Saturday, May 13, 2023 11:14 AM
To: juzhe.zhong@rivai.ai
Cc: gcc-patches@gcc.gnu.org; palmer@dabbelt.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH] RISC-V: Optimize vsetvl AVL for VLS VLMAX auto-vectorization
Hmmm, here is an alternative approach for this:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index b8dc333f54e1..c88056024e7d 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -50,6 +50,21 @@ using namespace riscv_vector;
namespace riscv_vector {
+/* Return true if vlmax is constant value instead and can be used
+ in vsetivli. */
+static bool
+const_vlmax_p (machine_mode mode)
+{
+ poly_uint64 nunits = GET_MODE_NUNITS (mode);
+
+ /* Only allowed in VLS-VLMAX mode. */
+ if (!nunits.is_constant ())
+ return false;
+
+ /* vsetivli can only hold 0~31. */
+ return (IN_RANGE (nunits.to_constant (), 0, 31));
+}
+
template <int MAX_OPERANDS> class insn_expander
{
public:
@@ -101,15 +116,26 @@ public:
void set_len_and_policy (rtx len, bool force_vlmax = false)
{
- bool vlmax_p = force_vlmax;
+ bool vlmax_p = force_vlmax || !len;
gcc_assert (has_dest);
- if (!len)
+ if (vlmax_p)
{
- vlmax_p = true;
- len = gen_reg_rtx (Pmode);
- emit_vlmax_vsetvl (dest_mode, len);
- }
+ if (const_vlmax_p (dest_mode))
+ {
+ /* Optimize VLS-VLMAX code gen, we can use vsetivli instead of
+ vsetvli to obtain the value of vlmax. */
+ poly_uint64 nunits = GET_MODE_NUNITS (dest_mode);
+ len = gen_int_mode (nunits, Pmode);
+ /* It has become NONVLMAX now. */
+ vlmax_p = false;
+ }
+ else if (!len)
+ {
+ len = gen_reg_rtx (Pmode);
+ emit_vlmax_vsetvl (dest_mode, len);
+ }
+ }
add_input_operand (len, Pmode);
--
On Sat, May 13, 2023 at 10:09 AM <juzhe.zhong@rivai.ai> wrote:
>
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> This patch optimizes the AVL for VLS auto-vectorization.
>
> Consider such case:
>
> typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
>
> __attribute__ ((noipa)) void
> f_vnx2qi (int8_t a, int8_t b, int8_t *out) {
> vnx2qi v = {a, b};
> *(vnx2qi *) out = v;
> }
>
> Before this patch:
>
> f_vnx2qi:
> vsetvli a5,zero,e8,mf8,ta,ma
> vmv.v.x v1,a0
> vslide1down.vx v1,v1,a1
> vse8.v v1,0(a2)
> ret
>
> After this patch:
>
> f_vnx2qi:
> vsetivli zero,2,e8,mf8,ta,ma
> vmv.v.x v1,a0
> vslide1down.vx v1,v1,a1
> vse8.v v1,0(a2)
> ret
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-protos.h (emit_vlmax_vsetvl): Change argument type.
> * config/riscv/riscv-v.cc (emit_vlmax_vsetvl): Optimize AVL for vlmax VLS.
> (emit_vlmax_reg_op): Ditto.
> * config/riscv/vector.md: Adapt argument.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/vf_avl-1.c: New test.
>
> ---
> gcc/config/riscv/riscv-protos.h | 2 +-
> gcc/config/riscv/riscv-v.cc | 25 +++++++++++++++----
> gcc/config/riscv/vector.md | 4 +--
> .../gcc.target/riscv/rvv/base/vf_avl-1.c | 15 +++++++++++
> 4 files changed, 38 insertions(+), 8 deletions(-) create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c
>
> diff --git a/gcc/config/riscv/riscv-protos.h
> b/gcc/config/riscv/riscv-protos.h index bc71f9cbbba..90934d43430
> 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -164,7 +164,7 @@ bool check_builtin_call (location_t, vec<location_t>, unsigned int,
> tree, unsigned int, tree *); bool
> const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
> bool legitimize_move (rtx, rtx, machine_mode); -void emit_vlmax_vsetvl
> (machine_mode, rtx);
> +void emit_vlmax_vsetvl (machine_mode, rtx *);
> void emit_hard_vlmax_vsetvl (machine_mode, rtx); void emit_vlmax_op
> (unsigned, rtx, rtx, machine_mode); void emit_vlmax_reg_op (unsigned,
> rtx, rtx, rtx, machine_mode); diff --git a/gcc/config/riscv/riscv-v.cc
> b/gcc/config/riscv/riscv-v.cc index d844c305320..382cc4b6311 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -120,7 +120,9 @@ public:
> {
> vlmax_p = true;
> len = gen_reg_rtx (Pmode);
> - emit_vlmax_vsetvl (dest_mode, len);
> + emit_vlmax_vsetvl (dest_mode, &len);
> + if (CONST_INT_P (len))
> + vlmax_p = false;
> }
>
> add_input_operand (len, Pmode);
> @@ -183,16 +185,29 @@ emit_hard_vlmax_vsetvl (machine_mode vmode, rtx
> vl) }
>
> void
> -emit_vlmax_vsetvl (machine_mode vmode, rtx vl)
> +emit_vlmax_vsetvl (machine_mode vmode, rtx *vl)
> {
> unsigned int sew = get_sew (vmode);
> enum vlmul_type vlmul = get_vlmul (vmode);
> unsigned int ratio = calculate_ratio (sew, vlmul);
>
> + /* For VLS VLMAX auto-vectorization, we change
> + VL into const_int value of VF so that we
> + will emit "vsetivli zero, CONST_INT" instead of
> + "vsetvli a5, zero".
> +
> + TODO: Support VLS min-length in the future. */
> + poly_uint64 nunits = GET_MODE_NUNITS (vmode); if
> + (nunits.is_constant () && IN_RANGE (nunits.to_constant (), 0, 31))
> + {
> + *vl = gen_int_mode (nunits, Pmode);
> + return;
> + }
> +
> if (!optimize)
> - emit_hard_vlmax_vsetvl (vmode, vl);
> + emit_hard_vlmax_vsetvl (vmode, *vl);
> else
> - emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode)));
> + emit_insn (gen_vlmax_avl (Pmode, *vl, gen_int_mode (ratio,
> + Pmode)));
> }
>
> /* Calculate SEW/LMUL ratio. */
> @@ -323,7 +338,7 @@ emit_vlmax_reg_op (unsigned icode, rtx dest, rtx src, rtx len,
> machine_mode mask_mode) {
> emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode,
> - /* Force VLMAX */ true);
> + /* Force VLMAX */ CONST_INT_P (len) ? false : true);
> }
>
> void
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index 328fce8d632..b02ecf92c00 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -720,7 +720,7 @@
> emit_insn (gen_rtx_SET (operands[0], operands[1]));
> else
> {
> - riscv_vector::emit_vlmax_vsetvl (<V_FRACT:MODE>mode, operands[2]);
> + riscv_vector::emit_vlmax_vsetvl (<V_FRACT:MODE>mode,
> + &operands[2]);
> riscv_vector::emit_vlmax_reg_op (code_for_pred_mov (<V_FRACT:MODE>mode),
> operands[0], operands[1], operands[2],
> <VM>mode); @@ -741,7 +741,7 @@
> emit_insn (gen_rtx_SET (operands[0], operands[1]));
> else
> {
> - riscv_vector::emit_vlmax_vsetvl (<VB:MODE>mode, operands[2]);
> + riscv_vector::emit_vlmax_vsetvl (<VB:MODE>mode, &operands[2]);
> riscv_vector::emit_vlmax_reg_op (code_for_pred_mov (<VB:MODE>mode),
> operands[0], operands[1], operands[2],
> <VB:MODE>mode); diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c
> new file mode 100644
> index 00000000000..11adf6bc611
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-1.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv32gcv -mabi=ilp32d --param
> +riscv-autovec-preference=fixed-vlmax" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
> +
> +__attribute__ ((noipa)) void
> +f_vnx2qi (int8_t a, int8_t b, int8_t *out) {
> + vnx2qi v = {a, b};
> + *(vnx2qi *) out = v;
> +}
> +
> +/* { dg-final { scan-assembler-times
> +{vsetivli\s+zero,\s*2,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 } } */
> --
> 2.36.1
>
@@ -164,7 +164,7 @@ bool check_builtin_call (location_t, vec<location_t>, unsigned int,
tree, unsigned int, tree *);
bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
bool legitimize_move (rtx, rtx, machine_mode);
-void emit_vlmax_vsetvl (machine_mode, rtx);
+void emit_vlmax_vsetvl (machine_mode, rtx *);
void emit_hard_vlmax_vsetvl (machine_mode, rtx);
void emit_vlmax_op (unsigned, rtx, rtx, machine_mode);
void emit_vlmax_reg_op (unsigned, rtx, rtx, rtx, machine_mode);
@@ -120,7 +120,9 @@ public:
{
vlmax_p = true;
len = gen_reg_rtx (Pmode);
- emit_vlmax_vsetvl (dest_mode, len);
+ emit_vlmax_vsetvl (dest_mode, &len);
+ if (CONST_INT_P (len))
+ vlmax_p = false;
}
add_input_operand (len, Pmode);
@@ -183,16 +185,29 @@ emit_hard_vlmax_vsetvl (machine_mode vmode, rtx vl)
}
void
-emit_vlmax_vsetvl (machine_mode vmode, rtx vl)
+emit_vlmax_vsetvl (machine_mode vmode, rtx *vl)
{
unsigned int sew = get_sew (vmode);
enum vlmul_type vlmul = get_vlmul (vmode);
unsigned int ratio = calculate_ratio (sew, vlmul);
+ /* For VLS VLMAX auto-vectorization, we change
+ VL into const_int value of VF so that we
+ will emit "vsetivli zero, CONST_INT" instead of
+ "vsetvli a5, zero".
+
+ TODO: Support VLS min-length in the future. */
+ poly_uint64 nunits = GET_MODE_NUNITS (vmode);
+ if (nunits.is_constant () && IN_RANGE (nunits.to_constant (), 0, 31))
+ {
+ *vl = gen_int_mode (nunits, Pmode);
+ return;
+ }
+
if (!optimize)
- emit_hard_vlmax_vsetvl (vmode, vl);
+ emit_hard_vlmax_vsetvl (vmode, *vl);
else
- emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode)));
+ emit_insn (gen_vlmax_avl (Pmode, *vl, gen_int_mode (ratio, Pmode)));
}
/* Calculate SEW/LMUL ratio. */
@@ -323,7 +338,7 @@ emit_vlmax_reg_op (unsigned icode, rtx dest, rtx src, rtx len,
machine_mode mask_mode)
{
emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode,
- /* Force VLMAX */ true);
+ /* Force VLMAX */ CONST_INT_P (len) ? false : true);
}
void
@@ -720,7 +720,7 @@
emit_insn (gen_rtx_SET (operands[0], operands[1]));
else
{
- riscv_vector::emit_vlmax_vsetvl (<V_FRACT:MODE>mode, operands[2]);
+ riscv_vector::emit_vlmax_vsetvl (<V_FRACT:MODE>mode, &operands[2]);
riscv_vector::emit_vlmax_reg_op (code_for_pred_mov (<V_FRACT:MODE>mode),
operands[0], operands[1], operands[2],
<VM>mode);
@@ -741,7 +741,7 @@
emit_insn (gen_rtx_SET (operands[0], operands[1]));
else
{
- riscv_vector::emit_vlmax_vsetvl (<VB:MODE>mode, operands[2]);
+ riscv_vector::emit_vlmax_vsetvl (<VB:MODE>mode, &operands[2]);
riscv_vector::emit_vlmax_reg_op (code_for_pred_mov (<VB:MODE>mode),
operands[0], operands[1], operands[2],
<VB:MODE>mode);
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+
+typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
+
+__attribute__ ((noipa)) void
+f_vnx2qi (int8_t a, int8_t b, int8_t *out)
+{
+ vnx2qi v = {a, b};
+ *(vnx2qi *) out = v;
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*2,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 } } */