[GCC14,QUEUE] RISC-V: Eliminate redundant vsetvli for duplicate AVL def
Checks
Commit Message
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
void f (int8_t* base1,int8_t* base2,int8_t* out,int n)
{
vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
for (int i = 0; i < n; i++){
v = __riscv_vor_vx_i8mf4 (v, 101, 32);
v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
}
__riscv_vse8_v_i8mf4 (out, v, 32);
}
before this patch:
f:
li a5,32
vsetvli zero,a5,e8,mf4,tu,ma
vle8.v v1,0(a0)
ble a3,zero,.L2
li t0,0
li a0,101
.L3:
addiw t0,t0,1
vor.vx v1,v1,a0
vle8.v v1,0(a1)
bne a3,t0,.L3
.L2:
vsetvli zero,zero,e8,mf4,tu,ma
vse8.v v1,0(a2)
ret
afther this patch:
f:
li a5,32
vsetvli zero,a5,e8,mf4,tu,ma
vle8.v v1,0(a0)
ble a3,zero,.L2
li t0,0
li a0,101
.L3:
addiw t0,t0,1
vor.vx v1,v1,a0
vle8.v v1,0(a1)
bne a3,t0,.L3
.L2:
vse8.v v1,0(a2)
ret
gcc/ChangeLog:
* config/riscv/riscv-vsetvl.cc (vector_infos_manager::all_avail_in_compatible_p): New function.
(pass_vsetvl::refine_vsetvls): Remove redundant vsetvli.
* config/riscv/riscv-vsetvl.h: New function declare.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/vsetvl/avl_single-102.c: New test.
---
gcc/config/riscv/riscv-vsetvl.cc | 67 ++++++++++++++++++-
gcc/config/riscv/riscv-vsetvl.h | 1 +
.../riscv/rvv/vsetvl/avl_single-102.c | 16 +++++
3 files changed, 81 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-102.c
Comments
On 3/27/23 19:01, juzhe.zhong@rivai.ai wrote:
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> void f (int8_t* base1,int8_t* base2,int8_t* out,int n)
> {
> vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
> for (int i = 0; i < n; i++){
> v = __riscv_vor_vx_i8mf4 (v, 101, 32);
> v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
> }
> __riscv_vse8_v_i8mf4 (out, v, 32);
> }
>
> before this patch:
> f:
> li a5,32
> vsetvli zero,a5,e8,mf4,tu,ma
> vle8.v v1,0(a0)
> ble a3,zero,.L2
> li t0,0
> li a0,101
> .L3:
> addiw t0,t0,1
> vor.vx v1,v1,a0
> vle8.v v1,0(a1)
> bne a3,t0,.L3
> .L2:
> vsetvli zero,zero,e8,mf4,tu,ma
> vse8.v v1,0(a2)
> ret
>
>
> afther this patch:
>
> f:
> li a5,32
> vsetvli zero,a5,e8,mf4,tu,ma
> vle8.v v1,0(a0)
> ble a3,zero,.L2
> li t0,0
> li a0,101
> .L3:
> addiw t0,t0,1
> vor.vx v1,v1,a0
> vle8.v v1,0(a1)
> bne a3,t0,.L3
> .L2:
> vse8.v v1,0(a2)
> ret
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vsetvl.cc (vector_infos_manager::all_avail_in_compatible_p): New function.
> (pass_vsetvl::refine_vsetvls): Remove redundant vsetvli.
> * config/riscv/riscv-vsetvl.h: New function declare.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/vsetvl/avl_single-102.c: New test.
>
> ---
> gcc/config/riscv/riscv-vsetvl.cc | 67 ++++++++++++++++++-
> gcc/config/riscv/riscv-vsetvl.h | 1 +
> .../riscv/rvv/vsetvl/avl_single-102.c | 16 +++++
> 3 files changed, 81 insertions(+), 3 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-102.c
>
> diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
> index 4948e5d4c5e..58568b45010 100644
> --- a/gcc/config/riscv/riscv-vsetvl.cc
> +++ b/gcc/config/riscv/riscv-vsetvl.cc
> @@ -2376,6 +2376,23 @@ vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const
> return true;
> }
>
> +bool
> +vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const
This needs a function comment. Perhaps:
/* Return TRUE if the incoming vector configuration state
to CFG_BB is compatible with the vector configuration
state in CFG_BB, FALSE otherwise. */
> +
> + /* Optimize such case:
> + void f (int8_t* base1,int8_t* base2,int8_t* out,int n)
> + {
> + vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
> + for (int i = 0; i < n; i++){
> + v = __riscv_vor_vx_i8mf4 (v, 101, 32);
> + v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
> + }
> + __riscv_vse8_v_i8mf4 (out, v, 32);
> + }
In general I would suggest rather than writing code like this in the
comments, instead describe the properties you're looking for. That way
someone who may not be a RISC-V expert can more easily interpret the
scenario you're looking for and what action you want to take when the
scenario is discovered.
In this particular case it look like you're trying to describe the
scenario where all incoming edges to a block have a vector state that is
compatbile with the block. In such a case we need not emit a vsetvl in
the current block.
THe right place for code is in the testsuite.
So generally OK, though you do need to adjust the comments slightly.
Please do that and repost for a final review/ACK.
Thanks,
Jeff
@@ -2376,6 +2376,23 @@ vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const
return true;
}
+bool
+vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const
+{
+ const auto &info = vector_block_infos[cfg_bb->index].local_dem;
+ sbitmap avin = vector_avin[cfg_bb->index];
+ unsigned int bb_index;
+ sbitmap_iterator sbi;
+ EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
+ {
+ const auto &avin_info
+ = static_cast<const vl_vtype_info &> (*vector_exprs[bb_index]);
+ if (!info.compatible_p (avin_info))
+ return false;
+ }
+ return true;
+}
+
bool
vector_infos_manager::all_same_avl_p (const basic_block cfg_bb,
sbitmap bitdata) const
@@ -3741,9 +3758,53 @@ pass_vsetvl::refine_vsetvls (void) const
m_vector_manager->to_refine_vsetvls.add (rinsn);
continue;
}
- rinsn = PREV_INSN (rinsn);
- rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
- change_insn (rinsn, new_pat);
+
+ /* Optimize such case:
+ void f (int8_t* base1,int8_t* base2,int8_t* out,int n)
+ {
+ vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
+ for (int i = 0; i < n; i++){
+ v = __riscv_vor_vx_i8mf4 (v, 101, 32);
+ v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
+ }
+ __riscv_vse8_v_i8mf4 (out, v, 32);
+ }
+
+ f:
+ li a5,32
+ vsetvli zero,a5,e8,mf4,tu,ma
+ vle8.v v1,0(a0)
+ ble a3,zero,.L2
+ li t0,0
+ li a0,101
+ .L3:
+ addiw t0,t0,1
+ vor.vx v1,v1,a0
+ vle8.v v1,0(a1)
+ bne a3,t0,.L3
+ .L2:
+ vsetvli zero,zero,e8,mf4,tu,ma
+ vse8.v v1,0(a2)
+ ret
+
+ The second vsetvli is redundant. */
+
+ gcc_assert (has_vtype_op (insn->rtl ()));
+ rinsn = PREV_INSN (insn->rtl ());
+ gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
+ if (m_vector_manager->all_avail_in_compatible_p (cfg_bb))
+ {
+ size_t id = m_vector_manager->get_expr_id (info);
+ if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id))
+ continue;
+ eliminate_insn (rinsn);
+ }
+ else
+ {
+ rtx new_pat
+ = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
+ change_insn (rinsn, new_pat);
+ }
}
}
@@ -451,6 +451,7 @@ public:
bool all_same_ratio_p (sbitmap) const;
bool all_empty_predecessor_p (const basic_block) const;
+ bool all_avail_in_compatible_p (const basic_block) const;
void release (void);
void create_bitmap_vectors (void);
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t* base1,int8_t* base2,int8_t* out,int n)
+{
+ vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
+ for (int i = 0; i < n; i++){
+ v = __riscv_vor_vx_i8mf4 (v, 101, 32);
+ v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
+ }
+ __riscv_vse8_v_i8mf4 (out, v, 32);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */