RISC-V: Fix redundant vmv1r.v instruction in vmsge.vx codegen
Checks
Commit Message
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Current expansion of vmsge will make RA produce redundant vmv1r.v.
testcase:
void f1 (void * in, void *out, int32_t x)
{
vbool32_t mask = *(vbool32_t*)in;
asm volatile ("":::"memory");
vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4);
vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4);
m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4);
__riscv_vsm_v_b32 (out, m4, 4);
}
Before this patch:
f1:
vsetvli a5,zero,e8,mf4,ta,ma
vlm.v v0,0(a0)
vsetivli zero,4,e32,m1,ta,mu
vle32.v v3,0(a0)
vle32.v v2,0(a0),v0.t
vmslt.vx v1,v3,a2
vmnot.m v1,v1
vmslt.vx v1,v3,a2,v0.t
vmxor.mm v1,v1,v0
vmv1r.v v0,v1
vmsge.vv v2,v2,v2,v0.t
vsm.v v2,0(a1)
ret
After this patch:
f1:
vsetvli a5,zero,e8,mf4,ta,ma
vlm.v v0,0(a0)
vsetivli zero,4,e32,m1,ta,mu
vle32.v v3,0(a0)
vle32.v v2,0(a0),v0.t
vmslt.vx v1,v3,a2
vmnot.m v1,v1
vmslt.vx v1,v3,a2,v0.t
vmxor.mm v0,v1,v0
vmsge.vv v2,v2,v2,v0.t
vsm.v v2,0(a1)
ret
gcc/ChangeLog:
* config/riscv/vector.md: Fix redundant vmv1r.v.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check.
---
gcc/config/riscv/vector.md | 15 +++++++--------
.../riscv/rvv/base/binop_vx_constraint-150.c | 2 +-
2 files changed, 8 insertions(+), 9 deletions(-)
Comments
LGTM, but pending this to the GCC 14 queue.
On Wed, Mar 22, 2023 at 8:16 PM <juzhe.zhong@rivai.ai> wrote:
>
> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>
> Current expansion of vmsge will make RA produce redundant vmv1r.v.
>
> testcase:
> void f1 (void * in, void *out, int32_t x)
> {
> vbool32_t mask = *(vbool32_t*)in;
> asm volatile ("":::"memory");
> vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
> vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
> vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4);
> vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4);
> m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4);
> __riscv_vsm_v_b32 (out, m4, 4);
> }
>
> Before this patch:
> f1:
> vsetvli a5,zero,e8,mf4,ta,ma
> vlm.v v0,0(a0)
> vsetivli zero,4,e32,m1,ta,mu
> vle32.v v3,0(a0)
> vle32.v v2,0(a0),v0.t
> vmslt.vx v1,v3,a2
> vmnot.m v1,v1
> vmslt.vx v1,v3,a2,v0.t
> vmxor.mm v1,v1,v0
> vmv1r.v v0,v1
> vmsge.vv v2,v2,v2,v0.t
> vsm.v v2,0(a1)
> ret
>
> After this patch:
> f1:
> vsetvli a5,zero,e8,mf4,ta,ma
> vlm.v v0,0(a0)
> vsetivli zero,4,e32,m1,ta,mu
> vle32.v v3,0(a0)
> vle32.v v2,0(a0),v0.t
> vmslt.vx v1,v3,a2
> vmnot.m v1,v1
> vmslt.vx v1,v3,a2,v0.t
> vmxor.mm v0,v1,v0
> vmsge.vv v2,v2,v2,v0.t
> vsm.v v2,0(a1)
> ret
>
>
> gcc/ChangeLog:
>
> * config/riscv/vector.md: Fix redundant vmv1r.v.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check.
>
> ---
> gcc/config/riscv/vector.md | 15 +++++++--------
> .../riscv/rvv/base/binop_vx_constraint-150.c | 2 +-
> 2 files changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index ebb014aecb1..f06d68be80f 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -4111,6 +4111,7 @@
> {
> enum rtx_code code = GET_CODE (operands[3]);
> rtx undef = RVV_VUNDEF (<VM>mode);
> + rtx tmp = gen_reg_rtx (<VM>mode);
> if (code == GEU && rtx_equal_p (operands[5], const0_rtx))
> {
> /* If vmsgeu with 0 immediate, expand it to vmset. */
> @@ -4157,12 +4158,11 @@
> - pseudoinstruction: vmsge{u}.vx vd, va, x
> - expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd. */
> emit_insn (
> - gen_pred_cmp<mode>_scalar (operands[0], operands[1], operands[2],
> + gen_pred_cmp<mode>_scalar (tmp, operands[1], operands[2],
> operands[3], operands[4], operands[5],
> operands[6], operands[7], operands[8]));
> emit_insn (gen_pred_nand<vm> (operands[0], CONSTM1_RTX (<VM>mode),
> - undef, operands[0], operands[0],
> - operands[6], operands[8]));
> + undef, tmp, tmp, operands[6], operands[8]));
> }
> else
> {
> @@ -4171,13 +4171,12 @@
> /* masked va >= x, vd == v0
> - pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
> - expansion: vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt. */
> - rtx reg = gen_reg_rtx (<VM>mode);
> emit_insn (gen_pred_cmp<mode>_scalar (
> - reg, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4],
> + tmp, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4],
> operands[5], operands[6], operands[7], operands[8]));
> emit_insn (
> gen_pred_andnot<vm> (operands[0], CONSTM1_RTX (<VM>mode), undef,
> - operands[1], reg, operands[6], operands[8]));
> + operands[1], tmp, operands[6], operands[8]));
> }
> else
> {
> @@ -4186,10 +4185,10 @@
> - expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0.
> */
> emit_insn (gen_pred_cmp<mode>_scalar (
> - operands[0], operands[1], operands[2], operands[3], operands[4],
> + tmp, operands[1], operands[2], operands[3], operands[4],
> operands[5], operands[6], operands[7], operands[8]));
> emit_insn (gen_pred (XOR, <VM>mode, operands[0],
> - CONSTM1_RTX (<VM>mode), undef, operands[0],
> + CONSTM1_RTX (<VM>mode), undef, tmp,
> operands[1], operands[6], operands[8]));
> }
> }
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c
> index 55a222f47ea..e92a8115f09 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c
> @@ -18,4 +18,4 @@ void f1 (void * in, void *out, int32_t x)
> /* { dg-final { scan-assembler-times {vmslt\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t} 1 } } */
> /* { dg-final { scan-assembler-times {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */
> /* { dg-final { scan-assembler-times {vmnot\.m\s+v[0-9]+,\s*v[0-9]+} 1 } } */
> -/* { dg-final { scan-assembler-times {vmv} 1 } } */
> +/* { dg-final { scan-assembler-not {vmv} } } */
> --
> 2.36.1
>
On 3/22/23 06:15, juzhe.zhong@rivai.ai wrote:
> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>
> Current expansion of vmsge will make RA produce redundant vmv1r.v.
>
> testcase:
> void f1 (void * in, void *out, int32_t x)
> {
> vbool32_t mask = *(vbool32_t*)in;
> asm volatile ("":::"memory");
> vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
> vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
> vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4);
> vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4);
> m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4);
> __riscv_vsm_v_b32 (out, m4, 4);
> }
>
> Before this patch:
> f1:
> vsetvli a5,zero,e8,mf4,ta,ma
> vlm.v v0,0(a0)
> vsetivli zero,4,e32,m1,ta,mu
> vle32.v v3,0(a0)
> vle32.v v2,0(a0),v0.t
> vmslt.vx v1,v3,a2
> vmnot.m v1,v1
> vmslt.vx v1,v3,a2,v0.t
> vmxor.mm v1,v1,v0
> vmv1r.v v0,v1
> vmsge.vv v2,v2,v2,v0.t
> vsm.v v2,0(a1)
> ret
>
> After this patch:
> f1:
> vsetvli a5,zero,e8,mf4,ta,ma
> vlm.v v0,0(a0)
> vsetivli zero,4,e32,m1,ta,mu
> vle32.v v3,0(a0)
> vle32.v v2,0(a0),v0.t
> vmslt.vx v1,v3,a2
> vmnot.m v1,v1
> vmslt.vx v1,v3,a2,v0.t
> vmxor.mm v0,v1,v0
> vmsge.vv v2,v2,v2,v0.t
> vsm.v v2,0(a1)
> ret
>
>
> gcc/ChangeLog:
>
> * config/riscv/vector.md: Fix redundant vmv1r.v.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check.
OK. Please push this to the trunk.
jeff
I can't push the code yet. Can you push it for me?
juzhe.zhong@rivai.ai
From: Jeff Law
Date: 2023-04-22 04:42
To: juzhe.zhong; gcc-patches
CC: kito.cheng; palmer
Subject: Re: [PATCH] RISC-V: Fix redundant vmv1r.v instruction in vmsge.vx codegen
On 3/22/23 06:15, juzhe.zhong@rivai.ai wrote:
> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>
> Current expansion of vmsge will make RA produce redundant vmv1r.v.
>
> testcase:
> void f1 (void * in, void *out, int32_t x)
> {
> vbool32_t mask = *(vbool32_t*)in;
> asm volatile ("":::"memory");
> vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
> vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
> vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4);
> vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4);
> m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4);
> __riscv_vsm_v_b32 (out, m4, 4);
> }
>
> Before this patch:
> f1:
> vsetvli a5,zero,e8,mf4,ta,ma
> vlm.v v0,0(a0)
> vsetivli zero,4,e32,m1,ta,mu
> vle32.v v3,0(a0)
> vle32.v v2,0(a0),v0.t
> vmslt.vx v1,v3,a2
> vmnot.m v1,v1
> vmslt.vx v1,v3,a2,v0.t
> vmxor.mm v1,v1,v0
> vmv1r.v v0,v1
> vmsge.vv v2,v2,v2,v0.t
> vsm.v v2,0(a1)
> ret
>
> After this patch:
> f1:
> vsetvli a5,zero,e8,mf4,ta,ma
> vlm.v v0,0(a0)
> vsetivli zero,4,e32,m1,ta,mu
> vle32.v v3,0(a0)
> vle32.v v2,0(a0),v0.t
> vmslt.vx v1,v3,a2
> vmnot.m v1,v1
> vmslt.vx v1,v3,a2,v0.t
> vmxor.mm v0,v1,v0
> vmsge.vv v2,v2,v2,v0.t
> vsm.v v2,0(a1)
> ret
>
>
> gcc/ChangeLog:
>
> * config/riscv/vector.md: Fix redundant vmv1r.v.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check.
OK. Please push this to the trunk.
jeff
Committed to trunk
On Mon, Apr 24, 2023 at 11:09 AM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> I can't push the code yet. Can you push it for me?
>
>
>
> juzhe.zhong@rivai.ai
>
> From: Jeff Law
> Date: 2023-04-22 04:42
> To: juzhe.zhong; gcc-patches
> CC: kito.cheng; palmer
> Subject: Re: [PATCH] RISC-V: Fix redundant vmv1r.v instruction in vmsge.vx codegen
>
>
> On 3/22/23 06:15, juzhe.zhong@rivai.ai wrote:
> > From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
> >
> > Current expansion of vmsge will make RA produce redundant vmv1r.v.
> >
> > testcase:
> > void f1 (void * in, void *out, int32_t x)
> > {
> > vbool32_t mask = *(vbool32_t*)in;
> > asm volatile ("":::"memory");
> > vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
> > vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
> > vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4);
> > vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4);
> > m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4);
> > __riscv_vsm_v_b32 (out, m4, 4);
> > }
> >
> > Before this patch:
> > f1:
> > vsetvli a5,zero,e8,mf4,ta,ma
> > vlm.v v0,0(a0)
> > vsetivli zero,4,e32,m1,ta,mu
> > vle32.v v3,0(a0)
> > vle32.v v2,0(a0),v0.t
> > vmslt.vx v1,v3,a2
> > vmnot.m v1,v1
> > vmslt.vx v1,v3,a2,v0.t
> > vmxor.mm v1,v1,v0
> > vmv1r.v v0,v1
> > vmsge.vv v2,v2,v2,v0.t
> > vsm.v v2,0(a1)
> > ret
> >
> > After this patch:
> > f1:
> > vsetvli a5,zero,e8,mf4,ta,ma
> > vlm.v v0,0(a0)
> > vsetivli zero,4,e32,m1,ta,mu
> > vle32.v v3,0(a0)
> > vle32.v v2,0(a0),v0.t
> > vmslt.vx v1,v3,a2
> > vmnot.m v1,v1
> > vmslt.vx v1,v3,a2,v0.t
> > vmxor.mm v0,v1,v0
> > vmsge.vv v2,v2,v2,v0.t
> > vsm.v v2,0(a1)
> > ret
> >
> >
> > gcc/ChangeLog:
> >
> > * config/riscv/vector.md: Fix redundant vmv1r.v.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check.
> OK. Please push this to the trunk.
>
> jeff
>
@@ -4111,6 +4111,7 @@
{
enum rtx_code code = GET_CODE (operands[3]);
rtx undef = RVV_VUNDEF (<VM>mode);
+ rtx tmp = gen_reg_rtx (<VM>mode);
if (code == GEU && rtx_equal_p (operands[5], const0_rtx))
{
/* If vmsgeu with 0 immediate, expand it to vmset. */
@@ -4157,12 +4158,11 @@
- pseudoinstruction: vmsge{u}.vx vd, va, x
- expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd. */
emit_insn (
- gen_pred_cmp<mode>_scalar (operands[0], operands[1], operands[2],
+ gen_pred_cmp<mode>_scalar (tmp, operands[1], operands[2],
operands[3], operands[4], operands[5],
operands[6], operands[7], operands[8]));
emit_insn (gen_pred_nand<vm> (operands[0], CONSTM1_RTX (<VM>mode),
- undef, operands[0], operands[0],
- operands[6], operands[8]));
+ undef, tmp, tmp, operands[6], operands[8]));
}
else
{
@@ -4171,13 +4171,12 @@
/* masked va >= x, vd == v0
- pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
- expansion: vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt. */
- rtx reg = gen_reg_rtx (<VM>mode);
emit_insn (gen_pred_cmp<mode>_scalar (
- reg, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4],
+ tmp, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4],
operands[5], operands[6], operands[7], operands[8]));
emit_insn (
gen_pred_andnot<vm> (operands[0], CONSTM1_RTX (<VM>mode), undef,
- operands[1], reg, operands[6], operands[8]));
+ operands[1], tmp, operands[6], operands[8]));
}
else
{
@@ -4186,10 +4185,10 @@
- expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0.
*/
emit_insn (gen_pred_cmp<mode>_scalar (
- operands[0], operands[1], operands[2], operands[3], operands[4],
+ tmp, operands[1], operands[2], operands[3], operands[4],
operands[5], operands[6], operands[7], operands[8]));
emit_insn (gen_pred (XOR, <VM>mode, operands[0],
- CONSTM1_RTX (<VM>mode), undef, operands[0],
+ CONSTM1_RTX (<VM>mode), undef, tmp,
operands[1], operands[6], operands[8]));
}
}
@@ -18,4 +18,4 @@ void f1 (void * in, void *out, int32_t x)
/* { dg-final { scan-assembler-times {vmslt\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t} 1 } } */
/* { dg-final { scan-assembler-times {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */
/* { dg-final { scan-assembler-times {vmnot\.m\s+v[0-9]+,\s*v[0-9]+} 1 } } */
-/* { dg-final { scan-assembler-times {vmv} 1 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */