RISC-V: Fix redundant vmv1r.v instruction in vmsge.vx codegen

Message ID 20230322121556.94496-1-juzhe.zhong@rivai.ai
State Accepted
Series RISC-V: Fix redundant vmv1r.v instruction in vmsge.vx codegen

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

juzhe.zhong@rivai.ai March 22, 2023, 12:15 p.m. UTC
  From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>

The current expansion of vmsge makes the register allocator (RA) produce a redundant vmv1r.v instruction.

testcase:
void f1 (void * in, void *out, int32_t x)
{
    vbool32_t mask = *(vbool32_t*)in;
    asm volatile ("":::"memory");
    vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
    vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
    vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4);
    vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4);
    m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4);
    __riscv_vsm_v_b32 (out, m4, 4);
}

Before this patch:
f1:
        vsetvli a5,zero,e8,mf4,ta,ma
        vlm.v   v0,0(a0)
        vsetivli        zero,4,e32,m1,ta,mu
        vle32.v v3,0(a0)
        vle32.v v2,0(a0),v0.t
        vmslt.vx        v1,v3,a2
        vmnot.m v1,v1
        vmslt.vx        v1,v3,a2,v0.t
        vmxor.mm        v1,v1,v0
        vmv1r.v v0,v1
        vmsge.vv        v2,v2,v2,v0.t
        vsm.v   v2,0(a1)
        ret

After this patch:
f1:
        vsetvli a5,zero,e8,mf4,ta,ma
        vlm.v   v0,0(a0)
        vsetivli        zero,4,e32,m1,ta,mu
        vle32.v v3,0(a0)
        vle32.v v2,0(a0),v0.t
        vmslt.vx        v1,v3,a2
        vmnot.m v1,v1
        vmslt.vx        v1,v3,a2,v0.t
        vmxor.mm        v0,v1,v0
        vmsge.vv        v2,v2,v2,v0.t
        vsm.v   v2,0(a1)
        ret
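
As background, vmsge{u}.vx is not a real RVV instruction: in the unmasked case the backend synthesizes it as vmslt{u}.vx followed by mask negation (va >= x is !(va < x)), as the comments in vector.md describe. The standalone program below is only a sketch of that identity at the intrinsics level, not part of the patch; it assumes an RVV 1.0 toolchain providing <riscv_vector.h> and a compile line such as -march=rv64gcv.

#include <riscv_vector.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int32_t in[4] = { -2, 5, 7, -9 };
  uint8_t out_a[1], out_b[1];
  size_t vl = 4;

  vint32m1_t v = __riscv_vle32_v_i32m1 (in, vl);

  /* Pseudoinstruction form: vmsge.vx.  */
  vbool32_t ge = __riscv_vmsge_vx_i32m1_b32 (v, 3, vl);

  /* Documented expansion: vmslt.vx vd, va, x; vmnand.mm vd, vd, vd
     (vmnand of a mask with itself is mask negation, i.e. vmnot.m).  */
  vbool32_t lt = __riscv_vmslt_vx_i32m1_b32 (v, 3, vl);
  vbool32_t not_lt = __riscv_vmnand_mm_b32 (lt, lt, vl);

  __riscv_vsm_v_b32 (out_a, ge, vl);
  __riscv_vsm_v_b32 (out_b, not_lt, vl);

  /* Only the low vl = 4 mask bits are defined; the tail is agnostic,
     so compare just those bits.  */
  printf ("%s\n", ((out_a[0] ^ out_b[0]) & 0xf) == 0 ? "match" : "mismatch");
  return 0;
}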


gcc/ChangeLog:

        * config/riscv/vector.md: Fix redundant vmv1r.v.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check.

---
 gcc/config/riscv/vector.md                        | 15 +++++++--------
 .../riscv/rvv/base/binop_vx_constraint-150.c      |  2 +-
 2 files changed, 8 insertions(+), 9 deletions(-)
  

Comments

Kito Cheng March 23, 2023, 3:27 a.m. UTC | #1
LGTM, but let's queue this for GCC 14.

On Wed, Mar 22, 2023 at 8:16 PM <juzhe.zhong@rivai.ai> wrote:
>
> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>
> Current expansion of vmsge will make RA produce redundant vmv1r.v.
>
> testcase:
> void f1 (void * in, void *out, int32_t x)
> {
>     vbool32_t mask = *(vbool32_t*)in;
>     asm volatile ("":::"memory");
>     vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
>     vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
>     vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4);
>     vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4);
>     m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4);
>     __riscv_vsm_v_b32 (out, m4, 4);
> }
>
> Before this patch:
> f1:
>         vsetvli a5,zero,e8,mf4,ta,ma
>         vlm.v   v0,0(a0)
>         vsetivli        zero,4,e32,m1,ta,mu
>         vle32.v v3,0(a0)
>         vle32.v v2,0(a0),v0.t
>         vmslt.vx        v1,v3,a2
>         vmnot.m v1,v1
>         vmslt.vx        v1,v3,a2,v0.t
>         vmxor.mm        v1,v1,v0
>         vmv1r.v v0,v1
>         vmsge.vv        v2,v2,v2,v0.t
>         vsm.v   v2,0(a1)
>         ret
>
> After this patch:
> f1:
>         vsetvli a5,zero,e8,mf4,ta,ma
>         vlm.v   v0,0(a0)
>         vsetivli        zero,4,e32,m1,ta,mu
>         vle32.v v3,0(a0)
>         vle32.v v2,0(a0),v0.t
>         vmslt.vx        v1,v3,a2
>         vmnot.m v1,v1
>         vmslt.vx        v1,v3,a2,v0.t
>         vmxor.mm        v0,v1,v0
>         vmsge.vv        v2,v2,v2,v0.t
>         vsm.v   v2,0(a1)
>         ret
>
>
> gcc/ChangeLog:
>
>         * config/riscv/vector.md: Fix redundant vmv1r.v.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check.
>
> ---
>  gcc/config/riscv/vector.md                        | 15 +++++++--------
>  .../riscv/rvv/base/binop_vx_constraint-150.c      |  2 +-
>  2 files changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index ebb014aecb1..f06d68be80f 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -4111,6 +4111,7 @@
>  {
>    enum rtx_code code = GET_CODE (operands[3]);
>    rtx undef = RVV_VUNDEF (<VM>mode);
> +  rtx tmp = gen_reg_rtx (<VM>mode);
>    if (code == GEU && rtx_equal_p (operands[5], const0_rtx))
>      {
>        /* If vmsgeu with 0 immediate, expand it to vmset.  */
> @@ -4157,12 +4158,11 @@
>             - pseudoinstruction: vmsge{u}.vx vd, va, x
>             - expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd.  */
>           emit_insn (
> -           gen_pred_cmp<mode>_scalar (operands[0], operands[1], operands[2],
> +           gen_pred_cmp<mode>_scalar (tmp, operands[1], operands[2],
>                                         operands[3], operands[4], operands[5],
>                                         operands[6], operands[7], operands[8]));
>           emit_insn (gen_pred_nand<vm> (operands[0], CONSTM1_RTX (<VM>mode),
> -                                       undef, operands[0], operands[0],
> -                                       operands[6], operands[8]));
> +                                       undef, tmp, tmp, operands[6], operands[8]));
>         }
>        else
>         {
> @@ -4171,13 +4171,12 @@
>               /* masked va >= x, vd == v0
>                 - pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
>                 - expansion: vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt.  */
> -             rtx reg = gen_reg_rtx (<VM>mode);
>               emit_insn (gen_pred_cmp<mode>_scalar (
> -               reg, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4],
> +               tmp, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4],
>                 operands[5], operands[6], operands[7], operands[8]));
>               emit_insn (
>                 gen_pred_andnot<vm> (operands[0], CONSTM1_RTX (<VM>mode), undef,
> -                                  operands[1], reg, operands[6], operands[8]));
> +                                  operands[1], tmp, operands[6], operands[8]));
>             }
>           else
>             {
> @@ -4186,10 +4185,10 @@
>                 - expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0.
>               */
>               emit_insn (gen_pred_cmp<mode>_scalar (
> -               operands[0], operands[1], operands[2], operands[3], operands[4],
> +               tmp, operands[1], operands[2], operands[3], operands[4],
>                 operands[5], operands[6], operands[7], operands[8]));
>               emit_insn (gen_pred (XOR, <VM>mode, operands[0],
> -                                  CONSTM1_RTX (<VM>mode), undef, operands[0],
> +                                  CONSTM1_RTX (<VM>mode), undef, tmp,
>                                    operands[1], operands[6], operands[8]));
>             }
>         }
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c
> index 55a222f47ea..e92a8115f09 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c
> @@ -18,4 +18,4 @@ void f1 (void * in, void *out, int32_t x)
>  /* { dg-final { scan-assembler-times {vmslt\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t} 1 } } */
>  /* { dg-final { scan-assembler-times {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */
>  /* { dg-final { scan-assembler-times {vmnot\.m\s+v[0-9]+,\s*v[0-9]+} 1 } } */
> -/* { dg-final { scan-assembler-times {vmv} 1 } } */
> +/* { dg-final { scan-assembler-not {vmv} } } */
> --
> 2.36.1
>
  
Jeff Law April 21, 2023, 8:42 p.m. UTC | #2
On 3/22/23 06:15, juzhe.zhong@rivai.ai wrote:
> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
> 
> Current expansion of vmsge will make RA produce redundant vmv1r.v.
> 
> testcase:
> void f1 (void * in, void *out, int32_t x)
> {
>      vbool32_t mask = *(vbool32_t*)in;
>      asm volatile ("":::"memory");
>      vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
>      vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
>      vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4);
>      vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4);
>      m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4);
>      __riscv_vsm_v_b32 (out, m4, 4);
> }
> 
> Before this patch:
> f1:
>          vsetvli a5,zero,e8,mf4,ta,ma
>          vlm.v   v0,0(a0)
>          vsetivli        zero,4,e32,m1,ta,mu
>          vle32.v v3,0(a0)
>          vle32.v v2,0(a0),v0.t
>          vmslt.vx        v1,v3,a2
>          vmnot.m v1,v1
>          vmslt.vx        v1,v3,a2,v0.t
>          vmxor.mm        v1,v1,v0
>          vmv1r.v v0,v1
>          vmsge.vv        v2,v2,v2,v0.t
>          vsm.v   v2,0(a1)
>          ret
> 
> After this patch:
> f1:
>          vsetvli a5,zero,e8,mf4,ta,ma
>          vlm.v   v0,0(a0)
>          vsetivli        zero,4,e32,m1,ta,mu
>          vle32.v v3,0(a0)
>          vle32.v v2,0(a0),v0.t
>          vmslt.vx        v1,v3,a2
>          vmnot.m v1,v1
>          vmslt.vx        v1,v3,a2,v0.t
>          vmxor.mm        v0,v1,v0
>          vmsge.vv        v2,v2,v2,v0.t
>          vsm.v   v2,0(a1)
>          ret
> 
> 
> gcc/ChangeLog:
> 
>          * config/riscv/vector.md: Fix redundant vmv1r.v.
> 
> gcc/testsuite/ChangeLog:
> 
>          * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check.
OK.  Please push this to the trunk.

jeff
  
juzhe.zhong@rivai.ai April 24, 2023, 3:09 a.m. UTC | #3
I can't push the code yet. Can you push it for me?



juzhe.zhong@rivai.ai
 
From: Jeff Law
Date: 2023-04-22 04:42
To: juzhe.zhong; gcc-patches
CC: kito.cheng; palmer
Subject: Re: [PATCH] RISC-V: Fix redundant vmv1r.v instruction in vmsge.vx codegen
 
 
OK.  Please push this to the trunk.
 
jeff
  
Kito Cheng April 26, 2023, 4:22 a.m. UTC | #4
Committed to trunk

On Mon, Apr 24, 2023 at 11:09 AM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> I can't push the code yet. Can you push it for me?
  

Patch

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index ebb014aecb1..f06d68be80f 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -4111,6 +4111,7 @@ 
 {
   enum rtx_code code = GET_CODE (operands[3]);
   rtx undef = RVV_VUNDEF (<VM>mode);
+  rtx tmp = gen_reg_rtx (<VM>mode);
   if (code == GEU && rtx_equal_p (operands[5], const0_rtx))
     {
       /* If vmsgeu with 0 immediate, expand it to vmset.  */
@@ -4157,12 +4158,11 @@ 
 	    - pseudoinstruction: vmsge{u}.vx vd, va, x
 	    - expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd.  */
 	  emit_insn (
-	    gen_pred_cmp<mode>_scalar (operands[0], operands[1], operands[2],
+	    gen_pred_cmp<mode>_scalar (tmp, operands[1], operands[2],
 					operands[3], operands[4], operands[5],
 					operands[6], operands[7], operands[8]));
 	  emit_insn (gen_pred_nand<vm> (operands[0], CONSTM1_RTX (<VM>mode),
-					undef, operands[0], operands[0],
-					operands[6], operands[8]));
+					undef, tmp, tmp, operands[6], operands[8]));
 	}
       else
 	{
@@ -4171,13 +4171,12 @@ 
 	      /* masked va >= x, vd == v0
 		- pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
 		- expansion: vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt.  */
-	      rtx reg = gen_reg_rtx (<VM>mode);
 	      emit_insn (gen_pred_cmp<mode>_scalar (
-		reg, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4],
+		tmp, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4],
 		operands[5], operands[6], operands[7], operands[8]));
 	      emit_insn (
 		gen_pred_andnot<vm> (operands[0], CONSTM1_RTX (<VM>mode), undef,
-				   operands[1], reg, operands[6], operands[8]));
+				   operands[1], tmp, operands[6], operands[8]));
 	    }
 	  else
 	    {
@@ -4186,10 +4185,10 @@ 
 		- expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0.
 	      */
 	      emit_insn (gen_pred_cmp<mode>_scalar (
-		operands[0], operands[1], operands[2], operands[3], operands[4],
+		tmp, operands[1], operands[2], operands[3], operands[4],
 		operands[5], operands[6], operands[7], operands[8]));
 	      emit_insn (gen_pred (XOR, <VM>mode, operands[0],
-				   CONSTM1_RTX (<VM>mode), undef, operands[0],
+				   CONSTM1_RTX (<VM>mode), undef, tmp,
 				   operands[1], operands[6], operands[8]));
 	    }
 	}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c
index 55a222f47ea..e92a8115f09 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c
@@ -18,4 +18,4 @@  void f1 (void * in, void *out, int32_t x)
 /* { dg-final { scan-assembler-times {vmslt\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t} 1 } } */
 /* { dg-final { scan-assembler-times {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */
 /* { dg-final { scan-assembler-times {vmnot\.m\s+v[0-9]+,\s*v[0-9]+} 1 } } */
-/* { dg-final { scan-assembler-times {vmv} 1 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */