RISC-V: Optimize TARGET_XTHEADCONDMOV

Message ID 20230526005240.86495-1-lidie@eswincomputing.com
State Unresolved
Headers
Series RISC-V: Optimize TARGET_XTHEADCONDMOV |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Die Li May 26, 2023, 12:52 a.m. UTC
  This patch allows fewer instructions to be used when TARGET_XTHEADCONDMOV is enabled.

Provide an example from the existing testcases.

Testcase:
int ConEmv_imm_imm_reg(int x, int y){
  if (x == 1000) return 10;
  return y;
}

Cflags:
-O2 -march=rv64gc_xtheadcondmov -mabi=lp64d

before patch:
ConEmv_imm_imm_reg:
	addi	a5,a0,-1000
	li	a0,10
	th.mvnez	a0,zero,a5
	th.mveqz	a1,zero,a5
	or	a0,a0,a1
	ret

after patch:
ConEmv_imm_imm_reg:
	addi	a5,a0,-1000
	li	a0,10
	th.mvnez	a0,a1,a5
	ret

Signed-off-by: Die Li <lidie@eswincomputing.com>

gcc/ChangeLog:

        * config/riscv/riscv.cc (riscv_expand_conditional_move_onesided): Delete.
        (riscv_expand_conditional_move): Reuse the TARGET_SFB_ALU expand process for TARGET_XTHEADCONDMOV.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/xtheadcondmov-indirect-rv32.c: Update the output.
        * gcc.target/riscv/xtheadcondmov-indirect-rv64.c: Likewise.
---
 gcc/config/riscv/riscv.cc                     | 44 +++--------------
 .../riscv/xtheadcondmov-indirect-rv32.c       | 48 +++++++------------
 .../riscv/xtheadcondmov-indirect-rv64.c       | 48 +++++++------------
 3 files changed, 42 insertions(+), 98 deletions(-)
  

Comments

Kito Cheng May 26, 2023, 2:43 a.m. UTC | #1
I would defer this to vrull or t-head folks :)

Die Li <lidie@eswincomputing.com> 於 2023年5月26日 週五 08:53 寫道:

> This patch allows less instructions to be used when TARGET_XTHEADCONDMOV
> is enabled.
>
> Provide an example from the existing testcases.
>
> Testcase:
> int ConEmv_imm_imm_reg(int x, int y){
>   if (x == 1000) return 10;
>   return y;
> }
>
> Cflags:
> -O2 -march=rv64gc_xtheadcondmov -mabi=lp64d
>
> before patch:
> ConEmv_imm_imm_reg:
>         addi    a5,a0,-1000
>         li      a0,10
>         th.mvnez        a0,zero,a5
>         th.mveqz        a1,zero,a5
>         or      a0,a0,a1
>         ret
>
> after patch:
> ConEmv_imm_imm_reg:
>         addi    a5,a0,-1000
>         li      a0,10
>         th.mvnez        a0,a1,a5
>         ret
>
> Signed-off-by: Die Li <lidie@eswincomputing.com>
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv.cc (riscv_expand_conditional_move_onesided):
> Delete.
>         (riscv_expand_conditional_move):  Reuse the TARGET_SFB_ALU expand
> process for TARGET_XTHEADCONDMOV
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/xtheadcondmov-indirect-rv32.c: Update the
> output.
>         * gcc.target/riscv/xtheadcondmov-indirect-rv64.c: Likewise.
> ---
>  gcc/config/riscv/riscv.cc                     | 44 +++--------------
>  .../riscv/xtheadcondmov-indirect-rv32.c       | 48 +++++++------------
>  .../riscv/xtheadcondmov-indirect-rv64.c       | 48 +++++++------------
>  3 files changed, 42 insertions(+), 98 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 09fc9e5d95e..8b8ac9181ba 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -3442,37 +3442,6 @@ riscv_expand_conditional_branch (rtx label,
> rtx_code code, rtx op0, rtx op1)
>    emit_jump_insn (gen_condjump (condition, label));
>  }
>
> -/* Helper to emit two one-sided conditional moves for the movecc.  */
> -
> -static void
> -riscv_expand_conditional_move_onesided (rtx dest, rtx cons, rtx alt,
> -                                       rtx_code code, rtx op0, rtx op1)
> -{
> -  machine_mode mode = GET_MODE (dest);
> -
> -  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT);
> -  gcc_assert (reg_or_0_operand (cons, mode));
> -  gcc_assert (reg_or_0_operand (alt, mode));
> -
> -  riscv_emit_int_compare (&code, &op0, &op1, true);
> -  rtx cond = gen_rtx_fmt_ee (code, mode, op0, op1);
> -
> -  rtx tmp1 = gen_reg_rtx (mode);
> -  rtx tmp2 = gen_reg_rtx (mode);
> -
> -  emit_insn (gen_rtx_SET (tmp1, gen_rtx_IF_THEN_ELSE (mode, cond,
> -                                                     cons, const0_rtx)));
> -
> -  /* We need to expand a sequence for both blocks and we do that such,
> -     that the second conditional move will use the inverted condition.
> -     We use temporaries that are or'd to the dest register.  */
> -  cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, mode, op0, op1);
> -  emit_insn (gen_rtx_SET (tmp2, gen_rtx_IF_THEN_ELSE (mode, cond,
> -                                                     alt, const0_rtx)));
> -
> -  emit_insn (gen_rtx_SET (dest, gen_rtx_IOR (mode, tmp1, tmp2)));
> - }
> -
>  /* Emit a cond move: If OP holds, move CONS to DEST; else move ALT to
> DEST.
>     Return 0 if expansion failed.  */
>
> @@ -3483,6 +3452,7 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx
> cons, rtx alt)
>    rtx_code code = GET_CODE (op);
>    rtx op0 = XEXP (op, 0);
>    rtx op1 = XEXP (op, 1);
> +  bool need_eq_ne_p = false;
>
>    if (TARGET_XTHEADCONDMOV
>        && GET_MODE_CLASS (mode) == MODE_INT
> @@ -3492,14 +3462,12 @@ riscv_expand_conditional_move (rtx dest, rtx op,
> rtx cons, rtx alt)
>        && GET_MODE (op0) == mode
>        && GET_MODE (op1) == mode
>        && (code == EQ || code == NE))
> +        need_eq_ne_p = true;
> +
> +  if (need_eq_ne_p || (TARGET_SFB_ALU
> +          && GET_MODE (op0) == word_mode))
>      {
> -      riscv_expand_conditional_move_onesided (dest, cons, alt, code, op0,
> op1);
> -      return true;
> -    }
> -  else if (TARGET_SFB_ALU
> -          && GET_MODE (op0) == word_mode)
> -    {
> -      riscv_emit_int_compare (&code, &op0, &op1);
> +      riscv_emit_int_compare (&code, &op0, &op1, need_eq_ne_p);
>        rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
>
>        /* The expander allows (const_int 0) for CONS for the benefit of
> diff --git a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv32.c
> b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv32.c
> index 9afdc2eabfd..e2b135f3d00 100644
> --- a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv32.c
> +++ b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv32.c
> @@ -1,15 +1,13 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -march=rv32gc_xtheadcondmov -mabi=ilp32
> -mriscv-attribute" } */
> -/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" } } */
> +/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz"
> "-flto"} } */
>  /* { dg-final { check-function-bodies "**" ""  } } */
>
>  /*
>  **ConEmv_imm_imm_reg:
>  **     addi    a5,a0,-1000
>  **     li      a0,10
> -**     th.mvnez        a0,zero,a5
> -**     th.mveqz        a1,zero,a5
> -**     or      a0,a0,a1
> +**     th.mvnez        a0,a1,a5
>  **     ret
>  */
>  int ConEmv_imm_imm_reg(int x, int y){
> @@ -20,9 +18,8 @@ int ConEmv_imm_imm_reg(int x, int y){
>  /*
>  **ConEmv_imm_reg_reg:
>  **     addi    a5,a0,-1000
> -**     th.mvnez        a1,zero,a5
> -**     th.mveqz        a2,zero,a5
> -**     or      a0,a1,a2
> +**     th.mveqz        a2,a1,a5
> +**     mv      a0,a2
>  **     ret
>  */
>  int ConEmv_imm_reg_reg(int x, int y, int z){
> @@ -34,9 +31,7 @@ int ConEmv_imm_reg_reg(int x, int y, int z){
>  **ConEmv_reg_imm_reg:
>  **     sub     a1,a0,a1
>  **     li      a0,10
> -**     th.mvnez        a0,zero,a1
> -**     th.mveqz        a2,zero,a1
> -**     or      a0,a0,a2
> +**     th.mvnez        a0,a2,a1
>  **     ret
>  */
>  int ConEmv_reg_imm_reg(int x, int y, int z){
> @@ -47,9 +42,8 @@ int ConEmv_reg_imm_reg(int x, int y, int z){
>  /*
>  **ConEmv_reg_reg_reg:
>  **     sub     a1,a0,a1
> -**     th.mvnez        a2,zero,a1
> -**     th.mveqz        a3,zero,a1
> -**     or      a0,a2,a3
> +**     th.mveqz        a3,a2,a1
> +**     mv      a0,a3
>  **     ret
>  */
>  int ConEmv_reg_reg_reg(int x, int y, int z, int n){
> @@ -59,12 +53,10 @@ int ConEmv_reg_reg_reg(int x, int y, int z, int n){
>
>  /*
>  **ConNmv_imm_imm_reg:
> -**     li      a5,9998336
> -**     addi    a4,a0,-1000
> -**     addi    a5,a5,1664
> -**     th.mvnez        a1,zero,a4
> -**     th.mveqz        a5,zero,a4
> -**     or      a0,a1,a5
> +**     addi    a5,a0,-1000
> +**     li      a0,9998336
> +**     addi    a0,a0,1664
> +**     th.mveqz        a0,a1,a5
>  **     ret
>  */
>  int ConNmv_imm_imm_reg(int x, int y){
> @@ -74,10 +66,9 @@ int ConNmv_imm_imm_reg(int x, int y){
>
>  /*
>  **ConNmv_imm_reg_reg:
> -**     addi    a5,a0,-1000
> -**     th.mveqz        a1,zero,a5
> -**     th.mvnez        a2,zero,a5
> -**     or      a0,a1,a2
> +**     addi    a0,a0,-1000
> +**     th.mvnez        a2,a1,a0
> +**     mv      a0,a2
>  **     ret
>  */
>  int ConNmv_imm_reg_reg(int x, int y, int z){
> @@ -89,9 +80,7 @@ int ConNmv_imm_reg_reg(int x, int y, int z){
>  **ConNmv_reg_imm_reg:
>  **     sub     a1,a0,a1
>  **     li      a0,10
> -**     th.mveqz        a0,zero,a1
> -**     th.mvnez        a2,zero,a1
> -**     or      a0,a0,a2
> +**     th.mveqz        a0,a2,a1
>  **     ret
>  */
>  int ConNmv_reg_imm_reg(int x, int y, int z){
> @@ -101,10 +90,9 @@ int ConNmv_reg_imm_reg(int x, int y, int z){
>
>  /*
>  **ConNmv_reg_reg_reg:
> -**     sub     a1,a0,a1
> -**     th.mveqz        a2,zero,a1
> -**     th.mvnez        a3,zero,a1
> -**     or      a0,a2,a3
> +**     sub     a0,a0,a1
> +**     th.mvnez        a3,a2,a0
> +**     mv      a0,a3
>  **     ret
>  */
>  int ConNmv_reg_reg_reg(int x, int y, int z, int n){
> diff --git a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv64.c
> b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv64.c
> index a1982fd90bd..99956f8496c 100644
> --- a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv64.c
> +++ b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv64.c
> @@ -1,15 +1,13 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -march=rv64gc_xtheadcondmov -mabi=lp64d
> -mriscv-attribute" } */
> -/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" } } */
> +/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz"
> "-flto"} } */
>  /* { dg-final { check-function-bodies "**" ""  } } */
>
>  /*
>  **ConEmv_imm_imm_reg:
>  **     addi    a5,a0,-1000
>  **     li      a0,10
> -**     th.mvnez        a0,zero,a5
> -**     th.mveqz        a1,zero,a5
> -**     or      a0,a0,a1
> +**     th.mvnez        a0,a1,a5
>  **     ret
>  */
>  int ConEmv_imm_imm_reg(int x, int y){
> @@ -19,10 +17,9 @@ int ConEmv_imm_imm_reg(int x, int y){
>
>  /*
>  **ConEmv_imm_reg_reg:
> -**     addi    a5,a0,-1000
> -**     th.mvnez        a1,zero,a5
> -**     th.mveqz        a2,zero,a5
> -**     or      a0,a1,a2
> +**     addi    a0,a0,-1000
> +**     th.mveqz        a2,a1,a5
> +**     mv      a0,a2
>  **     ret
>  */
>  int ConEmv_imm_reg_reg(int x, int y, int z){
> @@ -34,9 +31,7 @@ int ConEmv_imm_reg_reg(int x, int y, int z){
>  **ConEmv_reg_imm_reg:
>  **     sub     a1,a0,a1
>  **     li      a0,10
> -**     th.mvnez        a0,zero,a1
> -**     th.mveqz        a2,zero,a1
> -**     or      a0,a0,a2
> +**     th.mvnez        a0,a2,a1
>  **     ret
>  */
>  int ConEmv_reg_imm_reg(int x, int y, int z){
> @@ -47,9 +42,8 @@ int ConEmv_reg_imm_reg(int x, int y, int z){
>  /*
>  **ConEmv_reg_reg_reg:
>  **     sub     a1,a0,a1
> -**     th.mvnez        a2,zero,a1
> -**     th.mveqz        a3,zero,a1
> -**     or      a0,a2,a3
> +**     th.mveqz        a3,a2,a1
> +**     mv      a0,a3
>  **     ret
>  */
>  int ConEmv_reg_reg_reg(int x, int y, int z, int n){
> @@ -59,12 +53,10 @@ int ConEmv_reg_reg_reg(int x, int y, int z, int n){
>
>  /*
>  **ConNmv_imm_imm_reg:
> -**     li      a5,9998336
> -**     addi    a4,a0,-1000
> -**     addi    a5,a5,1664
> -**     th.mvnez        a1,zero,a4
> -**     th.mveqz        a5,zero,a4
> -**     or      a0,a1,a5
> +**     addi    a5,a0,-1000
> +**     li      a0,9998336
> +**     addi    a0,a0,1664
> +**     th.mveqz        a0,a1,a5
>  **     ret
>  */
>  int ConNmv_imm_imm_reg(int x, int y){
> @@ -75,9 +67,8 @@ int ConNmv_imm_imm_reg(int x, int y){
>  /*
>  **ConNmv_imm_reg_reg:
>  **     addi    a5,a0,-1000
> -**     th.mveqz        a1,zero,a5
> -**     th.mvnez        a2,zero,a5
> -**     or      a0,a1,a2
> +**     th.mvnez        a2,a1,a0
> +**     mv      a0,a2
>  **     ret
>  */
>  int ConNmv_imm_reg_reg(int x, int y, int z){
> @@ -89,9 +80,7 @@ int ConNmv_imm_reg_reg(int x, int y, int z){
>  **ConNmv_reg_imm_reg:
>  **     sub     a1,a0,a1
>  **     li      a0,10
> -**     th.mveqz        a0,zero,a1
> -**     th.mvnez        a2,zero,a1
> -**     or      a0,a0,a2
> +**     th.mveqz        a0,a2,a1
>  **     ret
>  */
>  int ConNmv_reg_imm_reg(int x, int y, int z){
> @@ -101,10 +90,9 @@ int ConNmv_reg_imm_reg(int x, int y, int z){
>
>  /*
>  **ConNmv_reg_reg_reg:
> -**     sub     a1,a0,a1
> -**     th.mveqz        a2,zero,a1
> -**     th.mvnez        a3,zero,a1
> -**     or      a0,a2,a3
> +**     sub     a0,a0,a1
> +**     th.mvnez        a3,a2,a0
> +**     mv      a0,a3
>  **     ret
>  */
>  int ConNmv_reg_reg_reg(int x, int y, int z, int n){
> --
> 2.17.1
>
>
  
Jeff Law May 26, 2023, 4:01 a.m. UTC | #2
On 5/25/23 20:43, Kito Cheng wrote:
> I would defer this to vrull or t-head folks :)
Given the overlap between where this is going and how I think we should 
be handling Zicondops, I'll take it.  It overlaps with work I've had 
Raphael doing recently.

jeff
  
Philipp Tomsich May 26, 2023, 8:20 a.m. UTC | #3
LGTM.  Happy to move this forward, once it receives an OK from one of you.

--Philipp.

On Fri, 26 May 2023 at 02:53, Die Li <lidie@eswincomputing.com> wrote:
>
> This patch allows less instructions to be used when TARGET_XTHEADCONDMOV is enabled.
>
> Provide an example from the existing testcases.
>
> Testcase:
> int ConEmv_imm_imm_reg(int x, int y){
>   if (x == 1000) return 10;
>   return y;
> }
>
> Cflags:
> -O2 -march=rv64gc_xtheadcondmov -mabi=lp64d
>
> before patch:
> ConEmv_imm_imm_reg:
>         addi    a5,a0,-1000
>         li      a0,10
>         th.mvnez        a0,zero,a5
>         th.mveqz        a1,zero,a5
>         or      a0,a0,a1
>         ret
>
> after patch:
> ConEmv_imm_imm_reg:
>         addi    a5,a0,-1000
>         li      a0,10
>         th.mvnez        a0,a1,a5
>         ret
>
> Signed-off-by: Die Li <lidie@eswincomputing.com>
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv.cc (riscv_expand_conditional_move_onesided): Delete.
>         (riscv_expand_conditional_move):  Reuse the TARGET_SFB_ALU expand process for TARGET_XTHEADCONDMOV
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/xtheadcondmov-indirect-rv32.c: Update the output.
>         * gcc.target/riscv/xtheadcondmov-indirect-rv64.c: Likewise.
> ---
>  gcc/config/riscv/riscv.cc                     | 44 +++--------------
>  .../riscv/xtheadcondmov-indirect-rv32.c       | 48 +++++++------------
>  .../riscv/xtheadcondmov-indirect-rv64.c       | 48 +++++++------------
>  3 files changed, 42 insertions(+), 98 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 09fc9e5d95e..8b8ac9181ba 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -3442,37 +3442,6 @@ riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1)
>    emit_jump_insn (gen_condjump (condition, label));
>  }
>
> -/* Helper to emit two one-sided conditional moves for the movecc.  */
> -
> -static void
> -riscv_expand_conditional_move_onesided (rtx dest, rtx cons, rtx alt,
> -                                       rtx_code code, rtx op0, rtx op1)
> -{
> -  machine_mode mode = GET_MODE (dest);
> -
> -  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT);
> -  gcc_assert (reg_or_0_operand (cons, mode));
> -  gcc_assert (reg_or_0_operand (alt, mode));
> -
> -  riscv_emit_int_compare (&code, &op0, &op1, true);
> -  rtx cond = gen_rtx_fmt_ee (code, mode, op0, op1);
> -
> -  rtx tmp1 = gen_reg_rtx (mode);
> -  rtx tmp2 = gen_reg_rtx (mode);
> -
> -  emit_insn (gen_rtx_SET (tmp1, gen_rtx_IF_THEN_ELSE (mode, cond,
> -                                                     cons, const0_rtx)));
> -
> -  /* We need to expand a sequence for both blocks and we do that such,
> -     that the second conditional move will use the inverted condition.
> -     We use temporaries that are or'd to the dest register.  */
> -  cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, mode, op0, op1);
> -  emit_insn (gen_rtx_SET (tmp2, gen_rtx_IF_THEN_ELSE (mode, cond,
> -                                                     alt, const0_rtx)));
> -
> -  emit_insn (gen_rtx_SET (dest, gen_rtx_IOR (mode, tmp1, tmp2)));
> - }
> -
>  /* Emit a cond move: If OP holds, move CONS to DEST; else move ALT to DEST.
>     Return 0 if expansion failed.  */
>
> @@ -3483,6 +3452,7 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
>    rtx_code code = GET_CODE (op);
>    rtx op0 = XEXP (op, 0);
>    rtx op1 = XEXP (op, 1);
> +  bool need_eq_ne_p = false;
>
>    if (TARGET_XTHEADCONDMOV
>        && GET_MODE_CLASS (mode) == MODE_INT
> @@ -3492,14 +3462,12 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
>        && GET_MODE (op0) == mode
>        && GET_MODE (op1) == mode
>        && (code == EQ || code == NE))
> +        need_eq_ne_p = true;
> +
> +  if (need_eq_ne_p || (TARGET_SFB_ALU
> +          && GET_MODE (op0) == word_mode))
>      {
> -      riscv_expand_conditional_move_onesided (dest, cons, alt, code, op0, op1);
> -      return true;
> -    }
> -  else if (TARGET_SFB_ALU
> -          && GET_MODE (op0) == word_mode)
> -    {
> -      riscv_emit_int_compare (&code, &op0, &op1);
> +      riscv_emit_int_compare (&code, &op0, &op1, need_eq_ne_p);
>        rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
>
>        /* The expander allows (const_int 0) for CONS for the benefit of
> diff --git a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv32.c b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv32.c
> index 9afdc2eabfd..e2b135f3d00 100644
> --- a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv32.c
> +++ b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv32.c
> @@ -1,15 +1,13 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -march=rv32gc_xtheadcondmov -mabi=ilp32 -mriscv-attribute" } */
> -/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" } } */
> +/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
>  /* { dg-final { check-function-bodies "**" ""  } } */
>
>  /*
>  **ConEmv_imm_imm_reg:
>  **     addi    a5,a0,-1000
>  **     li      a0,10
> -**     th.mvnez        a0,zero,a5
> -**     th.mveqz        a1,zero,a5
> -**     or      a0,a0,a1
> +**     th.mvnez        a0,a1,a5
>  **     ret
>  */
>  int ConEmv_imm_imm_reg(int x, int y){
> @@ -20,9 +18,8 @@ int ConEmv_imm_imm_reg(int x, int y){
>  /*
>  **ConEmv_imm_reg_reg:
>  **     addi    a5,a0,-1000
> -**     th.mvnez        a1,zero,a5
> -**     th.mveqz        a2,zero,a5
> -**     or      a0,a1,a2
> +**     th.mveqz        a2,a1,a5
> +**     mv      a0,a2
>  **     ret
>  */
>  int ConEmv_imm_reg_reg(int x, int y, int z){
> @@ -34,9 +31,7 @@ int ConEmv_imm_reg_reg(int x, int y, int z){
>  **ConEmv_reg_imm_reg:
>  **     sub     a1,a0,a1
>  **     li      a0,10
> -**     th.mvnez        a0,zero,a1
> -**     th.mveqz        a2,zero,a1
> -**     or      a0,a0,a2
> +**     th.mvnez        a0,a2,a1
>  **     ret
>  */
>  int ConEmv_reg_imm_reg(int x, int y, int z){
> @@ -47,9 +42,8 @@ int ConEmv_reg_imm_reg(int x, int y, int z){
>  /*
>  **ConEmv_reg_reg_reg:
>  **     sub     a1,a0,a1
> -**     th.mvnez        a2,zero,a1
> -**     th.mveqz        a3,zero,a1
> -**     or      a0,a2,a3
> +**     th.mveqz        a3,a2,a1
> +**     mv      a0,a3
>  **     ret
>  */
>  int ConEmv_reg_reg_reg(int x, int y, int z, int n){
> @@ -59,12 +53,10 @@ int ConEmv_reg_reg_reg(int x, int y, int z, int n){
>
>  /*
>  **ConNmv_imm_imm_reg:
> -**     li      a5,9998336
> -**     addi    a4,a0,-1000
> -**     addi    a5,a5,1664
> -**     th.mvnez        a1,zero,a4
> -**     th.mveqz        a5,zero,a4
> -**     or      a0,a1,a5
> +**     addi    a5,a0,-1000
> +**     li      a0,9998336
> +**     addi    a0,a0,1664
> +**     th.mveqz        a0,a1,a5
>  **     ret
>  */
>  int ConNmv_imm_imm_reg(int x, int y){
> @@ -74,10 +66,9 @@ int ConNmv_imm_imm_reg(int x, int y){
>
>  /*
>  **ConNmv_imm_reg_reg:
> -**     addi    a5,a0,-1000
> -**     th.mveqz        a1,zero,a5
> -**     th.mvnez        a2,zero,a5
> -**     or      a0,a1,a2
> +**     addi    a0,a0,-1000
> +**     th.mvnez        a2,a1,a0
> +**     mv      a0,a2
>  **     ret
>  */
>  int ConNmv_imm_reg_reg(int x, int y, int z){
> @@ -89,9 +80,7 @@ int ConNmv_imm_reg_reg(int x, int y, int z){
>  **ConNmv_reg_imm_reg:
>  **     sub     a1,a0,a1
>  **     li      a0,10
> -**     th.mveqz        a0,zero,a1
> -**     th.mvnez        a2,zero,a1
> -**     or      a0,a0,a2
> +**     th.mveqz        a0,a2,a1
>  **     ret
>  */
>  int ConNmv_reg_imm_reg(int x, int y, int z){
> @@ -101,10 +90,9 @@ int ConNmv_reg_imm_reg(int x, int y, int z){
>
>  /*
>  **ConNmv_reg_reg_reg:
> -**     sub     a1,a0,a1
> -**     th.mveqz        a2,zero,a1
> -**     th.mvnez        a3,zero,a1
> -**     or      a0,a2,a3
> +**     sub     a0,a0,a1
> +**     th.mvnez        a3,a2,a0
> +**     mv      a0,a3
>  **     ret
>  */
>  int ConNmv_reg_reg_reg(int x, int y, int z, int n){
> diff --git a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv64.c b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv64.c
> index a1982fd90bd..99956f8496c 100644
> --- a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv64.c
> +++ b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv64.c
> @@ -1,15 +1,13 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -march=rv64gc_xtheadcondmov -mabi=lp64d -mriscv-attribute" } */
> -/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" } } */
> +/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
>  /* { dg-final { check-function-bodies "**" ""  } } */
>
>  /*
>  **ConEmv_imm_imm_reg:
>  **     addi    a5,a0,-1000
>  **     li      a0,10
> -**     th.mvnez        a0,zero,a5
> -**     th.mveqz        a1,zero,a5
> -**     or      a0,a0,a1
> +**     th.mvnez        a0,a1,a5
>  **     ret
>  */
>  int ConEmv_imm_imm_reg(int x, int y){
> @@ -19,10 +17,9 @@ int ConEmv_imm_imm_reg(int x, int y){
>
>  /*
>  **ConEmv_imm_reg_reg:
> -**     addi    a5,a0,-1000
> -**     th.mvnez        a1,zero,a5
> -**     th.mveqz        a2,zero,a5
> -**     or      a0,a1,a2
> +**     addi    a0,a0,-1000
> +**     th.mveqz        a2,a1,a5
> +**     mv      a0,a2
>  **     ret
>  */
>  int ConEmv_imm_reg_reg(int x, int y, int z){
> @@ -34,9 +31,7 @@ int ConEmv_imm_reg_reg(int x, int y, int z){
>  **ConEmv_reg_imm_reg:
>  **     sub     a1,a0,a1
>  **     li      a0,10
> -**     th.mvnez        a0,zero,a1
> -**     th.mveqz        a2,zero,a1
> -**     or      a0,a0,a2
> +**     th.mvnez        a0,a2,a1
>  **     ret
>  */
>  int ConEmv_reg_imm_reg(int x, int y, int z){
> @@ -47,9 +42,8 @@ int ConEmv_reg_imm_reg(int x, int y, int z){
>  /*
>  **ConEmv_reg_reg_reg:
>  **     sub     a1,a0,a1
> -**     th.mvnez        a2,zero,a1
> -**     th.mveqz        a3,zero,a1
> -**     or      a0,a2,a3
> +**     th.mveqz        a3,a2,a1
> +**     mv      a0,a3
>  **     ret
>  */
>  int ConEmv_reg_reg_reg(int x, int y, int z, int n){
> @@ -59,12 +53,10 @@ int ConEmv_reg_reg_reg(int x, int y, int z, int n){
>
>  /*
>  **ConNmv_imm_imm_reg:
> -**     li      a5,9998336
> -**     addi    a4,a0,-1000
> -**     addi    a5,a5,1664
> -**     th.mvnez        a1,zero,a4
> -**     th.mveqz        a5,zero,a4
> -**     or      a0,a1,a5
> +**     addi    a5,a0,-1000
> +**     li      a0,9998336
> +**     addi    a0,a0,1664
> +**     th.mveqz        a0,a1,a5
>  **     ret
>  */
>  int ConNmv_imm_imm_reg(int x, int y){
> @@ -75,9 +67,8 @@ int ConNmv_imm_imm_reg(int x, int y){
>  /*
>  **ConNmv_imm_reg_reg:
>  **     addi    a5,a0,-1000
> -**     th.mveqz        a1,zero,a5
> -**     th.mvnez        a2,zero,a5
> -**     or      a0,a1,a2
> +**     th.mvnez        a2,a1,a0
> +**     mv      a0,a2
>  **     ret
>  */
>  int ConNmv_imm_reg_reg(int x, int y, int z){
> @@ -89,9 +80,7 @@ int ConNmv_imm_reg_reg(int x, int y, int z){
>  **ConNmv_reg_imm_reg:
>  **     sub     a1,a0,a1
>  **     li      a0,10
> -**     th.mveqz        a0,zero,a1
> -**     th.mvnez        a2,zero,a1
> -**     or      a0,a0,a2
> +**     th.mveqz        a0,a2,a1
>  **     ret
>  */
>  int ConNmv_reg_imm_reg(int x, int y, int z){
> @@ -101,10 +90,9 @@ int ConNmv_reg_imm_reg(int x, int y, int z){
>
>  /*
>  **ConNmv_reg_reg_reg:
> -**     sub     a1,a0,a1
> -**     th.mveqz        a2,zero,a1
> -**     th.mvnez        a3,zero,a1
> -**     or      a0,a2,a3
> +**     sub     a0,a0,a1
> +**     th.mvnez        a3,a2,a0
> +**     mv      a0,a3
>  **     ret
>  */
>  int ConNmv_reg_reg_reg(int x, int y, int z, int n){
> --
> 2.17.1
>
  
Jeff Law May 29, 2023, 5:14 p.m. UTC | #4
On 5/25/23 18:52, Die Li wrote:
> This patch allows less instructions to be used when TARGET_XTHEADCONDMOV is enabled.
> 
> Provide an example from the existing testcases.
> 
> Testcase:
> int ConEmv_imm_imm_reg(int x, int y){
>    if (x == 1000) return 10;
>    return y;
> }
> 
> Cflags:
> -O2 -march=rv64gc_xtheadcondmov -mabi=lp64d
> 
> before patch:
> ConEmv_imm_imm_reg:
> 	addi	a5,a0,-1000
> 	li	a0,10
> 	th.mvnez	a0,zero,a5
> 	th.mveqz	a1,zero,a5
> 	or	a0,a0,a1
> 	ret
> 
> after patch:
> ConEmv_imm_imm_reg:
> 	addi	a5,a0,-1000
> 	li	a0,10
> 	th.mvnez	a0,a1,a5
> 	ret
> 
> Signed-off-by: Die Li <lidie@eswincomputing.com>
> 
> gcc/ChangeLog:
> 
>          * config/riscv/riscv.cc (riscv_expand_conditional_move_onesided): Delete.
>          (riscv_expand_conditional_move):  Reuse the TARGET_SFB_ALU expand process for TARGET_XTHEADCONDMOV
> 
> gcc/testsuite/ChangeLog:
> 
>          * gcc.target/riscv/xtheadcondmov-indirect-rv32.c: Update the output.
>          * gcc.target/riscv/xtheadcondmov-indirect-rv64.c: Likewise.
I've made minor formatting adjustments and pushed this to the trunk.


> @@ -3492,14 +3462,12 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
>         && GET_MODE (op0) == mode
>         && GET_MODE (op1) == mode
>         && (code == EQ || code == NE))
> +        need_eq_ne_p = true;
So the need_eq_ne_p assignment should have been indented 2 spaces in from the IF statement.

> +
> +  if (need_eq_ne_p || (TARGET_SFB_ALU
> +	   && GET_MODE (op0) == word_mode))
This should have been:
   if (need_eq_ne_p
       || (TARGET_SFB_ALU && GET_MODE (op0) == word_mode))

I've fixed this as well.

I would recommend you review the GCC coding guidelines.  While I fixed 
the problems this time, I'll likely ask you to do so in the future.

jeff
  

Patch

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 09fc9e5d95e..8b8ac9181ba 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3442,37 +3442,6 @@  riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1)
   emit_jump_insn (gen_condjump (condition, label));
 }
 
-/* Helper to emit two one-sided conditional moves for the movecc.  */
-
-static void
-riscv_expand_conditional_move_onesided (rtx dest, rtx cons, rtx alt,
-					rtx_code code, rtx op0, rtx op1)
-{
-  machine_mode mode = GET_MODE (dest);
-
-  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT);
-  gcc_assert (reg_or_0_operand (cons, mode));
-  gcc_assert (reg_or_0_operand (alt, mode));
-
-  riscv_emit_int_compare (&code, &op0, &op1, true);
-  rtx cond = gen_rtx_fmt_ee (code, mode, op0, op1);
-
-  rtx tmp1 = gen_reg_rtx (mode);
-  rtx tmp2 = gen_reg_rtx (mode);
-
-  emit_insn (gen_rtx_SET (tmp1, gen_rtx_IF_THEN_ELSE (mode, cond,
-						      cons, const0_rtx)));
-
-  /* We need to expand a sequence for both blocks and we do that such,
-     that the second conditional move will use the inverted condition.
-     We use temporaries that are or'd to the dest register.  */
-  cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, mode, op0, op1);
-  emit_insn (gen_rtx_SET (tmp2, gen_rtx_IF_THEN_ELSE (mode, cond,
-						      alt, const0_rtx)));
-
-  emit_insn (gen_rtx_SET (dest, gen_rtx_IOR (mode, tmp1, tmp2)));
- }
-
 /* Emit a cond move: If OP holds, move CONS to DEST; else move ALT to DEST.
    Return 0 if expansion failed.  */
 
@@ -3483,6 +3452,7 @@  riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
   rtx_code code = GET_CODE (op);
   rtx op0 = XEXP (op, 0);
   rtx op1 = XEXP (op, 1);
+  bool need_eq_ne_p = false;
 
   if (TARGET_XTHEADCONDMOV
       && GET_MODE_CLASS (mode) == MODE_INT
@@ -3492,14 +3462,12 @@  riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
       && GET_MODE (op0) == mode
       && GET_MODE (op1) == mode
       && (code == EQ || code == NE))
+        need_eq_ne_p = true;
+
+  if (need_eq_ne_p || (TARGET_SFB_ALU
+	   && GET_MODE (op0) == word_mode))
     {
-      riscv_expand_conditional_move_onesided (dest, cons, alt, code, op0, op1);
-      return true;
-    }
-  else if (TARGET_SFB_ALU
-	   && GET_MODE (op0) == word_mode)
-    {
-      riscv_emit_int_compare (&code, &op0, &op1);
+      riscv_emit_int_compare (&code, &op0, &op1, need_eq_ne_p);
       rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
 
       /* The expander allows (const_int 0) for CONS for the benefit of
diff --git a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv32.c b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv32.c
index 9afdc2eabfd..e2b135f3d00 100644
--- a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv32.c
+++ b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv32.c
@@ -1,15 +1,13 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=rv32gc_xtheadcondmov -mabi=ilp32 -mriscv-attribute" } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" } } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
 /* { dg-final { check-function-bodies "**" ""  } } */
 
 /*
 **ConEmv_imm_imm_reg:
 **	addi	a5,a0,-1000
 **	li	a0,10
-**	th.mvnez	a0,zero,a5
-**	th.mveqz	a1,zero,a5
-**	or	a0,a0,a1
+**	th.mvnez	a0,a1,a5
 **	ret
 */
 int ConEmv_imm_imm_reg(int x, int y){
@@ -20,9 +18,8 @@  int ConEmv_imm_imm_reg(int x, int y){
 /*
 **ConEmv_imm_reg_reg:
 **	addi	a5,a0,-1000
-**	th.mvnez	a1,zero,a5
-**	th.mveqz	a2,zero,a5
-**	or	a0,a1,a2
+**	th.mveqz	a2,a1,a5
+**	mv	a0,a2
 **	ret
 */
 int ConEmv_imm_reg_reg(int x, int y, int z){
@@ -34,9 +31,7 @@  int ConEmv_imm_reg_reg(int x, int y, int z){
 **ConEmv_reg_imm_reg:
 **	sub	a1,a0,a1
 **	li	a0,10
-**	th.mvnez	a0,zero,a1
-**	th.mveqz	a2,zero,a1
-**	or	a0,a0,a2
+**	th.mvnez	a0,a2,a1
 **	ret
 */
 int ConEmv_reg_imm_reg(int x, int y, int z){
@@ -47,9 +42,8 @@  int ConEmv_reg_imm_reg(int x, int y, int z){
 /*
 **ConEmv_reg_reg_reg:
 **	sub	a1,a0,a1
-**	th.mvnez	a2,zero,a1
-**	th.mveqz	a3,zero,a1
-**	or	a0,a2,a3
+**	th.mveqz	a3,a2,a1
+**	mv	a0,a3
 **	ret
 */
 int ConEmv_reg_reg_reg(int x, int y, int z, int n){
@@ -59,12 +53,10 @@  int ConEmv_reg_reg_reg(int x, int y, int z, int n){
 
 /*
 **ConNmv_imm_imm_reg:
-**	li	a5,9998336
-**	addi	a4,a0,-1000
-**	addi	a5,a5,1664
-**	th.mvnez	a1,zero,a4
-**	th.mveqz	a5,zero,a4
-**	or	a0,a1,a5
+**	addi	a5,a0,-1000
+**	li	a0,9998336
+**	addi	a0,a0,1664
+**	th.mveqz	a0,a1,a5
 **	ret
 */
 int ConNmv_imm_imm_reg(int x, int y){
@@ -74,10 +66,9 @@  int ConNmv_imm_imm_reg(int x, int y){
 
 /*
 **ConNmv_imm_reg_reg:
-**	addi	a5,a0,-1000
-**	th.mveqz	a1,zero,a5
-**	th.mvnez	a2,zero,a5
-**	or	a0,a1,a2
+**	addi	a0,a0,-1000
+**	th.mvnez	a2,a1,a0
+**	mv	a0,a2
 **	ret
 */
 int ConNmv_imm_reg_reg(int x, int y, int z){
@@ -89,9 +80,7 @@  int ConNmv_imm_reg_reg(int x, int y, int z){
 **ConNmv_reg_imm_reg:
 **	sub	a1,a0,a1
 **	li	a0,10
-**	th.mveqz	a0,zero,a1
-**	th.mvnez	a2,zero,a1
-**	or	a0,a0,a2
+**	th.mveqz	a0,a2,a1
 **	ret
 */
 int ConNmv_reg_imm_reg(int x, int y, int z){
@@ -101,10 +90,9 @@  int ConNmv_reg_imm_reg(int x, int y, int z){
 
 /*
 **ConNmv_reg_reg_reg:
-**	sub	a1,a0,a1
-**	th.mveqz	a2,zero,a1
-**	th.mvnez	a3,zero,a1
-**	or	a0,a2,a3
+**	sub	a0,a0,a1
+**	th.mvnez	a3,a2,a0
+**	mv	a0,a3
 **	ret
 */
 int ConNmv_reg_reg_reg(int x, int y, int z, int n){
diff --git a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv64.c b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv64.c
index a1982fd90bd..99956f8496c 100644
--- a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv64.c
+++ b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-indirect-rv64.c
@@ -1,15 +1,13 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=rv64gc_xtheadcondmov -mabi=lp64d -mriscv-attribute" } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" } } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
 /* { dg-final { check-function-bodies "**" ""  } } */
 
 /*
 **ConEmv_imm_imm_reg:
 **	addi	a5,a0,-1000
 **	li	a0,10
-**	th.mvnez	a0,zero,a5
-**	th.mveqz	a1,zero,a5
-**	or	a0,a0,a1
+**	th.mvnez	a0,a1,a5
 **	ret
 */
 int ConEmv_imm_imm_reg(int x, int y){
@@ -19,10 +17,9 @@  int ConEmv_imm_imm_reg(int x, int y){
 
 /*
 **ConEmv_imm_reg_reg:
-**	addi	a5,a0,-1000
-**	th.mvnez	a1,zero,a5
-**	th.mveqz	a2,zero,a5
-**	or	a0,a1,a2
+**	addi	a0,a0,-1000
+**	th.mveqz	a2,a1,a5
+**	mv	a0,a2
 **	ret
 */
 int ConEmv_imm_reg_reg(int x, int y, int z){
@@ -34,9 +31,7 @@  int ConEmv_imm_reg_reg(int x, int y, int z){
 **ConEmv_reg_imm_reg:
 **	sub	a1,a0,a1
 **	li	a0,10
-**	th.mvnez	a0,zero,a1
-**	th.mveqz	a2,zero,a1
-**	or	a0,a0,a2
+**	th.mvnez	a0,a2,a1
 **	ret
 */
 int ConEmv_reg_imm_reg(int x, int y, int z){
@@ -47,9 +42,8 @@  int ConEmv_reg_imm_reg(int x, int y, int z){
 /*
 **ConEmv_reg_reg_reg:
 **	sub	a1,a0,a1
-**	th.mvnez	a2,zero,a1
-**	th.mveqz	a3,zero,a1
-**	or	a0,a2,a3
+**	th.mveqz	a3,a2,a1
+**	mv	a0,a3
 **	ret
 */
 int ConEmv_reg_reg_reg(int x, int y, int z, int n){
@@ -59,12 +53,10 @@  int ConEmv_reg_reg_reg(int x, int y, int z, int n){
 
 /*
 **ConNmv_imm_imm_reg:
-**	li	a5,9998336
-**	addi	a4,a0,-1000
-**	addi	a5,a5,1664
-**	th.mvnez	a1,zero,a4
-**	th.mveqz	a5,zero,a4
-**	or	a0,a1,a5
+**	addi	a5,a0,-1000
+**	li	a0,9998336
+**	addi	a0,a0,1664
+**	th.mveqz	a0,a1,a5
 **	ret
 */
 int ConNmv_imm_imm_reg(int x, int y){
@@ -75,9 +67,8 @@  int ConNmv_imm_imm_reg(int x, int y){
 /*
 **ConNmv_imm_reg_reg:
 **	addi	a5,a0,-1000
-**	th.mveqz	a1,zero,a5
-**	th.mvnez	a2,zero,a5
-**	or	a0,a1,a2
+**	th.mvnez	a2,a1,a0
+**	mv	a0,a2
 **	ret
 */
 int ConNmv_imm_reg_reg(int x, int y, int z){
@@ -89,9 +80,7 @@  int ConNmv_imm_reg_reg(int x, int y, int z){
 **ConNmv_reg_imm_reg:
 **	sub	a1,a0,a1
 **	li	a0,10
-**	th.mveqz	a0,zero,a1
-**	th.mvnez	a2,zero,a1
-**	or	a0,a0,a2
+**	th.mveqz	a0,a2,a1
 **	ret
 */
 int ConNmv_reg_imm_reg(int x, int y, int z){
@@ -101,10 +90,9 @@  int ConNmv_reg_imm_reg(int x, int y, int z){
 
 /*
 **ConNmv_reg_reg_reg:
-**	sub	a1,a0,a1
-**	th.mveqz	a2,zero,a1
-**	th.mvnez	a3,zero,a1
-**	or	a0,a2,a3
+**	sub	a0,a0,a1
+**	th.mvnez	a3,a2,a0
+**	mv	a0,a3
 **	ret
 */
 int ConNmv_reg_reg_reg(int x, int y, int z, int n){