RISC-V: convert the mulh with 0 to mov 0 to the reg.
Checks
Commit Message
From: Yanzhang Wang <yanzhang.wang@intel.com>
This patch will optimize the below mulh example,
vint32m1_t shortcut_for_riscv_vmulh_case_0(vint32m1_t v1, size_t vl) {
return __riscv_vmulh_vx_i32m1(v1, 0, vl);
}
from mulh pattern
vsetvli zero, a2, e32, m1, ta, ma
vmulh.vx v24, v24, zero
vs1r.v v24, 0(a0)
to below vmv.
vsetvli zero,a2,e32,m1,ta,ma
vmv.v.i v1,0
vs1r.v v1,0(a0)
It will elimate the mul with const 0 instruction to the simple mov
instruction.
Signed-off-by: Yanzhang Wang <yanzhang.wang@intel.com>
gcc/ChangeLog:
* config/riscv/autovec-opt.md: Add a split pattern.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/binop_vx_constraint-121.c: The mul
with 0 will be simplified to vmv.v.i.
* gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc: New test.
---
gcc/config/riscv/autovec-opt.md | 30 +++++++++++++++++++
.../riscv/rvv/autovec/vmulh-with-zero.cc | 19 ++++++++++++
.../riscv/rvv/base/binop_vx_constraint-121.c | 3 +-
3 files changed, 51 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
Comments
Good catch!
vmulh.vx v24,v24,zero -> vmv.v.i v1,0
can eliminate use of v24 and reduce register pressure.
But I wonder why you pick only VI_QHS?
+ [(set (match_operand:VI_QHS 0 "register_operand")
SEW = 64 should always have such optimization.
Thanks.
juzhe.zhong@rivai.ai
From: yanzhang.wang
Date: 2023-06-21 14:08
To: gcc-patches
CC: juzhe.zhong; kito.cheng; pan2.li; yanzhang.wang
Subject: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
From: Yanzhang Wang <yanzhang.wang@intel.com>
This patch will optimize the below mulh example,
vint32m1_t shortcut_for_riscv_vmulh_case_0(vint32m1_t v1, size_t vl) {
return __riscv_vmulh_vx_i32m1(v1, 0, vl);
}
from mulh pattern
vsetvli zero, a2, e32, m1, ta, ma
vmulh.vx v24, v24, zero
vs1r.v v24, 0(a0)
to below vmv.
vsetvli zero,a2,e32,m1,ta,ma
vmv.v.i v1,0
vs1r.v v1,0(a0)
It will elimate the mul with const 0 instruction to the simple mov
instruction.
Signed-off-by: Yanzhang Wang <yanzhang.wang@intel.com>
gcc/ChangeLog:
* config/riscv/autovec-opt.md: Add a split pattern.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/binop_vx_constraint-121.c: The mul
with 0 will be simplified to vmv.v.i.
* gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc: New test.
---
gcc/config/riscv/autovec-opt.md | 30 +++++++++++++++++++
.../riscv/rvv/autovec/vmulh-with-zero.cc | 19 ++++++++++++
.../riscv/rvv/base/binop_vx_constraint-121.c | 3 +-
3 files changed, 51 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 28040805b23..9c14be964b5 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -405,3 +405,33 @@
"vmv.x.s\t%0,%1"
[(set_attr "type" "vimovvx")
(set_attr "mode" "<MODE>")])
+
+;; Simplify VMULH (V, 0) Instructions to vmv.v.i.
+(define_split
+ [(set (match_operand:VI_QHS 0 "register_operand")
+ (if_then_else:VI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_all_trues_mask_operand")
+ (match_operand 5 "vector_length_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VI_QHS
+ [(vec_duplicate:VI_QHS
+ (match_operand:<VEL> 4 "reg_or_0_operand"))
+ (match_operand:VI_QHS 3 "register_operand")] VMULH)
+ (match_operand:VI_QHS 2 "vector_merge_operand")))]
+ "TARGET_VECTOR
+ && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))"
+ [(const_int 0)]
+ {
+ machine_mode mask_mode = riscv_vector::get_mask_mode (<MODE>mode)
+ .require ();
+ emit_insn (gen_pred_mov (<MODE>mode, operands[0], CONST1_RTX (mask_mode),
+ RVV_VUNDEF (<MODE>mode), CONST0_RTX (GET_MODE (operands[0])),
+ operands[5], operands[6], operands[7], operands[8]));
+ DONE;
+ }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
new file mode 100644
index 00000000000..6e4a3d62bc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+#define VMULH_WITH_LMUL(X) \
+ vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\
+ size_t vl) { \
+ return __riscv_vmulh_vx_i32m ##X (v1, 0, vl); \
+ }
+
+
+VMULH_WITH_LMUL (1)
+VMULH_WITH_LMUL (2)
+VMULH_WITH_LMUL (4)
+VMULH_WITH_LMUL (8)
+VMULH_WITH_LMUL (f2)
+
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
index 4d2de91bc14..d1473274137 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
@@ -50,6 +50,7 @@ void f6 (void * in, void *out, int32_t x)
__riscv_vse64_v_i64m1 (out, v3, 4);
}
-/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 1 } } */
+/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 1 } } */
/* { dg-final { scan-assembler-times {vdiv\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */
/* { dg-final { scan-assembler-times {vrem\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */
--
2.40.1
+ machine_mode mask_mode = riscv_vector::get_mask_mode (<MODE>mode)
+ .require ();
+ emit_insn (gen_pred_mov (<MODE>mode, operands[0], CONST1_RTX (mask_mode),
+ RVV_VUNDEF (<MODE>mode), CONST0_RTX (GET_MODE (operands[0])),
+ operands[5], operands[6], operands[7], operands[8]));
I don't think you need to get_mask_mode, instead, you can simplify the code as follows:
emit_insn (gen_pred_mov (<MODE>mode, operands[0], CONST1_RTX (<VM>mode),
+ RVV_VUNDEF (<MODE>mode), CONST0_RTX (GET_MODE (operands[0])),
+ operands[5], operands[6], operands[7], operands[8]));
use <VM>mode to get the mask mode.
juzhe.zhong@rivai.ai
From: yanzhang.wang
Date: 2023-06-21 14:08
To: gcc-patches
CC: juzhe.zhong; kito.cheng; pan2.li; yanzhang.wang
Subject: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
From: Yanzhang Wang <yanzhang.wang@intel.com>
This patch will optimize the below mulh example,
vint32m1_t shortcut_for_riscv_vmulh_case_0(vint32m1_t v1, size_t vl) {
return __riscv_vmulh_vx_i32m1(v1, 0, vl);
}
from mulh pattern
vsetvli zero, a2, e32, m1, ta, ma
vmulh.vx v24, v24, zero
vs1r.v v24, 0(a0)
to below vmv.
vsetvli zero,a2,e32,m1,ta,ma
vmv.v.i v1,0
vs1r.v v1,0(a0)
It will elimate the mul with const 0 instruction to the simple mov
instruction.
Signed-off-by: Yanzhang Wang <yanzhang.wang@intel.com>
gcc/ChangeLog:
* config/riscv/autovec-opt.md: Add a split pattern.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/binop_vx_constraint-121.c: The mul
with 0 will be simplified to vmv.v.i.
* gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc: New test.
---
gcc/config/riscv/autovec-opt.md | 30 +++++++++++++++++++
.../riscv/rvv/autovec/vmulh-with-zero.cc | 19 ++++++++++++
.../riscv/rvv/base/binop_vx_constraint-121.c | 3 +-
3 files changed, 51 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 28040805b23..9c14be964b5 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -405,3 +405,33 @@
"vmv.x.s\t%0,%1"
[(set_attr "type" "vimovvx")
(set_attr "mode" "<MODE>")])
+
+;; Simplify VMULH (V, 0) Instructions to vmv.v.i.
+(define_split
+ [(set (match_operand:VI_QHS 0 "register_operand")
+ (if_then_else:VI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_all_trues_mask_operand")
+ (match_operand 5 "vector_length_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VI_QHS
+ [(vec_duplicate:VI_QHS
+ (match_operand:<VEL> 4 "reg_or_0_operand"))
+ (match_operand:VI_QHS 3 "register_operand")] VMULH)
+ (match_operand:VI_QHS 2 "vector_merge_operand")))]
+ "TARGET_VECTOR
+ && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))"
+ [(const_int 0)]
+ {
+ machine_mode mask_mode = riscv_vector::get_mask_mode (<MODE>mode)
+ .require ();
+ emit_insn (gen_pred_mov (<MODE>mode, operands[0], CONST1_RTX (mask_mode),
+ RVV_VUNDEF (<MODE>mode), CONST0_RTX (GET_MODE (operands[0])),
+ operands[5], operands[6], operands[7], operands[8]));
+ DONE;
+ }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
new file mode 100644
index 00000000000..6e4a3d62bc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+#define VMULH_WITH_LMUL(X) \
+ vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\
+ size_t vl) { \
+ return __riscv_vmulh_vx_i32m ##X (v1, 0, vl); \
+ }
+
+
+VMULH_WITH_LMUL (1)
+VMULH_WITH_LMUL (2)
+VMULH_WITH_LMUL (4)
+VMULH_WITH_LMUL (8)
+VMULH_WITH_LMUL (f2)
+
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
index 4d2de91bc14..d1473274137 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
@@ -50,6 +50,7 @@ void f6 (void * in, void *out, int32_t x)
__riscv_vse64_v_i64m1 (out, v3, 4);
}
-/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 1 } } */
+/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 1 } } */
/* { dg-final { scan-assembler-times {vdiv\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */
/* { dg-final { scan-assembler-times {vrem\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */
--
2.40.1
Hi Yanzhang,
while I appreciate the optimization, I'm a bit wary about just adding a special
case for "0". Is that so common? Wouldn't we also like to have
* pow2_p (val) == << val and others?
* 1 should also be covered.
Regards
Robin
Oh. Yes. Thanks for Robin pointing this.
@yanzhang, could you refine this patch more deeply to gain more optimizations ?
Thanks.
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-06-21 14:27
To: yanzhang.wang; gcc-patches
CC: rdapp.gcc; juzhe.zhong; kito.cheng; pan2.li
Subject: Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
Hi Yanzhang,
while I appreciate the optimization, I'm a bit wary about just adding a special
case for "0". Is that so common? Wouldn't we also like to have
* pow2_p (val) == << val and others?
* 1 should also be covered.
Regards
Robin
Thanks, you are right. I have not considered the iterator much. I picked it
from one of pred_mulh directly. It should be able to work with VFULL_I.
Yanzhang
From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Wednesday, June 21, 2023 2:21 PM
To: Wang, Yanzhang <yanzhang.wang@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Kito.cheng <kito.cheng@sifive.com>; Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; Robin Dapp <rdapp.gcc@gmail.com>; jeffreyalaw <jeffreyalaw@gmail.com>
Subject: Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
Good catch!
vmulh.vx v24,v24,zero -> vmv.v.i v1,0
can eliminate use of v24 and reduce register pressure.
But I wonder why you pick only VI_QHS?
+ [(set (match_operand:VI_QHS 0 "register_operand")
SEW = 64 should always have such optimization.
Thanks.
Of cause, I'd like to make it generic. Thanks Robin’s advice! It's right,
there're many similar situations.
But I'm not sure how to distinguish different operations. Currently, the
VMULH is fixed as below.
+ (unspec:VI_QHS
+ [(vec_duplicate:VI_QHS
+ (match_operand:<VEL> 4 "reg_or_0_operand"))
+ (match_operand:VI_QHS 3 "register_operand")] VMULH)
Do we need to define another UNSPEC ? And do we have any APIs to get the
operation, like whether it's VMULH or POW ?
Thanks,
Yanzhang
From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Wednesday, June 21, 2023 2:33 PM
To: Robin Dapp <rdapp.gcc@gmail.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Robin Dapp <rdapp.gcc@gmail.com>; Kito.cheng <kito.cheng@sifive.com>; Li, Pan2 <pan2.li@intel.com>
Subject: Re: Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
Oh. Yes. Thanks for Robin pointing this.
@yanzhang, could you refine this patch more deeply to gain more optimizations ?
Thanks.
________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>
From: Robin Dapp<mailto:rdapp.gcc@gmail.com>
Date: 2023-06-21 14:27
To: yanzhang.wang<mailto:yanzhang.wang@intel.com>; gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: rdapp.gcc<mailto:rdapp.gcc@gmail.com>; juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; kito.cheng<mailto:kito.cheng@sifive.com>; pan2.li<mailto:pan2.li@intel.com>
Subject: Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
Hi Yanzhang,
while I appreciate the optimization, I'm a bit wary about just adding a special
case for "0". Is that so common? Wouldn't we also like to have
* pow2_p (val) == << val and others?
* 1 should also be covered.
Regards
Robin
No, I don't think we need another UNSPEC.
You just need to modify predicate of (match_operand:<VEL> 4 "reg_or_0_operand")
juzhe.zhong@rivai.ai
From: Wang, Yanzhang
Date: 2023-06-21 15:08
To: juzhe.zhong@rivai.ai; Robin Dapp; gcc-patches
CC: Robin Dapp; Kito.cheng; Li, Pan2
Subject: RE: Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
Of cause, I'd like to make it generic. Thanks Robin’s advice! It's right,
there're many similar situations.
But I'm not sure how to distinguish different operations. Currently, the
VMULH is fixed as below.
+ (unspec:VI_QHS
+ [(vec_duplicate:VI_QHS
+ (match_operand:<VEL> 4 "reg_or_0_operand"))
+ (match_operand:VI_QHS 3 "register_operand")] VMULH)
Do we need to define another UNSPEC ? And do we have any APIs to get the
operation, like whether it's VMULH or POW ?
Thanks,
Yanzhang
From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Wednesday, June 21, 2023 2:33 PM
To: Robin Dapp <rdapp.gcc@gmail.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Robin Dapp <rdapp.gcc@gmail.com>; Kito.cheng <kito.cheng@sifive.com>; Li, Pan2 <pan2.li@intel.com>
Subject: Re: Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
Oh. Yes. Thanks for Robin pointing this.
@yanzhang, could you refine this patch more deeply to gain more optimizations ?
Thanks.
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-06-21 14:27
To: yanzhang.wang; gcc-patches
CC: rdapp.gcc; juzhe.zhong; kito.cheng; pan2.li
Subject: Re: [PATCH] RISC-V: convert the mulh with 0 to mov 0 to the reg.
Hi Yanzhang,
while I appreciate the optimization, I'm a bit wary about just adding a special
case for "0". Is that so common? Wouldn't we also like to have
* pow2_p (val) == << val and others?
* 1 should also be covered.
Regards
Robin
@@ -405,3 +405,33 @@
"vmv.x.s\t%0,%1"
[(set_attr "type" "vimovvx")
(set_attr "mode" "<MODE>")])
+
+;; Simplify VMULH (V, 0) Instructions to vmv.v.i.
+(define_split
+ [(set (match_operand:VI_QHS 0 "register_operand")
+ (if_then_else:VI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_all_trues_mask_operand")
+ (match_operand 5 "vector_length_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VI_QHS
+ [(vec_duplicate:VI_QHS
+ (match_operand:<VEL> 4 "reg_or_0_operand"))
+ (match_operand:VI_QHS 3 "register_operand")] VMULH)
+ (match_operand:VI_QHS 2 "vector_merge_operand")))]
+ "TARGET_VECTOR
+ && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))"
+ [(const_int 0)]
+ {
+ machine_mode mask_mode = riscv_vector::get_mask_mode (<MODE>mode)
+ .require ();
+ emit_insn (gen_pred_mov (<MODE>mode, operands[0], CONST1_RTX (mask_mode),
+ RVV_VUNDEF (<MODE>mode), CONST0_RTX (GET_MODE (operands[0])),
+ operands[5], operands[6], operands[7], operands[8]));
+ DONE;
+ }
+)
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+#define VMULH_WITH_LMUL(X) \
+ vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\
+ size_t vl) { \
+ return __riscv_vmulh_vx_i32m ##X (v1, 0, vl); \
+ }
+
+
+VMULH_WITH_LMUL (1)
+VMULH_WITH_LMUL (2)
+VMULH_WITH_LMUL (4)
+VMULH_WITH_LMUL (8)
+VMULH_WITH_LMUL (f2)
+
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */
@@ -50,6 +50,7 @@ void f6 (void * in, void *out, int32_t x)
__riscv_vse64_v_i64m1 (out, v3, 4);
}
-/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 1 } } */
+/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 1 } } */
/* { dg-final { scan-assembler-times {vdiv\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */
/* { dg-final { scan-assembler-times {vrem\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */