[V2] RISC-V: Optimize fault only first load
Checks
Commit Message
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
V2 patch for: https://patchwork.sourceware.org/project/gcc/patch/20230330012804.110539-1-juzhe.zhong@rivai.ai/
which has been reviewed.
This patch addresses Jeff's comment and refines the ChangeLog to give
clearer information.
gcc/ChangeLog:
* config/riscv/vector-iterators.md: New unspec to refine fault first load pattern.
* config/riscv/vector.md: Refine fault first load pattern to erase avl from instructions
with the fault first load property.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/vsetvl/ffload-1.c: New test.
* gcc.target/riscv/rvv/vsetvl/ffload-2.c: New test.
* gcc.target/riscv/rvv/vsetvl/ffload-3.c: New test.
* gcc.target/riscv/rvv/vsetvl/ffload-5.c: New test.
* gcc.target/riscv/rvv/vsetvl/ffload-6.c: New test.
* gcc.target/riscv/rvv/vsetvl/ffload-7.c: New test.
---
gcc/config/riscv/vector-iterators.md | 1 +
gcc/config/riscv/vector.md | 10 +++++-
.../gcc.target/riscv/rvv/vsetvl/ffload-1.c | 21 ++++++++++++
.../gcc.target/riscv/rvv/vsetvl/ffload-2.c | 28 ++++++++++++++++
.../gcc.target/riscv/rvv/vsetvl/ffload-3.c | 28 ++++++++++++++++
.../gcc.target/riscv/rvv/vsetvl/ffload-5.c | 29 +++++++++++++++++
.../gcc.target/riscv/rvv/vsetvl/ffload-6.c | 29 +++++++++++++++++
.../gcc.target/riscv/rvv/vsetvl/ffload-7.c | 32 +++++++++++++++++++
8 files changed, 177 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-7.c
Comments
Committed, thanks :)
On Sun, Apr 23, 2023 at 7:18 PM <juzhe.zhong@rivai.ai> wrote:
>
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> V2 patch for: https://patchwork.sourceware.org/project/gcc/patch/20230330012804.110539-1-juzhe.zhong@rivai.ai/
> which has been reviewed.
>
> This patch addresses Jeff's comment and refines the ChangeLog to give
> clearer information.
>
> gcc/ChangeLog:
>
> * config/riscv/vector-iterators.md: New unspec to refine fault first load pattern.
> * config/riscv/vector.md: Refine fault first load pattern to erase avl from instructions
> with the fault first load property.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/vsetvl/ffload-1.c: New test.
> * gcc.target/riscv/rvv/vsetvl/ffload-2.c: New test.
> * gcc.target/riscv/rvv/vsetvl/ffload-3.c: New test.
> * gcc.target/riscv/rvv/vsetvl/ffload-5.c: New test.
> * gcc.target/riscv/rvv/vsetvl/ffload-6.c: New test.
> * gcc.target/riscv/rvv/vsetvl/ffload-7.c: New test.
>
> ---
> gcc/config/riscv/vector-iterators.md | 1 +
> gcc/config/riscv/vector.md | 10 +++++-
> .../gcc.target/riscv/rvv/vsetvl/ffload-1.c | 21 ++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/ffload-2.c | 28 ++++++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/ffload-3.c | 28 ++++++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/ffload-5.c | 29 +++++++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/ffload-6.c | 29 +++++++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/ffload-7.c | 32 +++++++++++++++++++
> 8 files changed, 177 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-3.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-5.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-6.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-7.c
>
> diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
> index 3c6575208be..a8e856161d3 100644
> --- a/gcc/config/riscv/vector-iterators.md
> +++ b/gcc/config/riscv/vector-iterators.md
> @@ -80,6 +80,7 @@
> UNSPEC_VRGATHEREI16
> UNSPEC_VCOMPRESS
> UNSPEC_VLEFF
> + UNSPEC_MODIFY_VL
> ])
>
> (define_mode_iterator V [
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index 0fda11ed67d..959afac2283 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -7414,7 +7414,15 @@
> (unspec:V
> [(match_operand:V 3 "memory_operand" " m, m, m, m")] UNSPEC_VLEFF)
> (match_operand:V 2 "vector_merge_operand" " vu, 0, vu, 0")))
> - (set (reg:SI VL_REGNUM) (unspec:SI [(match_dup 0)] UNSPEC_VLEFF))]
> + (set (reg:SI VL_REGNUM)
> + (unspec:SI
> + [(if_then_else:V
> + (unspec:<VM>
> + [(match_dup 1) (match_dup 4) (match_dup 5)
> + (match_dup 6) (match_dup 7)
> + (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> + (unspec:V [(match_dup 3)] UNSPEC_VLEFF)
> + (match_dup 2))] UNSPEC_MODIFY_VL))]
> "TARGET_VECTOR"
> "vle<sew>ff.v\t%0,%3%p1"
> [(set_attr "type" "vldff")
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-1.c
> new file mode 100644
> index 00000000000..b2b7eafa945
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-1.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (int8_t * restrict in, int8_t * restrict out, int n, int cond,size_t *new_vl,size_t *new_vl2)
> +{
> + size_t vl = 101;
> +
> + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, vl);
> + __riscv_vse8_v_i8mf8 (out, v, vl);
> + vbool64_t mask = __riscv_vlm_v_b64 (in + 100, vl);
> + vint8mf8_t v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v, in + 100, new_vl, vl);
> + __riscv_vse8_v_i8mf8 (out + 100, v2, *new_vl);
> + v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v2, in + 200, new_vl2, vl);
> + __riscv_vse8_v_i8mf8 (out + 200, v2, *new_vl2);
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*mu} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {csrr} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-not {vmv} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-2.c
> new file mode 100644
> index 00000000000..c0e21d461e7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-2.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
> +{
> + size_t vl = 101;
> +
> + for (size_t i = 0; i < n; i++)
> + {
> + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
> + __riscv_vse8_v_i8mf8 (out + i, v, vl);
> +
> + vbool64_t mask = __riscv_vlm_v_b64 (in + i + 100, vl);
> +
> + vint8mf8_t v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v, in + i + 100, &vl, vl);
> + __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
> + }
> +
> + for (size_t i = 0; i < n; i++)
> + {
> + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
> + __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
> + }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-3.c
> new file mode 100644
> index 00000000000..9e90b189bd6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-3.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
> +{
> + size_t vl = 101;
> +
> + for (size_t i = 0; i < n; i++)
> + {
> + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
> + __riscv_vse8_v_i8mf8 (out + i, v, vl);
> +
> + vbool64_t mask = __riscv_vlm_v_b64 (in + i + 100, vl);
> +
> + vint8mf8_t v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v, in + i + 100, &vl, vl);
> + __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
> + }
> +
> + for (size_t i = 0; i < m; i++)
> + {
> + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
> + __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
> + }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-5.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-5.c
> new file mode 100644
> index 00000000000..895180cc54e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-5.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
> +{
> + size_t vl = 101;
> + size_t new_vl;
> +
> + for (size_t i = 0; i < n; i++)
> + {
> + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
> + __riscv_vse8_v_i8mf8 (out + i, v, vl);
> +
> + vbool64_t mask = __riscv_vlm_v_b64 (in + i + 100, vl);
> +
> + vint8mf8_t v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v, in + i + 100, &new_vl, vl);
> + __riscv_vse8_v_i8mf8 (out + i + 100, v2, new_vl);
> + }
> +
> + for (size_t i = 0; i < n; i++)
> + {
> + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, new_vl);
> + __riscv_vse8_v_i8mf8 (out + i + 300, v, new_vl);
> + }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-6.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-6.c
> new file mode 100644
> index 00000000000..1b32f4ab24b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-6.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
> +{
> + size_t vl = 101;
> + size_t new_vl;
> +
> + for (size_t i = 0; i < n; i++)
> + {
> + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
> + __riscv_vse8_v_i8mf8 (out + i, v, vl);
> +
> + vbool64_t mask = __riscv_vlm_v_b64 (in + i + 100, vl);
> +
> + vint8mf8_t v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v, in + i + 100, &new_vl, vl);
> + __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
> + }
> +
> + for (size_t i = 0; i < n; i++)
> + {
> + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, new_vl);
> + __riscv_vse8_v_i8mf8 (out + i + 300, v, new_vl);
> + }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-7.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-7.c
> new file mode 100644
> index 00000000000..1c08b75873d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/ffload-7.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
> +{
> + size_t vl = 101;
> + if (cond)
> + vl = m * 2;
> + else
> + vl = m * 2 * vl;
> +
> + for (size_t i = 0; i < n; i++)
> + {
> + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
> + __riscv_vse8_v_i8mf8 (out + i, v, vl);
> +
> + vbool64_t mask = __riscv_vlm_v_b64 (in + i + 100, vl);
> +
> + vint8mf8_t v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v, in + i + 100, &vl, vl);
> + __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
> + }
> +
> + for (size_t i = 0; i < n; i++)
> + {
> + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
> + __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
> + }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> --
> 2.36.1
>
@@ -80,6 +80,7 @@
UNSPEC_VRGATHEREI16
UNSPEC_VCOMPRESS
UNSPEC_VLEFF
+ UNSPEC_MODIFY_VL
])
(define_mode_iterator V [
@@ -7414,7 +7414,15 @@
(unspec:V
[(match_operand:V 3 "memory_operand" " m, m, m, m")] UNSPEC_VLEFF)
(match_operand:V 2 "vector_merge_operand" " vu, 0, vu, 0")))
- (set (reg:SI VL_REGNUM) (unspec:SI [(match_dup 0)] UNSPEC_VLEFF))]
+ (set (reg:SI VL_REGNUM)
+ (unspec:SI
+ [(if_then_else:V
+ (unspec:<VM>
+ [(match_dup 1) (match_dup 4) (match_dup 5)
+ (match_dup 6) (match_dup 7)
+ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:V [(match_dup 3)] UNSPEC_VLEFF)
+ (match_dup 2))] UNSPEC_MODIFY_VL))]
"TARGET_VECTOR"
"vle<sew>ff.v\t%0,%3%p1"
[(set_attr "type" "vldff")
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int cond,size_t *new_vl,size_t *new_vl2)
+{
+ size_t vl = 101;
+
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, vl);
+ __riscv_vse8_v_i8mf8 (out, v, vl);
+ vbool64_t mask = __riscv_vlm_v_b64 (in + 100, vl);
+ vint8mf8_t v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v, in + 100, new_vl, vl);
+ __riscv_vse8_v_i8mf8 (out + 100, v2, *new_vl);
+ v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v2, in + 200, new_vl2, vl);
+ __riscv_vse8_v_i8mf8 (out + 200, v2, *new_vl2);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*mu} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {csrr} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-not {vmv} { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{
+ size_t vl = 101;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vbool64_t mask = __riscv_vlm_v_b64 (in + i + 100, vl);
+
+ vint8mf8_t v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v, in + i + 100, &vl, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{
+ size_t vl = 101;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vbool64_t mask = __riscv_vlm_v_b64 (in + i + 100, vl);
+
+ vint8mf8_t v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v, in + i + 100, &vl, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+
+ for (size_t i = 0; i < m; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{
+ size_t vl = 101;
+ size_t new_vl;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vbool64_t mask = __riscv_vlm_v_b64 (in + i + 100, vl);
+
+ vint8mf8_t v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v, in + i + 100, &new_vl, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, new_vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, new_vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, new_vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{
+ size_t vl = 101;
+ size_t new_vl;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vbool64_t mask = __riscv_vlm_v_b64 (in + i + 100, vl);
+
+ vint8mf8_t v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v, in + i + 100, &new_vl, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, new_vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, new_vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t * restrict in, int8_t * restrict out, int n, int m, int cond)
+{
+ size_t vl = 101;
+ if (cond)
+ vl = m * 2;
+ else
+ vl = m * 2 * vl;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
+ __riscv_vse8_v_i8mf8 (out + i, v, vl);
+
+ vbool64_t mask = __riscv_vlm_v_b64 (in + i + 100, vl);
+
+ vint8mf8_t v2 = __riscv_vle8ff_v_i8mf8_tumu (mask, v, in + i + 100, &vl, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
+ }
+
+ for (size_t i = 0; i < n; i++)
+ {
+ vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
+ __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*mu} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */