@@ -283,6 +283,36 @@
DONE;
})
+;; =========================================================================
+;; == Array Load/Store
+;; =========================================================================
+
+;; Expand a masked, length-controlled lanes load into tuple mode VT.
+;; Operand 0 is the destination register, operand 1 the source memory,
+;; operand 2 the mask, operand 3 the length and operand 4 must be const 0
+;; (NOTE(review): presumably the bias operand of the MASK_LEN optabs --
+;; confirm).  The expansion itself is done by
+;; riscv_vector::expand_lanes_load_store with is_load == true.
+(define_expand "vec_mask_len_load_lanes<mode><vsingle>"
+ [(match_operand:VT 0 "register_operand")
+ (match_operand:VT 1 "memory_operand")
+ (match_operand:<VM> 2 "vector_mask_operand")
+ (match_operand 3 "autovec_length_operand")
+ (match_operand 4 "const_0_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_lanes_load_store (operands, true);
+ DONE;
+ }
+)
+
+;; Expand a masked, length-controlled lanes store from tuple mode VT.
+;; Operand 0 is the destination memory, operand 1 the source register,
+;; operand 2 the mask, operand 3 the length and operand 4 must be const 0
+;; (NOTE(review): presumably the bias operand of the MASK_LEN optabs --
+;; confirm).  The expansion itself is done by
+;; riscv_vector::expand_lanes_load_store with is_load == false.
+(define_expand "vec_mask_len_store_lanes<mode><vsingle>"
+ [(match_operand:VT 0 "memory_operand")
+ (match_operand:VT 1 "register_operand")
+ (match_operand:<VM> 2 "vector_mask_operand")
+ (match_operand 3 "autovec_length_operand")
+ (match_operand 4 "const_0_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_lanes_load_store (operands, false);
+ DONE;
+ }
+)
+
;; =========================================================================
;; == Vector creation
;; =========================================================================
@@ -610,27 +640,37 @@
[(set_attr "type" "vext")
(set_attr "mode" "<MODE>")])
+;; Integer extension from <V_QUAD_TRUNC> to VQEXTI.  The pattern only matches
+;; while can_create_pseudo_p () holds, has no assembler template of its own
+;; ("#") and is always split ("&& 1") into the VLMAX pred_vf4 insn emitted
+;; below.  The destination uses an early-clobber constraint ("=&vr").
-(define_expand "<optab><v_quad_trunc><mode>2"
- [(set (match_operand:VQEXTI 0 "register_operand")
+(define_insn_and_split "<optab><v_quad_trunc><mode>2"
+ [(set (match_operand:VQEXTI 0 "register_operand" "=&vr")
(any_extend:VQEXTI
- (match_operand:<V_QUAD_TRUNC> 1 "register_operand")))]
- "TARGET_VECTOR"
+ (match_operand:<V_QUAD_TRUNC> 1 "register_operand" "vr")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
{
insn_code icode = code_for_pred_vf4 (<CODE>, <MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
DONE;
-})
+}
+ [(set_attr "type" "vext")
+ (set_attr "mode" "<MODE>")])
+;; Integer extension from <V_OCT_TRUNC> to VOEXTI.  The pattern only matches
+;; while can_create_pseudo_p () holds, has no assembler template of its own
+;; ("#") and is always split ("&& 1") into the VLMAX pred_vf8 insn emitted
+;; below.  The destination uses an early-clobber constraint ("=&vr").
-(define_expand "<optab><v_oct_trunc><mode>2"
- [(set (match_operand:VOEXTI 0 "register_operand")
+(define_insn_and_split "<optab><v_oct_trunc><mode>2"
+ [(set (match_operand:VOEXTI 0 "register_operand" "=&vr")
(any_extend:VOEXTI
- (match_operand:<V_OCT_TRUNC> 1 "register_operand")))]
- "TARGET_VECTOR"
+ (match_operand:<V_OCT_TRUNC> 1 "register_operand" "vr")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
{
insn_code icode = code_for_pred_vf8 (<CODE>, <MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
DONE;
-})
+}
+ [(set_attr "type" "vext")
+ (set_attr "mode" "<MODE>")])
;; -------------------------------------------------------------------------
;; ---- [INT] Truncation
@@ -325,6 +325,7 @@ void expand_load_store (rtx *, bool);
void expand_gather_scatter (rtx *, bool);
void expand_cond_len_ternop (unsigned, rtx *);
void prepare_ternary_operands (rtx *, bool = false);
+void expand_lanes_load_store (rtx *, bool);
/* Rounding mode bitfield for fixed point VXRM. */
enum fixed_point_rounding_mode
@@ -1900,7 +1900,13 @@ get_avl_type_rtx (enum avl_type type)
machine_mode
get_mask_mode (machine_mode mode)
{
- return get_vector_mode (BImode, GET_MODE_NUNITS (mode)).require();
+  /* The mask mode is the BImode vector with as many units as MODE; for a
+     tuple mode the unit count is first divided by the tuple's NF.  */
+  poly_int64 nunits = GET_MODE_NUNITS (mode);
+  if (riscv_v_ext_tuple_mode_p (mode))
+    nunits = exact_div (nunits, get_nf (mode));
+  return get_vector_mode (BImode, nunits).require ();
}
/* Return the appropriate M1 mode for MODE. */
@@ -3716,4 +3722,48 @@ prepare_ternary_operands (rtx *ops, bool split_p)
}
}
+/* Expand VEC_MASK_LEN_{LOAD_LANES,STORE_LANES}.
+
+   OPS are the operands of the expander: for a load OPS[0] is the tuple
+   register and OPS[1] the memory, for a store the other way round.
+   OPS[2] is the mask and OPS[3] the length.  IS_LOAD selects between the
+   unit-strided segment load and store.  */
+void
+expand_lanes_load_store (rtx *ops, bool is_load)
+{
+  rtx mask = ops[2];
+  rtx len = ops[3];
+  rtx mem = ops[is_load ? 1 : 0];
+  rtx data = ops[is_load ? 0 : 1];
+  rtx base = XEXP (mem, 0);
+  machine_mode mode = GET_MODE (ops[0]);
+
+  /* If the length operand is equal to VF, it is VLMAX load/store.
+     NOTE(review): MODE is the tuple mode, and get_mask_mode divides a
+     tuple's NUNITS by NF, so NUNITS here presumably counts all NF fields.
+     Confirm LEN is expressed in the same units, otherwise this test never
+     succeeds.  */
+  poly_int64 value;
+  const bool vlmax_p
+    = poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode));
+
+  /* An explicit length has to be a register unless it satisfies
+     constraint K.  */
+  if (!vlmax_p && !satisfies_constraint_K (len))
+    len = force_reg (Pmode, len);
+
+  if (is_load)
+    {
+      rtx m_ops[] = {data, mask, RVV_VUNDEF (mode), base};
+      insn_code icode = code_for_pred_unit_strided_load (mode);
+      if (vlmax_p)
+        emit_vlmax_masked_insn (icode, RVV_UNOP_M, m_ops);
+      else
+        emit_nonvlmax_masked_insn (icode, RVV_UNOP_M, m_ops, len);
+      return;
+    }
+
+  if (vlmax_p)
+    {
+      /* The store pattern takes its length explicitly, so materialize
+         VLMAX in a fresh pseudo.  */
+      len = gen_reg_rtx (Pmode);
+      emit_vlmax_vsetvl (mode, len);
+    }
+  emit_insn (gen_pred_unit_strided_store (mode, mask, base, data, len,
+                                          get_avl_type_rtx (vlmax_p
+                                                            ? VLMAX
+                                                            : NONVLMAX)));
+}
+
+
} // namespace riscv_vector
@@ -1417,6 +1417,101 @@
(V1DF "df") (V2DF "df") (V4DF "df") (V8DF "df") (V16DF "df") (V32DF "df") (V64DF "df") (V128DF "df") (V256DF "df") (V512DF "df")
])
+;; Map each tuple mode to the lower-case name of the single vector mode that
+;; makes up one of its fields (e.g. RVVM2x4HI -> rvvm2hi).  Used as the
+;; <vsingle> suffix of pattern names, so every entry must name the vector
+;; mode with the same LMUL and element type as the tuple.
+(define_mode_attr vsingle [
+ (RVVM1x8QI "rvvm1qi") (RVVMF2x8QI "rvvmf2qi") (RVVMF4x8QI "rvvmf4qi") (RVVMF8x8QI "rvvmf8qi")
+ (RVVM1x7QI "rvvm1qi") (RVVMF2x7QI "rvvmf2qi") (RVVMF4x7QI "rvvmf4qi") (RVVMF8x7QI "rvvmf8qi")
+ (RVVM1x6QI "rvvm1qi") (RVVMF2x6QI "rvvmf2qi") (RVVMF4x6QI "rvvmf4qi") (RVVMF8x6QI "rvvmf8qi")
+ (RVVM1x5QI "rvvm1qi") (RVVMF2x5QI "rvvmf2qi") (RVVMF4x5QI "rvvmf4qi") (RVVMF8x5QI "rvvmf8qi")
+ (RVVM2x4QI "rvvm2qi") (RVVM1x4QI "rvvm1qi") (RVVMF2x4QI "rvvmf2qi") (RVVMF4x4QI "rvvmf4qi") (RVVMF8x4QI "rvvmf8qi")
+ (RVVM2x3QI "rvvm2qi") (RVVM1x3QI "rvvm1qi") (RVVMF2x3QI "rvvmf2qi") (RVVMF4x3QI "rvvmf4qi") (RVVMF8x3QI "rvvmf8qi")
+ (RVVM4x2QI "rvvm4qi") (RVVM2x2QI "rvvm2qi") (RVVM1x2QI "rvvm1qi") (RVVMF2x2QI "rvvmf2qi") (RVVMF4x2QI "rvvmf4qi") (RVVMF8x2QI "rvvmf8qi")
+
+ (RVVM1x8HI "rvvm1hi") (RVVMF2x8HI "rvvmf2hi") (RVVMF4x8HI "rvvmf4hi")
+ (RVVM1x7HI "rvvm1hi") (RVVMF2x7HI "rvvmf2hi") (RVVMF4x7HI "rvvmf4hi")
+ (RVVM1x6HI "rvvm1hi") (RVVMF2x6HI "rvvmf2hi") (RVVMF4x6HI "rvvmf4hi")
+ (RVVM1x5HI "rvvm1hi") (RVVMF2x5HI "rvvmf2hi") (RVVMF4x5HI "rvvmf4hi")
+ (RVVM2x4HI "rvvm2hi") (RVVM1x4HI "rvvm1hi") (RVVMF2x4HI "rvvmf2hi") (RVVMF4x4HI "rvvmf4hi")
+ (RVVM2x3HI "rvvm2hi") (RVVM1x3HI "rvvm1hi") (RVVMF2x3HI "rvvmf2hi") (RVVMF4x3HI "rvvmf4hi")
+ (RVVM4x2HI "rvvm4hi") (RVVM2x2HI "rvvm2hi") (RVVM1x2HI "rvvm1hi") (RVVMF2x2HI "rvvmf2hi") (RVVMF4x2HI "rvvmf4hi")
+
+ (RVVM1x8HF "rvvm1hf")
+ (RVVMF2x8HF "rvvmf2hf")
+ (RVVMF4x8HF "rvvmf4hf")
+ (RVVM1x7HF "rvvm1hf")
+ (RVVMF2x7HF "rvvmf2hf")
+ (RVVMF4x7HF "rvvmf4hf")
+ (RVVM1x6HF "rvvm1hf")
+ (RVVMF2x6HF "rvvmf2hf")
+ (RVVMF4x6HF "rvvmf4hf")
+ (RVVM1x5HF "rvvm1hf")
+ (RVVMF2x5HF "rvvmf2hf")
+ (RVVMF4x5HF "rvvmf4hf")
+ (RVVM2x4HF "rvvm2hf")
+ (RVVM1x4HF "rvvm1hf")
+ (RVVMF2x4HF "rvvmf2hf")
+ (RVVMF4x4HF "rvvmf4hf")
+ (RVVM2x3HF "rvvm2hf")
+ (RVVM1x3HF "rvvm1hf")
+ (RVVMF2x3HF "rvvmf2hf")
+ (RVVMF4x3HF "rvvmf4hf")
+ (RVVM4x2HF "rvvm4hf")
+ (RVVM2x2HF "rvvm2hf")
+ (RVVM1x2HF "rvvm1hf")
+ (RVVMF2x2HF "rvvmf2hf")
+ (RVVMF4x2HF "rvvmf4hf")
+
+ (RVVM1x8SI "rvvm1si") (RVVMF2x8SI "rvvmf2si")
+ (RVVM1x7SI "rvvm1si") (RVVMF2x7SI "rvvmf2si")
+ (RVVM1x6SI "rvvm1si") (RVVMF2x6SI "rvvmf2si")
+ (RVVM1x5SI "rvvm1si") (RVVMF2x5SI "rvvmf2si")
+ (RVVM2x4SI "rvvm2si") (RVVM1x4SI "rvvm1si") (RVVMF2x4SI "rvvmf2si")
+ (RVVM2x3SI "rvvm2si") (RVVM1x3SI "rvvm1si") (RVVMF2x3SI "rvvmf2si")
+ (RVVM4x2SI "rvvm4si") (RVVM2x2SI "rvvm2si") (RVVM1x2SI "rvvm1si") (RVVMF2x2SI "rvvmf2si")
+
+ (RVVM1x8SF "rvvm1sf")
+ (RVVMF2x8SF "rvvmf2sf")
+ (RVVM1x7SF "rvvm1sf")
+ (RVVMF2x7SF "rvvmf2sf")
+ (RVVM1x6SF "rvvm1sf")
+ (RVVMF2x6SF "rvvmf2sf")
+ (RVVM1x5SF "rvvm1sf")
+ (RVVMF2x5SF "rvvmf2sf")
+ (RVVM2x4SF "rvvm2sf")
+ (RVVM1x4SF "rvvm1sf")
+ (RVVMF2x4SF "rvvmf2sf")
+ (RVVM2x3SF "rvvm2sf")
+ (RVVM1x3SF "rvvm1sf")
+ (RVVMF2x3SF "rvvmf2sf")
+ (RVVM4x2SF "rvvm4sf")
+ (RVVM2x2SF "rvvm2sf")
+ (RVVM1x2SF "rvvm1sf")
+ (RVVMF2x2SF "rvvmf2sf")
+
+ (RVVM1x8DI "rvvm1di")
+ (RVVM1x7DI "rvvm1di")
+ (RVVM1x6DI "rvvm1di")
+ (RVVM1x5DI "rvvm1di")
+ (RVVM2x4DI "rvvm2di")
+ (RVVM1x4DI "rvvm1di")
+ (RVVM2x3DI "rvvm2di")
+ (RVVM1x3DI "rvvm1di")
+ (RVVM4x2DI "rvvm4di")
+ (RVVM2x2DI "rvvm2di")
+ (RVVM1x2DI "rvvm1di")
+
+ (RVVM1x8DF "rvvm1df")
+ (RVVM1x7DF "rvvm1df")
+ (RVVM1x6DF "rvvm1df")
+ (RVVM1x5DF "rvvm1df")
+ (RVVM2x4DF "rvvm2df")
+ (RVVM1x4DF "rvvm1df")
+ (RVVM2x3DF "rvvm2df")
+ (RVVM1x3DF "rvvm1df")
+ (RVVM4x2DF "rvvm4df")
+ (RVVM2x2DF "rvvm2df")
+ (RVVM1x2DF "rvvm1df")
+])
+
(define_mode_attr VSUBEL [
(RVVM8HI "QI") (RVVM4HI "QI") (RVVM2HI "QI") (RVVM1HI "QI") (RVVMF2HI "QI") (RVVMF4HI "QI")
@@ -40,6 +40,6 @@
TEST_ALL (TEST_LOOP)
-/* { dg-final { scan-tree-dump-times " \.MASK_LEN_GATHER_LOAD" 46 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \.MASK_LEN_GATHER_LOAD" 33 "optimized" } } */
/* { dg-final { scan-tree-dump-not " \.GATHER_LOAD" "optimized" } } */
/* { dg-final { scan-tree-dump-not " \.MASK_GATHER_LOAD" "optimized" } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fno-vect-cost-model -fdump-tree-optimized-details" } */
#include <stdint-gcc.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fno-vect-cost-model -fdump-tree-optimized-details" } */
#include <stdint-gcc.h>
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Define NAME##_2: for every i < n with cond[i] non-zero, dest[i] is set to
+   the sum of the 2 consecutive SRC elements starting at src[i * 2].  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ if (cond[i]) \
+ dest[i] = src[i * 2] + src[i * 2 + 1]; \
+ }
+
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg2e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 16 } } */
+/* { dg-final { scan-assembler-times {vlseg2e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg2e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg2e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Define NAME##_3: for every i < n with cond[i] non-zero, dest[i] is set to
+   the sum of the 3 consecutive SRC elements starting at src[i * 3].  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ if (cond[i]) \
+ dest[i] = (src[i * 3] \
+ + src[i * 3 + 1] \
+ + src[i * 3 + 2]); \
+ }
+
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg3e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 16 } } */
+/* { dg-final { scan-assembler-times {vlseg3e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg3e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg3e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Define NAME##_4: for every i < n with cond[i] non-zero, dest[i] is set to
+   the sum of the 4 consecutive SRC elements starting at src[i * 4].  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ if (cond[i]) \
+ dest[i] = (src[i * 4] \
+ + src[i * 4 + 1] \
+ + src[i * 4 + 2] \
+ + src[i * 4 + 3]); \
+ }
+
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg4e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 16 } } */
+/* { dg-final { scan-assembler-times {vlseg4e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg4e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg4e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Define NAME##_5: for every i < n with cond[i] non-zero, dest[i] is set to
+   the sum of the 5 consecutive SRC elements starting at src[i * 5].  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_5 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ if (cond[i]) \
+ dest[i] = (src[i * 5] + src[i * 5 + 1] + src[i * 5 + 2] \
+ + src[i * 5 + 3] + src[i * 5 + 4]); \
+ }
+
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg5e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 16 } } */
+/* { dg-final { scan-assembler-times {vlseg5e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg5e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg5e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Define NAME##_6: for every i < n with cond[i] non-zero, dest[i] is set to
+   the sum of the 6 consecutive SRC elements starting at src[i * 6].  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_6 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ if (cond[i]) \
+ dest[i] = (src[i * 6] + src[i * 6 + 1] + src[i * 6 + 2] \
+ + src[i * 6 + 3] + src[i * 6 + 4] + src[i * 6 + 5]); \
+ }
+
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg6e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 16 } } */
+/* { dg-final { scan-assembler-times {vlseg6e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg6e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg6e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Define NAME##_7: for every i < n with cond[i] non-zero, dest[i] is set to
+   the sum of the 7 consecutive SRC elements starting at src[i * 7].  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_7 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ if (cond[i]) \
+ dest[i] \
+ = (src[i * 7] + src[i * 7 + 1] + src[i * 7 + 2] + src[i * 7 + 3] \
+ + src[i * 7 + 4] + src[i * 7 + 5] + src[i * 7 + 6]); \
+ }
+
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg7e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 16 } } */
+/* { dg-final { scan-assembler-times {vlseg7e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg7e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg7e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Define NAME##_8: for every i < n with cond[i] non-zero, dest[i] is set to
+   the sum of the 8 consecutive SRC elements starting at src[i * 8].  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_8 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ if (cond[i]) \
+ dest[i] = (src[i * 8] + src[i * 8 + 1] + src[i * 8 + 2] \
+ + src[i * 8 + 3] + src[i * 8 + 4] + src[i * 8 + 5] \
+ + src[i * 8 + 6] + src[i * 8 + 7]); \
+ }
+
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg8e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 16 } } */
+/* { dg-final { scan-assembler-times {vlseg8e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg8e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vlseg8e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
new file mode 100644
@@ -0,0 +1,38 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_load-1.c"
+
+#define N 100
+
+#undef TEST_LOOP
+/* Run-time check of NAME##_2.  OUT and MASK are seeded per index and IN
+   holds N * 2 elements.  After the call, OUT[i] must be the sum of the 2
+   IN elements starting at in[i * 2] where MASK[i] is non-zero, and its seed
+   value i * 7 / 2 otherwise; any mismatch calls __builtin_abort.
+   NOTE(review): the empty asm with a "memory" clobber presumably keeps the
+   setup and check loops from being optimized -- confirm.  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N]; \
+ INTYPE in[N * 2]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 2; ++i) \
+ in[i] = i * 9 / 2; \
+ NAME##_2 (out, in, mask, N); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ OUTTYPE if_true = in[i * 2] + in[i * 2 + 1]; \
+ OUTTYPE if_false = i * 7 / 2; \
+ if (out[i] != (mask[i] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,40 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_load-2.c"
+
+#define N 100
+
+#undef TEST_LOOP
+/* Run-time check of NAME##_3.  OUT and MASK are seeded per index and IN
+   holds N * 3 elements.  After the call, OUT[i] must be the sum of the 3
+   IN elements starting at in[i * 3] where MASK[i] is non-zero, and its seed
+   value i * 7 / 2 otherwise; any mismatch calls __builtin_abort.
+   NOTE(review): the empty asm with a "memory" clobber presumably keeps the
+   setup and check loops from being optimized -- confirm.  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N]; \
+ INTYPE in[N * 3]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 3; ++i) \
+ in[i] = i * 9 / 2; \
+ NAME##_3 (out, in, mask, N); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ OUTTYPE if_true = (in[i * 3] \
+ + in[i * 3 + 1] \
+ + in[i * 3 + 2]); \
+ OUTTYPE if_false = i * 7 / 2; \
+ if (out[i] != (mask[i] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_load-3.c"
+
+#define N 100
+
+#undef TEST_LOOP
+/* Run-time check of NAME##_4.  OUT and MASK are seeded per index and IN
+   holds N * 4 elements.  After the call, OUT[i] must be the sum of the 4
+   IN elements starting at in[i * 4] where MASK[i] is non-zero, and its seed
+   value i * 7 / 2 otherwise; any mismatch calls __builtin_abort.
+   NOTE(review): the empty asm with a "memory" clobber presumably keeps the
+   setup and check loops from being optimized -- confirm.  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N]; \
+ INTYPE in[N * 4]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 4; ++i) \
+ in[i] = i * 9 / 2; \
+ NAME##_4 (out, in, mask, N); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ OUTTYPE if_true = (in[i * 4] \
+ + in[i * 4 + 1] \
+ + in[i * 4 + 2] \
+ + in[i * 4 + 3]); \
+ OUTTYPE if_false = i * 7 / 2; \
+ if (out[i] != (mask[i] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_load-4.c"
+
+#define N 100
+
+#undef TEST_LOOP
+/* Run-time check of NAME##_5.  OUT and MASK are seeded per index and IN
+   holds N * 5 elements.  After the call, OUT[i] must be the sum of the 5
+   IN elements starting at in[i * 5] where MASK[i] is non-zero, and its seed
+   value i * 7 / 2 otherwise; any mismatch calls __builtin_abort.
+   NOTE(review): the empty asm with a "memory" clobber presumably keeps the
+   setup and check loops from being optimized -- confirm.  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N]; \
+ INTYPE in[N * 5]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 5; ++i) \
+ in[i] = i * 9 / 2; \
+ NAME##_5 (out, in, mask, N); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ OUTTYPE if_true = (in[i * 5] \
+ + in[i * 5 + 1] \
+ + in[i * 5 + 2] \
+ + in[i * 5 + 3] \
+ + in[i * 5 + 4]); \
+ OUTTYPE if_false = i * 7 / 2; \
+ if (out[i] != (mask[i] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_load-5.c"
+
+#define N 100
+
+#undef TEST_LOOP
+/* Run-time check of NAME##_6.  OUT and MASK are seeded per index and IN
+   holds N * 6 elements.  After the call, OUT[i] must be the sum of the 6
+   IN elements starting at in[i * 6] where MASK[i] is non-zero, and its seed
+   value i * 7 / 2 otherwise; any mismatch calls __builtin_abort.
+   NOTE(review): the empty asm with a "memory" clobber presumably keeps the
+   setup and check loops from being optimized -- confirm.  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N]; \
+ INTYPE in[N * 6]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 6; ++i) \
+ in[i] = i * 9 / 2; \
+ NAME##_6 (out, in, mask, N); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ OUTTYPE if_true = (in[i * 6] \
+ + in[i * 6 + 1] \
+ + in[i * 6 + 2] \
+ + in[i * 6 + 3] \
+ + in[i * 6 + 4] \
+ + in[i * 6 + 5]); \
+ OUTTYPE if_false = i * 7 / 2; \
+ if (out[i] != (mask[i] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_load-6.c"
+
+#define N 100
+
+#undef TEST_LOOP
+/* Run-time check of NAME##_7.  OUT and MASK are seeded per index and IN
+   holds N * 7 elements.  After the call, OUT[i] must be the sum of the 7
+   IN elements starting at in[i * 7] where MASK[i] is non-zero, and its seed
+   value i * 7 / 2 otherwise; any mismatch calls __builtin_abort.
+   NOTE(review): the empty asm with a "memory" clobber presumably keeps the
+   setup and check loops from being optimized -- confirm.  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N]; \
+ INTYPE in[N * 7]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 7; ++i) \
+ in[i] = i * 9 / 2; \
+ NAME##_7 (out, in, mask, N); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ OUTTYPE if_true = (in[i * 7] \
+ + in[i * 7 + 1] \
+ + in[i * 7 + 2] \
+ + in[i * 7 + 3] \
+ + in[i * 7 + 4] \
+ + in[i * 7 + 5] \
+ + in[i * 7 + 6]); \
+ OUTTYPE if_false = i * 7 / 2; \
+ if (out[i] != (mask[i] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_load-7.c"
+
+#define N 100
+
+#undef TEST_LOOP
+/* Run-time check of NAME##_8.  OUT and MASK are seeded per index and IN
+   holds N * 8 elements.  After the call, OUT[i] must be the sum of the 8
+   IN elements starting at in[i * 8] where MASK[i] is non-zero, and its seed
+   value i * 7 / 2 otherwise; any mismatch calls __builtin_abort.
+   NOTE(review): the empty asm with a "memory" clobber presumably keeps the
+   setup and check loops from being optimized -- confirm.  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N]; \
+ INTYPE in[N * 8]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 8; ++i) \
+ in[i] = i * 9 / 2; \
+ NAME##_8 (out, in, mask, N); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ OUTTYPE if_true = (in[i * 8] \
+ + in[i * 8 + 1] \
+ + in[i * 8 + 2] \
+ + in[i * 8 + 3] \
+ + in[i * 8 + 4] \
+ + in[i * 8 + 5] \
+ + in[i * 8 + 6] \
+ + in[i * 8 + 7]); \
+ OUTTYPE if_false = i * 7 / 2; \
+ if (out[i] != (mask[i] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Define NAME##_2: for every i < n, compute value = src[i] + bias and, when
+   cond[i] is non-zero, store it to the 2 consecutive DEST elements starting
+   at dest[i * 2].  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 2] = value; \
+ dest[i * 2 + 1] = value; \
+ } \
+ } \
+ }
+
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vsseg2e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 16 } } */
+/* { dg-final { scan-assembler-times {vsseg2e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg2e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg2e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
new file mode 100644
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Define NAME##_3: for every i < n, compute value = src[i] + bias and, when
+   cond[i] is non-zero, store it to the 3 consecutive DEST elements starting
+   at dest[i * 3].  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 3] = value; \
+ dest[i * 3 + 1] = value; \
+ dest[i * 3 + 2] = value; \
+ } \
+ } \
+ }
+
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vsseg3e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 16 } } */
+/* { dg-final { scan-assembler-times {vsseg3e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg3e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg3e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
new file mode 100644
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* NAME##_4 stores src[i] + bias to dest[i * 4] through dest[i * 4 + 3]
+   for every i < n whose cond[i] is nonzero.  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 4] = value; \
+ dest[i * 4 + 1] = value; \
+ dest[i * 4 + 2] = value; \
+ dest[i * 4 + 3] = value; \
+ } \
+ } \
+ }
+
+/* TEST2 varies the cond[] element type, TEST1 the src[] element type and
+   TEST the dest[] element type (plus same-type floating-point pairs).  */
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* 16 of the loops store 8-bit elements and 20 each store 16-, 32- and 64-bit
+   elements; the scans expect that many vsseg4 stores with a v0.t mask.  */
+/* { dg-final { scan-assembler-times {vsseg4e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 16 } } */
+/* { dg-final { scan-assembler-times {vsseg4e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg4e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg4e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
new file mode 100644
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* NAME##_5 stores src[i] + bias to dest[i * 5] through dest[i * 5 + 4]
+   for every i < n whose cond[i] is nonzero.  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_5 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 5] = value; \
+ dest[i * 5 + 1] = value; \
+ dest[i * 5 + 2] = value; \
+ dest[i * 5 + 3] = value; \
+ dest[i * 5 + 4] = value; \
+ } \
+ } \
+ }
+
+/* TEST2 varies the cond[] element type, TEST1 the src[] element type and
+   TEST the dest[] element type (plus same-type floating-point pairs).  */
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* 16 of the loops store 8-bit elements and 20 each store 16-, 32- and 64-bit
+   elements; the scans expect that many vsseg5 stores with a v0.t mask.  */
+/* { dg-final { scan-assembler-times {vsseg5e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 16 } } */
+/* { dg-final { scan-assembler-times {vsseg5e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg5e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg5e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
new file mode 100644
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* NAME##_6 stores src[i] + bias to dest[i * 6] through dest[i * 6 + 5]
+   for every i < n whose cond[i] is nonzero.  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_6 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 6] = value; \
+ dest[i * 6 + 1] = value; \
+ dest[i * 6 + 2] = value; \
+ dest[i * 6 + 3] = value; \
+ dest[i * 6 + 4] = value; \
+ dest[i * 6 + 5] = value; \
+ } \
+ } \
+ }
+
+/* TEST2 varies the cond[] element type, TEST1 the src[] element type and
+   TEST the dest[] element type (plus same-type floating-point pairs).  */
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* 16 of the loops store 8-bit elements and 20 each store 16-, 32- and 64-bit
+   elements; the scans expect that many vsseg6 stores with a v0.t mask.  */
+/* { dg-final { scan-assembler-times {vsseg6e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 16 } } */
+/* { dg-final { scan-assembler-times {vsseg6e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg6e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg6e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
new file mode 100644
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* NAME##_7 stores src[i] + bias to dest[i * 7] through dest[i * 7 + 6]
+   for every i < n whose cond[i] is nonzero.  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_7 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 7] = value; \
+ dest[i * 7 + 1] = value; \
+ dest[i * 7 + 2] = value; \
+ dest[i * 7 + 3] = value; \
+ dest[i * 7 + 4] = value; \
+ dest[i * 7 + 5] = value; \
+ dest[i * 7 + 6] = value; \
+ } \
+ } \
+ }
+
+/* TEST2 varies the cond[] element type, TEST1 the src[] element type and
+   TEST the dest[] element type (plus same-type floating-point pairs).  */
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* 16 of the loops store 8-bit elements and 20 each store 16-, 32- and 64-bit
+   elements; the scans expect that many vsseg7 stores with a v0.t mask.  */
+/* { dg-final { scan-assembler-times {vsseg7e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 16 } } */
+/* { dg-final { scan-assembler-times {vsseg7e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg7e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg7e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
new file mode 100644
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* NAME##_8 stores src[i] + bias to dest[i * 8] through dest[i * 8 + 7]
+   for every i < n whose cond[i] is nonzero.  */
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME##_8 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
+ MASKTYPE *__restrict cond, INTYPE bias, intptr_t n) \
+ { \
+ for (intptr_t i = 0; i < n; ++i) \
+ { \
+ INTYPE value = src[i] + bias; \
+ if (cond[i]) \
+ { \
+ dest[i * 8] = value; \
+ dest[i * 8 + 1] = value; \
+ dest[i * 8 + 2] = value; \
+ dest[i * 8 + 3] = value; \
+ dest[i * 8 + 4] = value; \
+ dest[i * 8 + 5] = value; \
+ dest[i * 8 + 6] = value; \
+ dest[i * 8 + 7] = value; \
+ } \
+ } \
+ }
+
+/* TEST2 varies the cond[] element type, TEST1 the src[] element type and
+   TEST the dest[] element type (plus same-type floating-point pairs).  */
+#define TEST2(NAME, OUTTYPE, INTYPE) \
+ TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
+ TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
+ TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
+ TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)
+
+#define TEST1(NAME, OUTTYPE) \
+ TEST2 (NAME##_i8, OUTTYPE, int8_t) \
+ TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
+ TEST2 (NAME##_i32, OUTTYPE, int32_t) \
+ TEST2 (NAME##_i64, OUTTYPE, uint64_t)
+
+#define TEST(NAME) \
+ TEST1 (NAME##_i8, int8_t) \
+ TEST1 (NAME##_i16, uint16_t) \
+ TEST1 (NAME##_i32, int32_t) \
+ TEST1 (NAME##_i64, uint64_t) \
+ TEST2 (NAME##_f16_f16, _Float16, _Float16) \
+ TEST2 (NAME##_f32_f32, float, float) \
+ TEST2 (NAME##_f64_f64, double, double)
+
+TEST (test)
+
+/* 16 of the loops store 8-bit elements and 20 each store 16-, 32- and 64-bit
+   elements; the scans expect that many vsseg8 stores with a v0.t mask.  */
+/* { dg-final { scan-assembler-times {vsseg8e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 16 } } */
+/* { dg-final { scan-assembler-times {vsseg8e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg8e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
+/* { dg-final { scan-assembler-times {vsseg8e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0\.t} 20 } } */
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_store-1.c"
+
+#define N 100
+
+/* Redefine TEST_LOOP as a checker: initialize in[], mask[] and out[], call
+   NAME##_2 with bias 17, then abort unless each out[i] is in[i / 2] + 17
+   (converted through INTYPE) when mask[i / 2] is set, else still i * 9 / 2.  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N * 2]; \
+ INTYPE in[N]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ in[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 2; ++i) \
+ out[i] = i * 9 / 2; \
+ NAME##_2 (out, in, mask, 17, N); \
+ for (int i = 0; i < N * 2; ++i) \
+ { \
+ OUTTYPE if_true = (INTYPE) (in[i / 2] + 17); \
+ OUTTYPE if_false = i * 9 / 2; \
+ if (out[i] != (mask[i / 2] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_store-2.c"
+
+#define N 100
+
+/* Redefine TEST_LOOP as a checker: initialize in[], mask[] and out[], call
+   NAME##_3 with bias 11, then abort unless each out[i] is in[i / 3] + 11
+   (converted through INTYPE) when mask[i / 3] is set, else still i * 9 / 2.  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N * 3]; \
+ INTYPE in[N]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ in[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 3; ++i) \
+ out[i] = i * 9 / 2; \
+ NAME##_3 (out, in, mask, 11, N); \
+ for (int i = 0; i < N * 3; ++i) \
+ { \
+ OUTTYPE if_true = (INTYPE) (in[i / 3] + 11); \
+ OUTTYPE if_false = i * 9 / 2; \
+ if (out[i] != (mask[i / 3] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_store-3.c"
+
+#define N 100
+
+/* Redefine TEST_LOOP as a checker: initialize in[], mask[] and out[], call
+   NAME##_4 with bias 42, then abort unless each out[i] is in[i / 4] + 42
+   (converted through INTYPE) when mask[i / 4] is set, else still i * 9 / 2.  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N * 4]; \
+ INTYPE in[N]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ in[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 4; ++i) \
+ out[i] = i * 9 / 2; \
+ NAME##_4 (out, in, mask, 42, N); \
+ for (int i = 0; i < N * 4; ++i) \
+ { \
+ OUTTYPE if_true = (INTYPE) (in[i / 4] + 42); \
+ OUTTYPE if_false = i * 9 / 2; \
+ if (out[i] != (mask[i / 4] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_store-4.c"
+
+#define N 100
+
+/* Redefine TEST_LOOP as a checker: initialize in[], mask[] and out[], call
+   NAME##_5 with bias 42, then abort unless each out[i] is in[i / 5] + 42
+   (converted through INTYPE) when mask[i / 5] is set, else still i * 9 / 2.  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N * 5]; \
+ INTYPE in[N]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ in[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 5; ++i) \
+ out[i] = i * 9 / 2; \
+ NAME##_5 (out, in, mask, 42, N); \
+ for (int i = 0; i < N * 5; ++i) \
+ { \
+ OUTTYPE if_true = (INTYPE) (in[i / 5] + 42); \
+ OUTTYPE if_false = i * 9 / 2; \
+ if (out[i] != (mask[i / 5] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_store-5.c"
+
+#define N 100
+
+/* Redefine TEST_LOOP as a checker: initialize in[], mask[] and out[], call
+   NAME##_6 with bias 42, then abort unless each out[i] is in[i / 6] + 42
+   (converted through INTYPE) when mask[i / 6] is set, else still i * 9 / 2.  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N * 6]; \
+ INTYPE in[N]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ in[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 6; ++i) \
+ out[i] = i * 9 / 2; \
+ NAME##_6 (out, in, mask, 42, N); \
+ for (int i = 0; i < N * 6; ++i) \
+ { \
+ OUTTYPE if_true = (INTYPE) (in[i / 6] + 42); \
+ OUTTYPE if_false = i * 9 / 2; \
+ if (out[i] != (mask[i / 6] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_store-6.c"
+
+#define N 100
+
+/* Redefine TEST_LOOP as a checker: initialize in[], mask[] and out[], call
+   NAME##_7 with bias 42, then abort unless each out[i] is in[i / 7] + 42
+   (converted through INTYPE) when mask[i / 7] is set, else still i * 9 / 2.  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N * 7]; \
+ INTYPE in[N]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ in[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 7; ++i) \
+ out[i] = i * 9 / 2; \
+ NAME##_7 (out, in, mask, 42, N); \
+ for (int i = 0; i < N * 7; ++i) \
+ { \
+ OUTTYPE if_true = (INTYPE) (in[i / 7] + 42); \
+ OUTTYPE if_false = i * 9 / 2; \
+ if (out[i] != (mask[i / 7] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "mask_struct_store-7.c"
+
+#define N 100
+
+/* Redefine TEST_LOOP as a checker: initialize in[], mask[] and out[], call
+   NAME##_8 with bias 42, then abort unless each out[i] is in[i / 8] + 42
+   (converted through INTYPE) when mask[i / 8] is set, else still i * 9 / 2.  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
+ { \
+ OUTTYPE out[N * 8]; \
+ INTYPE in[N]; \
+ MASKTYPE mask[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ in[i] = i * 7 / 2; \
+ mask[i] = i % 5 <= i % 3; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ for (int i = 0; i < N * 8; ++i) \
+ out[i] = i * 9 / 2; \
+ NAME##_8 (out, in, mask, 42, N); \
+ for (int i = 0; i < N * 8; ++i) \
+ { \
+ OUTTYPE if_true = (INTYPE) (in[i / 8] + 42); \
+ OUTTYPE if_false = i * 9 / 2; \
+ if (out[i] != (mask[i / 8] ? if_true : if_false)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,237 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -funroll-all-loops -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include <stdint-gcc.h>
+/* The #ifndef guards let TYPE, NAME and N be overridden before this point.  */
+#ifndef TYPE
+#define TYPE uint8_t
+#endif
+
+#ifndef NAME
+#define NAME(X) X
+#endif
+
+#ifndef N
+#define N 1024
+#endif
+
+/* f2..f8: split the interleaved array passed last (stride 2..8) into the
+   separate arrays passed before it.  */
+void __attribute__ ((noinline, noclone))
+NAME(f2) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = c[i * 2];
+ b[i] = c[i * 2 + 1];
+ }
+}
+
+void __attribute__ ((noinline, noclone))
+NAME(f3) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = d[i * 3];
+ b[i] = d[i * 3 + 1];
+ c[i] = d[i * 3 + 2];
+ }
+}
+
+void __attribute__ ((noinline, noclone))
+NAME(f4) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = e[i * 4];
+ b[i] = e[i * 4 + 1];
+ c[i] = e[i * 4 + 2];
+ d[i] = e[i * 4 + 3];
+ }
+}
+
+void __attribute__ ((noinline, noclone))
+NAME(f5) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = f[i * 5];
+ b[i] = f[i * 5 + 1];
+ c[i] = f[i * 5 + 2];
+ d[i] = f[i * 5 + 3];
+ e[i] = f[i * 5 + 4];
+ }
+}
+
+void __attribute__ ((noinline, noclone))
+NAME(f6) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,
+ TYPE *__restrict g)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = g[i * 6];
+ b[i] = g[i * 6 + 1];
+ c[i] = g[i * 6 + 2];
+ d[i] = g[i * 6 + 3];
+ e[i] = g[i * 6 + 4];
+ f[i] = g[i * 6 + 5];
+ }
+}
+
+void __attribute__ ((noinline, noclone))
+NAME(f7) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,
+ TYPE *__restrict g, TYPE *__restrict h)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = h[i * 7];
+ b[i] = h[i * 7 + 1];
+ c[i] = h[i * 7 + 2];
+ d[i] = h[i * 7 + 3];
+ e[i] = h[i * 7 + 4];
+ f[i] = h[i * 7 + 5];
+ g[i] = h[i * 7 + 6];
+ }
+}
+
+void __attribute__ ((noinline, noclone))
+NAME(f8) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,
+ TYPE *__restrict g, TYPE *__restrict h, TYPE *__restrict j)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = j[i * 8];
+ b[i] = j[i * 8 + 1];
+ c[i] = j[i * 8 + 2];
+ d[i] = j[i * 8 + 3];
+ e[i] = j[i * 8 + 4];
+ f[i] = j[i * 8 + 5];
+ g[i] = j[i * 8 + 6];
+ h[i] = j[i * 8 + 7];
+ }
+}
+
+/* g2..g8: the inverse, interleaving the separate arrays into the array
+   passed last with stride 2..8.  */
+void __attribute__ ((noinline, noclone))
+NAME(g2) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ c[i * 2] = a[i];
+ c[i * 2 + 1] = b[i];
+ }
+}
+
+void __attribute__ ((noinline, noclone))
+NAME(g3) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ d[i * 3] = a[i];
+ d[i * 3 + 1] = b[i];
+ d[i * 3 + 2] = c[i];
+ }
+}
+
+void __attribute__ ((noinline, noclone))
+NAME(g4) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ e[i * 4] = a[i];
+ e[i * 4 + 1] = b[i];
+ e[i * 4 + 2] = c[i];
+ e[i * 4 + 3] = d[i];
+ }
+}
+
+void __attribute__ ((noinline, noclone))
+NAME(g5) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ f[i * 5] = a[i];
+ f[i * 5 + 1] = b[i];
+ f[i * 5 + 2] = c[i];
+ f[i * 5 + 3] = d[i];
+ f[i * 5 + 4] = e[i];
+ }
+}
+
+void __attribute__ ((noinline, noclone))
+NAME(g6) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,
+ TYPE *__restrict g)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ g[i * 6] = a[i];
+ g[i * 6 + 1] = b[i];
+ g[i * 6 + 2] = c[i];
+ g[i * 6 + 3] = d[i];
+ g[i * 6 + 4] = e[i];
+ g[i * 6 + 5] = f[i];
+ }
+}
+
+void __attribute__ ((noinline, noclone))
+NAME(g7) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,
+ TYPE *__restrict g, TYPE *__restrict h)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ h[i * 7] = a[i];
+ h[i * 7 + 1] = b[i];
+ h[i * 7 + 2] = c[i];
+ h[i * 7 + 3] = d[i];
+ h[i * 7 + 4] = e[i];
+ h[i * 7 + 5] = f[i];
+ h[i * 7 + 6] = g[i];
+ }
+}
+
+void __attribute__ ((noinline, noclone))
+NAME(g8) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,
+ TYPE *__restrict g, TYPE *__restrict h, TYPE *__restrict j)
+{
+ for (int i = 0; i < N; ++i)
+ {
+ j[i * 8] = a[i];
+ j[i * 8 + 1] = b[i];
+ j[i * 8 + 2] = c[i];
+ j[i * 8 + 3] = d[i];
+ j[i * 8 + 4] = e[i];
+ j[i * 8 + 5] = f[i];
+ j[i * 8 + 6] = g[i];
+ j[i * 8 + 7] = h[i];
+ }
+}
+
+/* { dg-final { scan-assembler-times {vlseg2e8\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg3e8\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg4e8\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg5e8\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg6e8\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg7e8\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vlseg8e8\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg2e8\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vsseg3e8\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vsseg4e8\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg5e8\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg6e8\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg7e8\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg8e8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e8,\s*m1,\s*t[au],\s*m[au]} 14 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+/* Build struct_vect-6.c with TYPE = _Float16 and ITYPE = int16_t.  */
+#define TYPE _Float16
+#define ITYPE int16_t
+#include "struct_vect-6.c"
+
+/* Expect one vlsegNe16.v and one vssegNe16.v for each N in 2..8, and 14
+   vsetvli instructions matching the register-operand pattern below.  */
+/* { dg-final { scan-assembler-times {vlseg2e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg5e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg6e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg7e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg8e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg2e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg3e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg4e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg5e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg6e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg7e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg8e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 14 } } */
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+/* Build struct_vect-6.c with TYPE = float and ITYPE = int32_t.  */
+#define TYPE float
+#define ITYPE int32_t
+#include "struct_vect-6.c"
+
+/* Expect one vlsegNe32.v and one vssegNe32.v for each N in 2..8, and 14
+   vsetvli instructions matching the register-operand pattern below.  */
+/* { dg-final { scan-assembler-times {vlseg2e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg5e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg6e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg7e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg8e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg2e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg3e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg4e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg5e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg6e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg7e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg8e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 14 } } */
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+/* Build struct_vect-6.c with TYPE = double and ITYPE = int64_t.  */
+#define TYPE double
+#define ITYPE int64_t
+#include "struct_vect-6.c"
+
+/* Expect one vlsegNe64.v and one vssegNe64.v for each N in 2..8, and 14
+   vsetvli instructions matching the register-operand pattern below.  */
+/* { dg-final { scan-assembler-times {vlseg2e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg5e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg6e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg7e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg8e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg2e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg3e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg4e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg5e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg6e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg7e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg8e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 14 } } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+#define N 2000
+
+/* Each loop accumulates every third src[] element into dest[]
+   (dest[i] += src[i * 3]) over the compile-time trip count N.  */
+#define TEST_LOOP(NAME, TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src) \
+ { \
+ for (int i = 0; i < N; ++i) \
+ dest[i] += src[i * 3]; \
+ }
+
+#define TEST(NAME) \
+ TEST_LOOP (NAME##_i8, int8_t) \
+ TEST_LOOP (NAME##_i16, uint16_t) \
+ TEST_LOOP (NAME##_f32, float) \
+ TEST_LOOP (NAME##_f64, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg3e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e64\.v} 1 } } */
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Each loop accumulates every third src[] element into dest[]
+   (dest[i] += src[i * 3]) over the runtime trip count n.  */
+#define TEST_LOOP(NAME, TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i] += src[i * 3]; \
+ }
+
+#define TEST(NAME) \
+ TEST_LOOP (NAME##_i8, int8_t) \
+ TEST_LOOP (NAME##_i16, uint16_t) \
+ TEST_LOOP (NAME##_f32, float) \
+ TEST_LOOP (NAME##_f64, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg3e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e64\.v} 1 } } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+#define N 2000
+
+/* Each loop accumulates every second src[] element into dest[]
+   (dest[i] += src[i * 2]) over the compile-time trip count N.  */
+#define TEST_LOOP(NAME, TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src) \
+ { \
+ for (int i = 0; i < N; ++i) \
+ dest[i] += src[i * 2]; \
+ }
+
+#define TEST(NAME) \
+ TEST_LOOP (NAME##_i8, int8_t) \
+ TEST_LOOP (NAME##_i16, uint16_t) \
+ TEST_LOOP (NAME##_f32, float) \
+ TEST_LOOP (NAME##_f64, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg2e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg2e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg2e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg2e64\.v} 1 } } */
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Each loop accumulates every second src[] element into dest[]
+   (dest[i] += src[i * 2]) over the runtime trip count n.  */
+#define TEST_LOOP(NAME, TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i] += src[i * 2]; \
+ }
+
+#define TEST(NAME) \
+ TEST_LOOP (NAME##_i8, int8_t) \
+ TEST_LOOP (NAME##_i16, uint16_t) \
+ TEST_LOOP (NAME##_f32, float) \
+ TEST_LOOP (NAME##_f64, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg2e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg2e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg2e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg2e64\.v} 1 } } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+#define N 2000
+
+/* Each loop accumulates every fourth src[] element into dest[]
+   (dest[i] += src[i * 4]) over the compile-time trip count N.  */
+#define TEST_LOOP(NAME, TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src) \
+ { \
+ for (int i = 0; i < N; ++i) \
+ dest[i] += src[i * 4]; \
+ }
+
+#define TEST(NAME) \
+ TEST_LOOP (NAME##_i8, int8_t) \
+ TEST_LOOP (NAME##_i16, uint16_t) \
+ TEST_LOOP (NAME##_f32, float) \
+ TEST_LOOP (NAME##_f64, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg4e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e64\.v} 1 } } */
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+/* Each loop accumulates every fourth src[] element into dest[]
+   (dest[i] += src[i * 4]) over the runtime trip count n.  */
+#define TEST_LOOP(NAME, TYPE) \
+ void __attribute__ ((noinline, noclone)) \
+ NAME (TYPE *restrict dest, TYPE *restrict src, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[i] += src[i * 4]; \
+ }
+
+#define TEST(NAME) \
+ TEST_LOOP (NAME##_i8, int8_t) \
+ TEST_LOOP (NAME##_i16, uint16_t) \
+ TEST_LOOP (NAME##_f32, float) \
+ TEST_LOOP (NAME##_f64, double)
+
+TEST (test)
+
+/* { dg-final { scan-assembler-times {vlseg4e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e64\.v} 1 } } */
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -funroll-all-loops -fno-schedule-insns -fno-schedule-insns2" } */
+
+/* Build struct_vect-1.c with TYPE = uint16_t.  */
+#define TYPE uint16_t
+#include "struct_vect-1.c"
+
+/* Expected counts of each vlsegNe16.v / vssegNe16.v, plus 14 vsetivli with
+   AVL 8, e16, m1 and no vsetvli at all.  */
+/* { dg-final { scan-assembler-times {vlseg2e16\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg3e16\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg4e16\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg5e16\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg6e16\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vlseg7e16\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vlseg8e16\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg2e16\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vsseg3e16\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vsseg4e16\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg5e16\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg6e16\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg7e16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vsseg8e16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e16,\s*m1,\s*t[au],\s*m[au]} 14 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -funroll-all-loops -fno-schedule-insns -fno-schedule-insns2" } */
+/* Include struct_vect-1.c with TYPE preset to uint32_t.  */
+#define TYPE uint32_t
+#include "struct_vect-1.c"
+
+/* { dg-final { scan-assembler-times {vlseg2e32\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg3e32\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg4e32\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg5e32\.v} 6 } } */
+/* { dg-final { scan-assembler-times {vlseg6e32\.v} 6 } } */
+/* { dg-final { scan-assembler-times {vlseg7e32\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vlseg8e32\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg2e32\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vsseg3e32\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vsseg4e32\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg5e32\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg6e32\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg7e32\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg8e32\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 14 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -funroll-all-loops -fno-schedule-insns -fno-schedule-insns2" } */
+/* Include struct_vect-1.c with TYPE preset to uint64_t.  */
+#define TYPE uint64_t
+#include "struct_vect-1.c"
+
+/* { dg-final { scan-assembler-times {vlseg2e64\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg3e64\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg4e64\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg5e64\.v} 7 } } */
+/* { dg-final { scan-assembler-times {vlseg6e64\.v} 7 } } */
+/* { dg-final { scan-assembler-times {vlseg7e64\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vlseg8e64\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg2e64\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vsseg3e64\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vsseg4e64\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg5e64\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg6e64\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg7e64\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg8e64\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*2,\s*e64,\s*m1,\s*t[au],\s*m[au]} 14 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -funroll-all-loops -fno-schedule-insns -fno-schedule-insns2" } */
+/* Include struct_vect-1.c with TYPE preset to float.  */
+#define TYPE float
+#include "struct_vect-1.c"
+
+/* { dg-final { scan-assembler-times {vlseg2e32\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg3e32\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg4e32\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vlseg5e32\.v} 6 } } */
+/* { dg-final { scan-assembler-times {vlseg6e32\.v} 6 } } */
+/* { dg-final { scan-assembler-times {vlseg7e32\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vlseg8e32\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg2e32\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vsseg3e32\.v} 8 } } */
+/* { dg-final { scan-assembler-times {vsseg4e32\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg5e32\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg6e32\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg7e32\.v} 4 } } */
+/* { dg-final { scan-assembler-times {vsseg8e32\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 14 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
new file mode 100644
@@ -0,0 +1,225 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+/* Defaults for the element type (TYPE) and loop-counter type (ITYPE) when TYPE is not predefined.  */
+#ifndef TYPE
+#define TYPE uint8_t
+#define ITYPE int8_t
+#endif
+/* De-interleave c[] into two outputs: a[i] = c[2 * i], b[i] = c[2 * i + 1] for i < n.  */
+void __attribute__ ((noinline, noclone))
+f2 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ a[i] = c[i * 2];
+ b[i] = c[i * 2 + 1];
+ }
+}
+/* De-interleave d[] into three outputs: a, b, c receive d[3 * i + 0..2] for i < n.  */
+void __attribute__ ((noinline, noclone))
+f3 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ a[i] = d[i * 3];
+ b[i] = d[i * 3 + 1];
+ c[i] = d[i * 3 + 2];
+ }
+}
+/* De-interleave e[] into four outputs: a, b, c, d receive e[4 * i + 0..3] for i < n.  */
+void __attribute__ ((noinline, noclone))
+f4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ a[i] = e[i * 4];
+ b[i] = e[i * 4 + 1];
+ c[i] = e[i * 4 + 2];
+ d[i] = e[i * 4 + 3];
+ }
+}
+/* De-interleave f[] into five outputs: a..e receive f[5 * i + 0..4] for i < n.  */
+void __attribute__ ((noinline, noclone))
+f5 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ a[i] = f[i * 5];
+ b[i] = f[i * 5 + 1];
+ c[i] = f[i * 5 + 2];
+ d[i] = f[i * 5 + 3];
+ e[i] = f[i * 5 + 4];
+ }
+}
+/* De-interleave g[] into six outputs: a..f receive g[6 * i + 0..5] for i < n.  */
+void __attribute__ ((noinline, noclone))
+f6 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,
+ TYPE *__restrict g, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ a[i] = g[i * 6];
+ b[i] = g[i * 6 + 1];
+ c[i] = g[i * 6 + 2];
+ d[i] = g[i * 6 + 3];
+ e[i] = g[i * 6 + 4];
+ f[i] = g[i * 6 + 5];
+ }
+}
+/* De-interleave h[] into seven outputs: a..g receive h[7 * i + 0..6] for i < n.  */
+void __attribute__ ((noinline, noclone))
+f7 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,
+ TYPE *__restrict g, TYPE *__restrict h, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ a[i] = h[i * 7];
+ b[i] = h[i * 7 + 1];
+ c[i] = h[i * 7 + 2];
+ d[i] = h[i * 7 + 3];
+ e[i] = h[i * 7 + 4];
+ f[i] = h[i * 7 + 5];
+ g[i] = h[i * 7 + 6];
+ }
+}
+/* De-interleave j[] into eight outputs: a..h receive j[8 * i + 0..7] for i < n.  */
+void __attribute__ ((noinline, noclone))
+f8 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,
+ TYPE *__restrict g, TYPE *__restrict h, TYPE *__restrict j, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ a[i] = j[i * 8];
+ b[i] = j[i * 8 + 1];
+ c[i] = j[i * 8 + 2];
+ d[i] = j[i * 8 + 3];
+ e[i] = j[i * 8 + 4];
+ f[i] = j[i * 8 + 5];
+ g[i] = j[i * 8 + 6];
+ h[i] = j[i * 8 + 7];
+ }
+}
+/* Interleave two inputs into c[]: c[2 * i] = a[i], c[2 * i + 1] = b[i] for i < n.  */
+void __attribute__ ((noinline, noclone))
+g2 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ c[i * 2] = a[i];
+ c[i * 2 + 1] = b[i];
+ }
+}
+/* Interleave three inputs into d[]: d[3 * i + 0..2] = a[i], b[i], c[i] for i < n.  */
+void __attribute__ ((noinline, noclone))
+g3 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ d[i * 3] = a[i];
+ d[i * 3 + 1] = b[i];
+ d[i * 3 + 2] = c[i];
+ }
+}
+/* Interleave four inputs into e[]: e[4 * i + 0..3] = a[i], b[i], c[i], d[i] for i < n.  */
+void __attribute__ ((noinline, noclone))
+g4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ e[i * 4] = a[i];
+ e[i * 4 + 1] = b[i];
+ e[i * 4 + 2] = c[i];
+ e[i * 4 + 3] = d[i];
+ }
+}
+/* Interleave five inputs into f[]: f[5 * i + 0..4] = a[i] .. e[i] for i < n.  */
+void __attribute__ ((noinline, noclone))
+g5 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ f[i * 5] = a[i];
+ f[i * 5 + 1] = b[i];
+ f[i * 5 + 2] = c[i];
+ f[i * 5 + 3] = d[i];
+ f[i * 5 + 4] = e[i];
+ }
+}
+/* Interleave six inputs into g[]: g[6 * i + 0..5] = a[i] .. f[i] for i < n.  */
+void __attribute__ ((noinline, noclone))
+g6 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,
+ TYPE *__restrict g, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ g[i * 6] = a[i];
+ g[i * 6 + 1] = b[i];
+ g[i * 6 + 2] = c[i];
+ g[i * 6 + 3] = d[i];
+ g[i * 6 + 4] = e[i];
+ g[i * 6 + 5] = f[i];
+ }
+}
+/* Interleave seven inputs into h[]: h[7 * i + 0..6] = a[i] .. g[i] for i < n.  */
+void __attribute__ ((noinline, noclone))
+g7 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,
+ TYPE *__restrict g, TYPE *__restrict h, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ h[i * 7] = a[i];
+ h[i * 7 + 1] = b[i];
+ h[i * 7 + 2] = c[i];
+ h[i * 7 + 3] = d[i];
+ h[i * 7 + 4] = e[i];
+ h[i * 7 + 5] = f[i];
+ h[i * 7 + 6] = g[i];
+ }
+}
+/* Interleave eight inputs into j[]: j[8 * i + 0..7] = a[i] .. h[i] for i < n.  */
+void __attribute__ ((noinline, noclone))
+g8 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
+ TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,
+ TYPE *__restrict g, TYPE *__restrict h, TYPE *__restrict j, ITYPE n)
+{
+ for (ITYPE i = 0; i < n; ++i)
+ {
+ j[i * 8] = a[i];
+ j[i * 8 + 1] = b[i];
+ j[i * 8 + 2] = c[i];
+ j[i * 8 + 3] = d[i];
+ j[i * 8 + 4] = e[i];
+ j[i * 8 + 5] = f[i];
+ j[i * 8 + 6] = g[i];
+ j[i * 8 + 7] = h[i];
+ }
+}
+
+/* { dg-final { scan-assembler-times {vlseg2e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg5e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg6e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg7e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg8e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg2e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg3e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg4e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg5e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg6e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg7e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg8e8\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au]} 14 } } */
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Include struct_vect-6.c with uint16_t elements and an int16_t loop counter.  */
+#define TYPE uint16_t
+#define ITYPE int16_t
+#include "struct_vect-6.c"
+
+/* { dg-final { scan-assembler-times {vlseg2e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg5e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg6e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg7e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg8e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg2e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg3e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg4e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg5e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg6e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg7e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg8e16\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 14 } } */
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Include struct_vect-6.c with uint32_t elements and an int32_t loop counter.  */
+#define TYPE uint32_t
+#define ITYPE int32_t
+#include "struct_vect-6.c"
+
+/* { dg-final { scan-assembler-times {vlseg2e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg5e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg6e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg7e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg8e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg2e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg3e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg4e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg5e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg6e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg7e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg8e32\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 14 } } */
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Include struct_vect-6.c with uint64_t elements and an int64_t loop counter.  */
+#define TYPE uint64_t
+#define ITYPE int64_t
+#include "struct_vect-6.c"
+
+/* { dg-final { scan-assembler-times {vlseg2e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg3e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg4e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg5e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg6e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg7e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vlseg8e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg2e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg3e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg4e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg5e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg6e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg7e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsseg8e64\.v} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 14 } } */
new file mode 100644
@@ -0,0 +1,139 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 --param=riscv-autovec-preference=fixed-vlmax -funroll-all-loops -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "struct_vect-1.c"
+/* Test buffers; e has room for 8 * N elements.  N is not defined in this file (presumably by struct_vect-1.c).  */
+TYPE a[N], b[N], c[N], d[N], a2[N], b2[N], c2[N], d2[N], e[N * 8];
+/* Fill array[0 .. n-1] with base + step * i.  */
+void __attribute__ ((noinline, noclone))
+init_array (TYPE *array, int n, TYPE base, TYPE step)
+{
+ for (int i = 0; i < n; ++i)
+ array[i] = base + step * i;
+}
+/* Abort unless array[i] == (TYPE) (base + step * i) for every i < n.  */
+void __attribute__ ((noinline, noclone))
+check_array (TYPE *array, int n, TYPE base, TYPE step)
+{
+ for (int i = 0; i < n; ++i)
+ if (array[i] != (TYPE) (base + step * i))
+ __builtin_abort ();
+}
+/* Driver: calls f2..f8 and g2..g8 (not defined in this file; presumably from struct_vect-1.c) and checks each result.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ init_array (e, 2 * N, 11, 5); /* f2: e[i] = 11 + 5 * i; both outputs are checked with step 10.  */
+ f2 (a, b, e);
+ check_array (a, N, 11, 10);
+ check_array (b, N, 16, 10);
+ /* f3: e[i] = 7 + 6 * i; the three outputs are checked with step 18.  */
+ init_array (e, 3 * N, 7, 6);
+ f3 (a, b, c, e);
+ check_array (a, N, 7, 18);
+ check_array (b, N, 13, 18);
+ check_array (c, N, 19, 18);
+ /* f4: e[i] = 4 + 11 * i; the four outputs are checked with step 44.  */
+ init_array (e, 4 * N, 4, 11);
+ f4 (a, b, c, d, e);
+ check_array (a, N, 4, 44);
+ check_array (b, N, 15, 44);
+ check_array (c, N, 26, 44);
+ check_array (d, N, 37, 44);
+ /* f5: e[i] = 3 + 9 * i; the five outputs (a2 is the fifth) are checked with step 45.  */
+ init_array (e, 5 * N, 3, 9);
+ f5 (a, b, c, d, a2, e);
+ check_array (a, N, 3, 45);
+ check_array (b, N, 12, 45);
+ check_array (c, N, 21, 45);
+ check_array (d, N, 30, 45);
+ check_array (a2, N, 39, 45);
+ /* f6: e[i] = 5 + 5 * i; the six outputs are checked with step 30.  */
+ init_array (e, 6 * N, 5, 5);
+ f6 (a, b, c, d, a2, b2, e);
+ check_array (a, N, 5, 30);
+ check_array (b, N, 10, 30);
+ check_array (c, N, 15, 30);
+ check_array (d, N, 20, 30);
+ check_array (a2, N, 25, 30);
+ check_array (b2, N, 30, 30);
+ /* f7: e[i] = 7 + 3 * i; the seven outputs are checked with step 21.  */
+ init_array (e, 7 * N, 7, 3);
+ f7 (a, b, c, d, a2, b2, c2, e);
+ check_array (a, N, 7, 21);
+ check_array (b, N, 10, 21);
+ check_array (c, N, 13, 21);
+ check_array (d, N, 16, 21);
+ check_array (a2, N, 19, 21);
+ check_array (b2, N, 22, 21);
+ check_array (c2, N, 25, 21);
+ /* f8: e[i] = 5 + 8 * i; the eight outputs are checked with step 64.  */
+ init_array (e, 8 * N, 5, 8);
+ f8 (a, b, c, d, a2, b2, c2, d2, e);
+ check_array (a, N, 5, 64);
+ check_array (b, N, 13, 64);
+ check_array (c, N, 21, 64);
+ check_array (d, N, 29, 64);
+ check_array (a2, N, 37, 64);
+ check_array (b2, N, 45, 64);
+ check_array (c2, N, 53, 64);
+ check_array (d2, N, 61, 64);
+ /* g2: a[i] = 2 + 8 * i, b[i] = 6 + 8 * i; e is then checked as 2 + 4 * i.  */
+ init_array (a, N, 2, 8);
+ init_array (b, N, 6, 8);
+ g2 (a, b, e);
+ check_array (e, 2 * N, 2, 4);
+ /* g3: inputs start at 4, 9, 14 with step 15; e is then checked as 4 + 5 * i.  */
+ init_array (a, N, 4, 15);
+ init_array (b, N, 9, 15);
+ init_array (c, N, 14, 15);
+ g3 (a, b, c, e);
+ check_array (e, 3 * N, 4, 5);
+ /* g4: inputs start at 14, 23, 32, 41 with step 36; e is then checked as 14 + 9 * i.  */
+ init_array (a, N, 14, 36);
+ init_array (b, N, 23, 36);
+ init_array (c, N, 32, 36);
+ init_array (d, N, 41, 36);
+ g4 (a, b, c, d, e);
+ check_array (e, 4 * N, 14, 9);
+ /* g5: inputs start at 3, 12, 21, 30, 39 with step 45; e is then checked as 3 + 9 * i.  */
+ init_array (a, N, 3, 45);
+ init_array (b, N, 12, 45);
+ init_array (c, N, 21, 45);
+ init_array (d, N, 30, 45);
+ init_array (a2, N, 39, 45);
+ g5 (a, b, c, d, a2, e);
+ check_array (e, 5 * N, 3, 9);
+ /* g6: inputs start at 5, 10, ..., 30 with step 30; e is then checked as 5 + 5 * i.  */
+ init_array (a, N, 5, 30);
+ init_array (b, N, 10, 30);
+ init_array (c, N, 15, 30);
+ init_array (d, N, 20, 30);
+ init_array (a2, N, 25, 30);
+ init_array (b2, N, 30, 30);
+ g6 (a, b, c, d, a2, b2, e);
+ check_array (e, 6 * N, 5, 5);
+ /* g7: inputs start at 7, 10, ..., 25 with step 21; e is then checked as 7 + 3 * i.  */
+ init_array (a, N, 7, 21);
+ init_array (b, N, 10, 21);
+ init_array (c, N, 13, 21);
+ init_array (d, N, 16, 21);
+ init_array (a2, N, 19, 21);
+ init_array (b2, N, 22, 21);
+ init_array (c2, N, 25, 21);
+ g7 (a, b, c, d, a2, b2, c2, e);
+ check_array (e, 7 * N, 7, 3);
+ /* g8: inputs start at 5, 13, ..., 61 with step 64; e is then checked as 5 + 8 * i.  */
+ init_array (a, N, 5, 64);
+ init_array (b, N, 13, 64);
+ init_array (c, N, 21, 64);
+ init_array (d, N, 29, 64);
+ init_array (a2, N, 37, 64);
+ init_array (b2, N, 45, 64);
+ init_array (c2, N, 53, 64);
+ init_array (d2, N, 61, 64);
+ g8 (a, b, c, d, a2, b2, c2, d2, e);
+ check_array (e, 8 * N, 5, 8);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-std=c99 --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Include struct_vect_run-6.c with _Float16 elements and an int16_t loop counter.  */
+#define TYPE _Float16
+#define ITYPE int16_t
+#include "struct_vect_run-6.c"
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Include struct_vect_run-6.c with float elements and an int32_t loop counter.  */
+#define TYPE float
+#define ITYPE int32_t
+#include "struct_vect_run-6.c"
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Include struct_vect_run-6.c with double elements and an int64_t loop counter.  */
+#define TYPE double
+#define ITYPE int64_t
+#include "struct_vect_run-6.c"
new file mode 100644
@@ -0,0 +1,36 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "struct_vect-13.c"
+/* Redefine TEST_LOOP as a checking block: fill out[] and in[], call NAME, verify out[i] == i * 7 / 2 + in[i * 3].  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, TYPE) \
+ { \
+ TYPE out[N]; \
+ TYPE in[N * 3]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); /* memory clobber: compiler barrier */ \
+ } \
+ for (int i = 0; i < N * 3; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE expected = i * 7 / 2 + in[i * 3]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+/* Expands one checking block per TEST_LOOP use in TEST (TEST is presumably defined by struct_vect-13.c).  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "struct_vect-14.c"
+/* Size of out[]; in[] holds N * 3.  Defined after the #include, so only the checking code below sees it.  */
+#define N 1000
+/* Redefine TEST_LOOP as a checking block: for count in { 0, 1, N - 1 }, only out[i] with i < count may gain in[i * 3].  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, TYPE) \
+ { \
+ TYPE out[N]; \
+ TYPE in[N * 3]; \
+ int counts[] = { 0, 1, N - 1 }; \
+ for (int j = 0; j < 3; ++j) \
+ { \
+ int count = counts[j]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); /* memory clobber: compiler barrier */ \
+ } \
+ for (int i = 0; i < N * 3; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in, count); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE expected = i * 7 / 2; \
+ if (i < count) \
+ expected += in[i * 3]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ } \
+ }
+/* Expands one checking block per TEST_LOOP use in TEST (TEST is presumably defined by struct_vect-14.c).  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,36 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "struct_vect-15.c"
+/* Redefine TEST_LOOP as a checking block: fill out[] and in[], call NAME, verify out[i] == i * 7 / 2 + in[i * 2].  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, TYPE) \
+ { \
+ TYPE out[N]; \
+ TYPE in[N * 2]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); /* memory clobber: compiler barrier */ \
+ } \
+ for (int i = 0; i < N * 2; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE expected = i * 7 / 2 + in[i * 2]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+/* Expands one checking block per TEST_LOOP use in TEST (TEST is presumably defined by struct_vect-15.c).  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "struct_vect-16.c"
+/* Size of out[]; in[] holds N * 2.  Defined after the #include, so only the checking code below sees it.  */
+#define N 1000
+/* Redefine TEST_LOOP as a checking block: for count in { 0, 1, N - 1 }, only out[i] with i < count may gain in[i * 2].  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, TYPE) \
+ { \
+ TYPE out[N]; \
+ TYPE in[N * 2]; \
+ int counts[] = { 0, 1, N - 1 }; \
+ for (int j = 0; j < 3; ++j) \
+ { \
+ int count = counts[j]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); /* memory clobber: compiler barrier */ \
+ } \
+ for (int i = 0; i < N * 2; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in, count); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE expected = i * 7 / 2; \
+ if (i < count) \
+ expected += in[i * 2]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ } \
+ }
+/* Expands one checking block per TEST_LOOP use in TEST (TEST is presumably defined by struct_vect-16.c).  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,36 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "struct_vect-17.c"
+/* Redefine TEST_LOOP as a checking block: fill out[] and in[], call NAME, verify out[i] == i * 7 / 2 + in[i * 4].  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, TYPE) \
+ { \
+ TYPE out[N]; \
+ TYPE in[N * 4]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); /* memory clobber: compiler barrier */ \
+ } \
+ for (int i = 0; i < N * 4; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE expected = i * 7 / 2 + in[i * 4]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+/* Expands one checking block per TEST_LOOP use in TEST (TEST is presumably defined by struct_vect-17.c).  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "struct_vect-18.c"
+/* Size of out[]; in[] holds N * 4.  Defined after the #include, so only the checking code below sees it.  */
+#define N 1000
+/* Redefine TEST_LOOP as a checking block: for count in { 0, 1, N - 1 }, only out[i] with i < count may gain in[i * 4].  */
+#undef TEST_LOOP
+#define TEST_LOOP(NAME, TYPE) \
+ { \
+ TYPE out[N]; \
+ TYPE in[N * 4]; \
+ int counts[] = { 0, 1, N - 1 }; \
+ for (int j = 0; j < 3; ++j) \
+ { \
+ int count = counts[j]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ out[i] = i * 7 / 2; \
+ asm volatile ("" ::: "memory"); /* memory clobber: compiler barrier */ \
+ } \
+ for (int i = 0; i < N * 4; ++i) \
+ { \
+ in[i] = i * 9 / 2; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ NAME (out, in, count); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE expected = i * 7 / 2; \
+ if (i < count) \
+ expected += in[i * 4]; \
+ if (out[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ } \
+ }
+/* Expands one checking block per TEST_LOOP use in TEST (TEST is presumably defined by struct_vect-18.c).  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST (test);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,5 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 --param=riscv-autovec-preference=fixed-vlmax -funroll-all-loops -fno-schedule-insns -fno-schedule-insns2" } */
+/* Include struct_vect_run-1.c with TYPE preset to uint16_t.  */
+#define TYPE uint16_t
+#include "struct_vect_run-1.c"
new file mode 100644
@@ -0,0 +1,5 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 --param=riscv-autovec-preference=fixed-vlmax -funroll-all-loops -fno-schedule-insns -fno-schedule-insns2" } */
+/* Include struct_vect_run-1.c with TYPE preset to uint32_t.  */
+#define TYPE uint32_t
+#include "struct_vect_run-1.c"
new file mode 100644
@@ -0,0 +1,5 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 --param=riscv-autovec-preference=fixed-vlmax -funroll-all-loops -fno-schedule-insns -fno-schedule-insns2" } */
+/* Include struct_vect_run-1.c with TYPE preset to uint64_t.  */
+#define TYPE uint64_t
+#include "struct_vect_run-1.c"
new file mode 100644
@@ -0,0 +1,5 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 --param=riscv-autovec-preference=fixed-vlmax -funroll-all-loops -fno-schedule-insns -fno-schedule-insns2" } */
+/* Include struct_vect_run-1.c with TYPE preset to float.  */
+#define TYPE float
+#include "struct_vect_run-1.c"
new file mode 100644
@@ -0,0 +1,141 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "struct_vect-6.c"
+/* Elements per output array.  93 fits in int8_t, the ITYPE that struct_vect-6.c defaults to.  */
+#define N 93
+/* Test buffers; e has room for the widest case, 8 * N elements.  */
+TYPE a[N], b[N], c[N], d[N], a2[N], b2[N], c2[N], d2[N], e[N * 8];
+/* Fill array[0 .. n-1] with base + step * i.  */
+void __attribute__ ((noinline, noclone))
+init_array (TYPE *array, int n, TYPE base, TYPE step)
+{
+ for (int i = 0; i < n; ++i)
+ array[i] = base + step * i;
+}
+/* Abort unless array[i] == (TYPE) (base + step * i) for every i < n.  */
+void __attribute__ ((noinline, noclone))
+check_array (TYPE *array, int n, TYPE base, TYPE step)
+{
+ for (int i = 0; i < n; ++i)
+ if (array[i] != (TYPE) (base + step * i))
+ __builtin_abort ();
+}
+/* Driver: runs the de-interleaving f2..f8 and interleaving g2..g8 on N elements and checks each result.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ init_array (e, 2 * N, 11, 5); /* f2: e[i] = 11 + 5 * i; both outputs are checked with step 10.  */
+ f2 (a, b, e, N);
+ check_array (a, N, 11, 10);
+ check_array (b, N, 16, 10);
+ /* f3: e[i] = 7 + 6 * i; the three outputs are checked with step 18.  */
+ init_array (e, 3 * N, 7, 6);
+ f3 (a, b, c, e, N);
+ check_array (a, N, 7, 18);
+ check_array (b, N, 13, 18);
+ check_array (c, N, 19, 18);
+ /* f4: e[i] = 4 + 11 * i; the four outputs are checked with step 44.  */
+ init_array (e, 4 * N, 4, 11);
+ f4 (a, b, c, d, e, N);
+ check_array (a, N, 4, 44);
+ check_array (b, N, 15, 44);
+ check_array (c, N, 26, 44);
+ check_array (d, N, 37, 44);
+ /* f5: e[i] = 3 + 9 * i; the five outputs (a2 is the fifth) are checked with step 45.  */
+ init_array (e, 5 * N, 3, 9);
+ f5 (a, b, c, d, a2, e, N);
+ check_array (a, N, 3, 45);
+ check_array (b, N, 12, 45);
+ check_array (c, N, 21, 45);
+ check_array (d, N, 30, 45);
+ check_array (a2, N, 39, 45);
+ /* f6: e[i] = 5 + 5 * i; the six outputs are checked with step 30.  */
+ init_array (e, 6 * N, 5, 5);
+ f6 (a, b, c, d, a2, b2, e, N);
+ check_array (a, N, 5, 30);
+ check_array (b, N, 10, 30);
+ check_array (c, N, 15, 30);
+ check_array (d, N, 20, 30);
+ check_array (a2, N, 25, 30);
+ check_array (b2, N, 30, 30);
+ /* f7: e[i] = 7 + 3 * i; the seven outputs are checked with step 21.  */
+ init_array (e, 7 * N, 7, 3);
+ f7 (a, b, c, d, a2, b2, c2, e, N);
+ check_array (a, N, 7, 21);
+ check_array (b, N, 10, 21);
+ check_array (c, N, 13, 21);
+ check_array (d, N, 16, 21);
+ check_array (a2, N, 19, 21);
+ check_array (b2, N, 22, 21);
+ check_array (c2, N, 25, 21);
+ /* f8: e[i] = 5 + 8 * i; the eight outputs are checked with step 64.  */
+ init_array (e, 8 * N, 5, 8);
+ f8 (a, b, c, d, a2, b2, c2, d2, e, N);
+ check_array (a, N, 5, 64);
+ check_array (b, N, 13, 64);
+ check_array (c, N, 21, 64);
+ check_array (d, N, 29, 64);
+ check_array (a2, N, 37, 64);
+ check_array (b2, N, 45, 64);
+ check_array (c2, N, 53, 64);
+ check_array (d2, N, 61, 64);
+ /* g2: a[i] = 2 + 8 * i, b[i] = 6 + 8 * i; e is then checked as 2 + 4 * i.  */
+ init_array (a, N, 2, 8);
+ init_array (b, N, 6, 8);
+ g2 (a, b, e, N);
+ check_array (e, 2 * N, 2, 4);
+ /* g3: inputs start at 4, 9, 14 with step 15; e is then checked as 4 + 5 * i.  */
+ init_array (a, N, 4, 15);
+ init_array (b, N, 9, 15);
+ init_array (c, N, 14, 15);
+ g3 (a, b, c, e, N);
+ check_array (e, 3 * N, 4, 5);
+ /* g4: inputs start at 14, 23, 32, 41 with step 36; e is then checked as 14 + 9 * i.  */
+ init_array (a, N, 14, 36);
+ init_array (b, N, 23, 36);
+ init_array (c, N, 32, 36);
+ init_array (d, N, 41, 36);
+ g4 (a, b, c, d, e, N);
+ check_array (e, 4 * N, 14, 9);
+ /* g5: inputs start at 3, 12, 21, 30, 39 with step 45; e is then checked as 3 + 9 * i.  */
+ init_array (a, N, 3, 45);
+ init_array (b, N, 12, 45);
+ init_array (c, N, 21, 45);
+ init_array (d, N, 30, 45);
+ init_array (a2, N, 39, 45);
+ g5 (a, b, c, d, a2, e, N);
+ check_array (e, 5 * N, 3, 9);
+ /* g6: inputs start at 5, 10, ..., 30 with step 30; e is then checked as 5 + 5 * i.  */
+ init_array (a, N, 5, 30);
+ init_array (b, N, 10, 30);
+ init_array (c, N, 15, 30);
+ init_array (d, N, 20, 30);
+ init_array (a2, N, 25, 30);
+ init_array (b2, N, 30, 30);
+ g6 (a, b, c, d, a2, b2, e, N);
+ check_array (e, 6 * N, 5, 5);
+ /* g7: inputs start at 7, 10, ..., 25 with step 21; e is then checked as 7 + 3 * i.  */
+ init_array (a, N, 7, 21);
+ init_array (b, N, 10, 21);
+ init_array (c, N, 13, 21);
+ init_array (d, N, 16, 21);
+ init_array (a2, N, 19, 21);
+ init_array (b2, N, 22, 21);
+ init_array (c2, N, 25, 21);
+ g7 (a, b, c, d, a2, b2, c2, e, N);
+ check_array (e, 7 * N, 7, 3);
+ /* g8: inputs start at 5, 13, ..., 61 with step 64; e is then checked as 5 + 8 * i.  */
+ init_array (a, N, 5, 64);
+ init_array (b, N, 13, 64);
+ init_array (c, N, 21, 64);
+ init_array (d, N, 29, 64);
+ init_array (a2, N, 37, 64);
+ init_array (b2, N, 45, 64);
+ init_array (c2, N, 53, 64);
+ init_array (d2, N, 61, 64);
+ g8 (a, b, c, d, a2, b2, c2, d2, e, N);
+ check_array (e, 8 * N, 5, 8);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Include struct_vect_run-6.c with uint16_t elements and an int16_t loop counter.  */
+#define TYPE uint16_t
+#define ITYPE int16_t
+#include "struct_vect_run-6.c"
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Include struct_vect_run-6.c with uint32_t elements and an int32_t loop counter.  */
+#define TYPE uint32_t
+#define ITYPE int32_t
+#include "struct_vect_run-6.c"
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Include struct_vect_run-6.c with uint64_t elements and an int64_t loop counter.  */
+#define TYPE uint64_t
+#define ITYPE int64_t
+#include "struct_vect_run-6.c"
@@ -50,6 +50,8 @@ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/*.\[cS\]]] \
"-O3 -ftree-vectorize" $CFLAGS
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/vls/*.\[cS\]]] \
"-O3 -ftree-vectorize --param riscv-autovec-preference=scalable" $CFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/struct/*.\[cS\]]] \
+ "" "-O3 -ftree-vectorize"
set AUTOVEC_TEST_OPTS [list \
{-ftree-vectorize -O3 --param riscv-autovec-lmul=m1} \