O
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index c0293a306f9..e8a728ae226 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -219,6 +219,7 @@ rtx gen_avl_for_scalar_move (rtx);
> void expand_tuple_move (machine_mode, rtx *);
> machine_mode preferred_simd_mode (scalar_mode);
> opt_machine_mode get_mask_mode (machine_mode);
> +void expand_vec_series (rtx, rtx, rtx);
> }
>
> /* We classify builtin types into two classes:
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 7ca49ca67c1..0c3b1b4c40b 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -248,6 +248,111 @@ emit_nonvlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
> emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, false);
> }
>
> +/* Emit binary operations. */
> +
> +static void
> +emit_binop (unsigned icode, rtx *ops, machine_mode mask_mode,
> + machine_mode scalar_mode)
> +{
> + insn_expander<9> e;
> + machine_mode mode = GET_MODE (ops[0]);
> + e.add_output_operand (ops[0], mode);
> + e.add_all_one_mask_operand (mask_mode);
> + e.add_vundef_operand (mode);
> + if (VECTOR_MODE_P (GET_MODE (ops[1])))
> + e.add_input_operand (ops[1], GET_MODE (ops[1]));
> + else
> + e.add_input_operand (ops[1], scalar_mode);
> + if (VECTOR_MODE_P (GET_MODE (ops[2])))
> + e.add_input_operand (ops[2], GET_MODE (ops[2]));
> + else
> + e.add_input_operand (ops[2], scalar_mode);
> + rtx vlmax = gen_reg_rtx (Pmode);
> + emit_vlmax_vsetvl (mode, vlmax);
> + e.add_input_operand (vlmax, Pmode);
> + e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
> + e.add_avl_type_operand (avl_type::VLMAX);
> + e.expand ((enum insn_code) icode, false);
> +}
> +
> +/* Emit vid.v instruction. */
> +
> +static void
> +emit_indexop (rtx target, machine_mode mask_mode)
nit: rename to emit_index_op
> +void
> +expand_vec_series (rtx dest, rtx base, rtx step)
> +{
> + machine_mode mode = GET_MODE (dest);
> + machine_mode inner_mode = GET_MODE_INNER (mode);
> + machine_mode mask_mode;
> + gcc_assert (get_mask_mode (mode).exists (&mask_mode));
> +
> + /* VECT_IV = BASE + I * STEP. */
> +
> + /* Step 1: Generate I = { 0, 1, 2, ... } by vid.v. */
> + rtx tmp = gen_reg_rtx (mode);
> + emit_indexop (tmp, mask_mode);
> + if (rtx_equal_p (step, const1_rtx) && rtx_equal_p (base, const0_rtx))
> + {
> + emit_move_insn (dest, tmp);
> + return;
> + }
> +
> + /* Step 2: Generate I * STEP.
> + - STEP is 1, we don't emit any instructions.
> + - STEP is power of 2, we use vsll.vi/vsll.vx.
> + - STEP is non-power of 2, we use vmul.vx. */
The comment doesn't seem to match the structure of the code; I would prefer
to restructure the code so that it matches the comment.
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/series-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/series-1.c
> new file mode 100644
> index 00000000000..a01f6ce7411
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/series-1.c
> @@ -0,0 +1,50 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m4" } */
> +
> +#include <stdint.h>
Use stdint-gcc.h instead
@@ -58,3 +58,27 @@
DONE;
}
)
+
+;; =========================================================================
+;; == Vector creation
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Linear series
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vid.v
+;; - vmul.vx
+;; - vadd.vx/vadd.vi
+;; -------------------------------------------------------------------------
+
+;; Operand 0 is the destination vector register.  Operands 1 and 2 are
+;; element-mode registers or integers; they are handed to
+;; riscv_vector::expand_vec_series as its BASE and STEP arguments.
+(define_expand "@vec_series<mode>"
+ [(match_operand:VI 0 "register_operand")
+ (match_operand:<VEL> 1 "reg_or_int_operand")
+ (match_operand:<VEL> 2 "reg_or_int_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_series (operands[0], operands[1], operands[2]);
+ DONE;
+ }
+)
@@ -219,6 +219,7 @@ rtx gen_avl_for_scalar_move (rtx);
void expand_tuple_move (machine_mode, rtx *);
machine_mode preferred_simd_mode (scalar_mode);
opt_machine_mode get_mask_mode (machine_mode);
+void expand_vec_series (rtx, rtx, rtx);
}
/* We classify builtin types into two classes:
@@ -248,6 +248,111 @@ emit_nonvlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, false);
}
+/* Emit the binary operation ICODE with OPS[0] as the destination and
+   OPS[1], OPS[2] as the sources.  MASK_MODE is the mode of the all-ones
+   mask operand.  A source that is not in a vector mode is added in
+   SCALAR_MODE; a vector source is added in its own mode.  The length
+   operand is a fresh Pmode register set up by emit_vlmax_vsetvl.  */
+
+static void
+emit_binop (unsigned icode, rtx *ops, machine_mode mask_mode,
+            machine_mode scalar_mode)
+{
+  insn_expander<9> expander;
+  machine_mode dest_mode = GET_MODE (ops[0]);
+
+  /* Destination, all-ones mask and vundef operand.  */
+  expander.add_output_operand (ops[0], dest_mode);
+  expander.add_all_one_mask_operand (mask_mode);
+  expander.add_vundef_operand (dest_mode);
+
+  /* Both sources follow the same rule, so handle them in one loop.  */
+  for (int i = 1; i <= 2; i++)
+    {
+      machine_mode src_mode = GET_MODE (ops[i]);
+      if (!VECTOR_MODE_P (src_mode))
+        src_mode = scalar_mode;
+      expander.add_input_operand (ops[i], src_mode);
+    }
+
+  /* Length operand, policy operands and AVL type.  */
+  rtx avl = gen_reg_rtx (Pmode);
+  emit_vlmax_vsetvl (dest_mode, avl);
+  expander.add_input_operand (avl, Pmode);
+  expander.add_policy_operand (get_prefer_tail_policy (),
+                               get_prefer_mask_policy ());
+  expander.add_avl_type_operand (avl_type::VLMAX);
+
+  expander.expand ((enum insn_code) icode, false);
+}
+
+/* Emit the pred_series pattern (vid.v) for the mode of TARGET, writing
+   the result to TARGET.  MASK_MODE is the mode of the all-ones mask
+   operand; the length operand is a fresh Pmode register set up by
+   emit_vlmax_vsetvl.  */
+
+static void
+emit_indexop (rtx target, machine_mode mask_mode)
+{
+  insn_expander<7> expander;
+  machine_mode vmode = GET_MODE (target);
+
+  /* Destination, all-ones mask and vundef operand.  */
+  expander.add_output_operand (target, vmode);
+  expander.add_all_one_mask_operand (mask_mode);
+  expander.add_vundef_operand (vmode);
+
+  /* Length operand, policy operands and AVL type.  */
+  rtx avl = gen_reg_rtx (Pmode);
+  emit_vlmax_vsetvl (vmode, avl);
+  expander.add_input_operand (avl, Pmode);
+  expander.add_policy_operand (get_prefer_tail_policy (),
+                               get_prefer_mask_policy ());
+  expander.add_avl_type_operand (avl_type::VLMAX);
+
+  insn_code icode = code_for_pred_series (vmode);
+  expander.expand (icode, false);
+}
+
+/* Expand a series const vector: set DEST to the vector whose element I
+   is BASE + I * STEP.  DEST supplies the vector mode; BASE and STEP are
+   passed to the pred_scalar patterns as scalar operands.  */
+
+void
+expand_vec_series (rtx dest, rtx base, rtx step)
+{
+  machine_mode mode = GET_MODE (dest);
+  machine_mode inner_mode = GET_MODE_INNER (mode);
+
+  /* Look the mask mode up outside of gcc_assert: depending on how the
+     compiler is configured, gcc_assert may expand to something that does
+     not evaluate its argument, which would leave MASK_MODE unset.  */
+  machine_mode mask_mode;
+  bool has_mask_mode = get_mask_mode (mode).exists (&mask_mode);
+  gcc_assert (has_mask_mode);
+
+  /* VECT_IV = BASE + I * STEP.  */
+
+  /* Step 1: Generate I = { 0, 1, 2, ... } by vid.v.  */
+  rtx vid = gen_reg_rtx (mode);
+  emit_indexop (vid, mask_mode);
+
+  /* Step 2: Generate I * STEP.
+     - STEP is 1, we don't emit any instructions.
+     - STEP is power of 2, we use vsll.vi/vsll.vx.
+     - STEP is non-power of 2, we use vmul.vx.  */
+  rtx scaled;
+  if (rtx_equal_p (step, const1_rtx))
+    scaled = vid;
+  else
+    {
+      /* Only allocate the intermediate register when it is written.  */
+      scaled = gen_reg_rtx (mode);
+      if (CONST_INT_P (step) && pow2p_hwi (INTVAL (step)))
+        {
+          /* Emit logical left shift operation; the shift amount is a
+             Pmode constant.  */
+          int shift = exact_log2 (INTVAL (step));
+          rtx shift_amount = gen_int_mode (shift, Pmode);
+          rtx ops[3] = {scaled, vid, shift_amount};
+          insn_code icode = code_for_pred_scalar (ASHIFT, mode);
+          emit_binop (icode, ops, mask_mode, Pmode);
+        }
+      else
+        {
+          rtx ops[3] = {scaled, vid, step};
+          insn_code icode = code_for_pred_scalar (MULT, mode);
+          emit_binop (icode, ops, mask_mode, inner_mode);
+        }
+    }
+
+  /* Step 3: Generate BASE + I * STEP.
+     - BASE is 0, we don't emit any instructions.
+     - BASE is not 0, we use vadd.vx/vadd.vi.  */
+  if (rtx_equal_p (base, const0_rtx))
+    {
+      emit_move_insn (dest, scaled);
+      return;
+    }
+
+  rtx sum = gen_reg_rtx (mode);
+  rtx ops[3] = {sum, scaled, base};
+  insn_code icode = code_for_pred_scalar (PLUS, mode);
+  emit_binop (icode, ops, mask_mode, inner_mode);
+  emit_move_insn (dest, sum);
+}
+
static void
expand_const_vector (rtx target, rtx src, machine_mode mask_mode)
{
@@ -280,12 +385,19 @@ expand_const_vector (rtx target, rtx src, machine_mode mask_mode)
return;
}
+ /* Support scalable const series vector. */
+ rtx base, step;
+ if (const_vec_series_p (src, &base, &step))
+ {
+ emit_insn (gen_vec_series (mode, target, base, step));
+ return;
+ }
+
/* TODO: We only support const duplicate vector for now. More cases
will be supported when we support auto-vectorization:
- 1. series vector.
- 2. multiple elts duplicate vector.
- 3. multiple patterns with multiple elts. */
+ 1. multiple elts duplicate vector.
+ 2. multiple patterns with multiple elts. */
}
/* Expand a pre-RA RVV data move from SRC to DEST.
@@ -1266,9 +1266,34 @@ riscv_const_insns (rtx x)
}
case CONST_DOUBLE:
- case CONST_VECTOR:
/* We can use x0 to load floating-point zero. */
return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
+ case CONST_VECTOR:
+ {
+ /* TODO: This is not accurate, we will need to
+ adapt the COST of CONST_VECTOR in the future
+ for the following cases:
+
+ - 1. const duplicate vector with element value
+ in range of [-16, 15].
+ - 2. const duplicate vector with element value
+ out range of [-16, 15].
+ - 3. const series vector.
+ ...etc. */
+ if (riscv_v_ext_vector_mode_p (GET_MODE (x)))
+ {
+ /* const series vector. */
+ rtx base, step;
+ if (const_vec_series_p (x, &base, &step))
+ {
+ /* This is not accurate, we will need to adapt the COST
+ * accurately according to BASE && STEP. */
+ return 1;
+ }
+ }
+ /* TODO: We may support more const vector in the future. */
+ return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
+ }
case CONST:
/* See if we can refer to X directly. */
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m4" } */
+
+#include <stdint.h>
+
+/* Number of TYPE elements that fit in 64 bytes; used as the trip count of
+   every generated loop.  */
+#define NUM_ELEMS(TYPE) (64 / sizeof (TYPE))
+
+/* Define loop_<TYPE>_<SUFFIX> (a), a noinline/noclone function that stores
+   BASE + i * STEP into a[i] for each i below NUM_ELEMS (TYPE).  */
+#define DEF_LOOP(TYPE, BASE, STEP, SUFFIX) \
+ void __attribute__ ((noinline, noclone)) \
+ loop_##TYPE##_##SUFFIX (TYPE *restrict a) \
+ { \
+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \
+ a[i] = (BASE) + i * (STEP); \
+ }
+
+/* Apply T once for uint32_t and once for int32_t.  */
+#define TEST_SEW32_TYPES(T, BASE, STEP, SUFFIX) \
+ T (uint32_t, BASE, STEP, SUFFIX) \
+ T (int32_t, BASE, STEP, SUFFIX)
+
+/* Apply T to each of the 25 (BASE, STEP) pairs below for both 32-bit types,
+   i.e. 50 instantiations in total.  The suffix b<BASE>s<STEP> names the pair,
+   with "m" standing for a minus sign (bm16s1 is BASE -16, STEP 1).  */
+#define TEST_ALL(T) \
+ TEST_SEW32_TYPES (T, 0, 1, b0s1) \
+ TEST_SEW32_TYPES (T, 0, 2, b0s2) \
+ TEST_SEW32_TYPES (T, 0, 3, b0s3) \
+ TEST_SEW32_TYPES (T, 0, 8, b0s8) \
+ TEST_SEW32_TYPES (T, 0, 9, b0s9) \
+ TEST_SEW32_TYPES (T, 0, 16, b0s16) \
+ TEST_SEW32_TYPES (T, 0, 17, b0s17) \
+ TEST_SEW32_TYPES (T, 0, 32, b0s32) \
+ TEST_SEW32_TYPES (T, 0, 33, b0s33) \
+ TEST_SEW32_TYPES (T, -16, 1, bm16s1) \
+ TEST_SEW32_TYPES (T, 15, 1, b15s1) \
+ TEST_SEW32_TYPES (T, -17, 1, bm17s1) \
+ TEST_SEW32_TYPES (T, 16, 1, b16s1) \
+ TEST_SEW32_TYPES (T, -16, 128, bm16s128) \
+ TEST_SEW32_TYPES (T, 15, 128, b15s128) \
+ TEST_SEW32_TYPES (T, -17, 128, bm17s128) \
+ TEST_SEW32_TYPES (T, 16, 128, b16s128) \
+ TEST_SEW32_TYPES (T, -16, 179, bm16s179) \
+ TEST_SEW32_TYPES (T, 15, 179, b15s179) \
+ TEST_SEW32_TYPES (T, -17, 179, bm17s179) \
+ TEST_SEW32_TYPES (T, 16, 179, b16s179) \
+ TEST_SEW32_TYPES (T, -16, 65536, bm16s65536) \
+ TEST_SEW32_TYPES (T, 15, 65536, b15s65536) \
+ TEST_SEW32_TYPES (T, -17, 65536, bm17s65536) \
+ TEST_SEW32_TYPES (T, 16, 65536, b16s65536)
+
+/* Emit the definitions of all the loop functions.  */
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {vid\.v\s+v[0-9]+} 50 } } */
+/* { dg-final { scan-assembler-times {vsll\.vi\s+v[0-9]+} 24 } } */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m4" } */
+
+#include "series-1.c"
+
+/* Run loop_<TYPE>_<SUFFIX> on a zero-initialized array of NUM_ELEMS (TYPE)
+   elements and abort unless element i equals (TYPE) (BASE + i * STEP).
+   BASE and STEP are parenthesized, as in DEF_LOOP, so that an argument
+   which is itself an expression keeps its meaning after expansion.  */
+#define TEST_LOOP(TYPE, BASE, STEP, SUFFIX) \
+ { \
+ TYPE array[NUM_ELEMS (TYPE)] = {}; \
+ loop_##TYPE##_##SUFFIX (array); \
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+ if (array[i] != (TYPE) ((BASE) + i * (STEP))) \
+ __builtin_abort (); \
+ }
+
+/* Expand TEST_LOOP for every entry of TEST_ALL (from the included
+   series-1.c); a mismatch aborts inside TEST_LOOP, otherwise return 0.
+   NOTE(review): optimize (1) presumably keeps the checking loops in main
+   from being auto-vectorized themselves -- confirm.  */
+int __attribute__ ((optimize (1)))
+main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}