@@ -1309,6 +1309,7 @@ expand_vec_series (rtx dest, rtx base, rtx step)
machine_mode mode = GET_MODE (dest);
poly_int64 nunits_m1 = GET_MODE_NUNITS (mode) - 1;
poly_int64 value;
+ rtx result = register_operand (dest, mode) ? dest : gen_reg_rtx (mode);
/* VECT_IV = BASE + I * STEP. */
@@ -1317,15 +1318,10 @@ expand_vec_series (rtx dest, rtx base, rtx step)
rtx op[] = {vid};
emit_vlmax_insn (code_for_pred_series (mode), RVV_MISC_OP, op);
- /* Step 2: Generate I * STEP.
- - STEP is 1, we don't emit any instructions.
- - STEP is power of 2, we use vsll.vi/vsll.vx.
- - STEP is non-power of 2, we use vmul.vx. */
rtx step_adj;
- if (rtx_equal_p (step, const1_rtx))
- step_adj = vid;
- else if (rtx_equal_p (step, constm1_rtx) && poly_int_rtx_p (base, &value)
- && known_eq (nunits_m1, value))
+ if (rtx_equal_p (step, constm1_rtx)
+ && poly_int_rtx_p (base, &value)
+ && known_eq (nunits_m1, value))
{
/* Special case:
{nunits - 1, nunits - 2, ... , 0}.
@@ -1334,46 +1330,54 @@ expand_vec_series (rtx dest, rtx base, rtx step)
Code sequence:
vid.v v
vrsub nunits - 1, v. */
- rtx ops[] = {dest, vid, gen_int_mode (nunits_m1, GET_MODE_INNER (mode))};
+ rtx ops[]
+ = {result, vid, gen_int_mode (nunits_m1, GET_MODE_INNER (mode))};
insn_code icode = code_for_pred_sub_reverse_scalar (mode);
emit_vlmax_insn (icode, RVV_BINOP, ops);
- return;
}
else
{
- step_adj = gen_reg_rtx (mode);
- if (CONST_INT_P (step) && pow2p_hwi (INTVAL (step)))
+ /* Step 2: Generate I * STEP.
+ - STEP is 1, we don't emit any instructions.
+ - STEP is power of 2, we use vsll.vi/vsll.vx.
+ - STEP is non-power of 2, we use vmul.vx. */
+ if (rtx_equal_p (step, const1_rtx))
+ step_adj = vid;
+ else
{
- /* Emit logical left shift operation. */
- int shift = exact_log2 (INTVAL (step));
- rtx shift_amount = gen_int_mode (shift, Pmode);
- insn_code icode = code_for_pred_scalar (ASHIFT, mode);
- rtx ops[] = {step_adj, vid, shift_amount};
- emit_vlmax_insn (icode, RVV_BINOP, ops);
+ step_adj = gen_reg_rtx (mode);
+ if (CONST_INT_P (step) && pow2p_hwi (INTVAL (step)))
+ {
+ /* Emit logical left shift operation. */
+ int shift = exact_log2 (INTVAL (step));
+ rtx shift_amount = gen_int_mode (shift, Pmode);
+ insn_code icode = code_for_pred_scalar (ASHIFT, mode);
+ rtx ops[] = {step_adj, vid, shift_amount};
+ emit_vlmax_insn (icode, RVV_BINOP, ops);
+ }
+ else
+ {
+ insn_code icode = code_for_pred_scalar (MULT, mode);
+ rtx ops[] = {step_adj, vid, step};
+ emit_vlmax_insn (icode, RVV_BINOP, ops);
+ }
}
+
+ /* Step 3: Generate BASE + I * STEP.
+ - BASE is 0, use result of vid.
+ - BASE is not 0, we use vadd.vx/vadd.vi. */
+ if (rtx_equal_p (base, const0_rtx))
+ emit_move_insn (result, step_adj);
else
{
- insn_code icode = code_for_pred_scalar (MULT, mode);
- rtx ops[] = {step_adj, vid, step};
+ insn_code icode = code_for_pred_scalar (PLUS, mode);
+ rtx ops[] = {result, step_adj, base};
emit_vlmax_insn (icode, RVV_BINOP, ops);
}
}
- /* Step 3: Generate BASE + I * STEP.
- - BASE is 0, use result of vid.
- - BASE is not 0, we use vadd.vx/vadd.vi. */
- if (rtx_equal_p (base, const0_rtx))
- {
- emit_move_insn (dest, step_adj);
- }
- else
- {
- rtx result = gen_reg_rtx (mode);
- insn_code icode = code_for_pred_scalar (PLUS, mode);
- rtx ops[] = {result, step_adj, base};
- emit_vlmax_insn (icode, RVV_BINOP, ops);
- emit_move_insn (dest, result);
- }
+ if (result != dest)
+ emit_move_insn (dest, result);
}
static void
new file mode 100644
@@ -0,0 +1,90 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 --param=riscv-autovec-preference=fixed-vlmax -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <stdint-gcc.h>
+
+typedef int16_t vnx16i __attribute__ ((vector_size (32)));
+
+/*
+** foo1:
+** vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+** vid\.v\s+v[0-9]+
+** vrsub\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15
+** vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+** ret
+*/
+void
+foo1 (int16_t *__restrict out)
+{ /* Descending series: base 15, step -1; expected asm: vid.v, vrsub.vi 15.  */
+ vnx16i v = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
+ *(vnx16i *) out = v; /* Store the constant vector through OUT.  */
+}
+
+/*
+** foo2:
+** vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+** vid\.v\s+v[0-9]+
+** li\s+[a-x0-9]+,\s*7
+** vmul\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vadd\.vi\s+v[0-9]+,\s*v[0-9]+,\s*3
+** vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+** ret
+*/
+void
+foo2 (int16_t *__restrict out)
+{ /* Series base 3, step 7; expected asm: vid.v, vmul.vx by 7, vadd.vi 3.  */
+ vnx16i v
+ = {3, 3 + 7 * 1, 3 + 7 * 2, 3 + 7 * 3, 3 + 7 * 4, 3 + 7 * 5,
+ 3 + 7 * 6, 3 + 7 * 7, 3 + 7 * 8, 3 + 7 * 9, 3 + 7 * 10, 3 + 7 * 11,
+ 3 + 7 * 12, 3 + 7 * 13, 3 + 7 * 14, 3 + 7 * 15};
+ *(vnx16i *) out = v; /* Store the constant vector through OUT.  */
+}
+
+/*
+** foo3:
+** vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+** vid\.v\s+v[0-9]+
+** vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+** ret
+*/
+void
+foo3 (int16_t *__restrict out)
+{ /* Plain index series: base 0, step 1; expected asm: vid.v then the store.  */
+ vnx16i v
+ = {0, 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ *(vnx16i *) out = v; /* Store the constant vector through OUT.  */
+}
+
+/*
+** foo4:
+** vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+** vid\.v\s+v[0-9]+
+** li\s+[a-x0-9]+,\s*6
+** vmul\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+** vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+** ret
+*/
+void
+foo4 (int16_t *__restrict out)
+{ /* Series base 0, step 6; expected asm: vid.v, vmul.vx by 6, no vadd.  */
+ vnx16i v
+ = {0*6, 1*6,2*6,3*6,4*6,5*6,6*6,7*6,8*6,9*6,10*6,11*6,12*6,13*6,14*6,15*6};
+ *(vnx16i *) out = v; /* Store the constant vector through OUT.  */
+}
+
+/*
+** foo5:
+** vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+** vid\.v\s+v[0-9]+
+** vadd\.vi\s+v[0-9]+,\s*v[0-9]+,\s*-16
+** vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+** ret
+*/
+void
+foo5 (int16_t *__restrict out)
+{ /* Series base -16, step 1; expected asm: vid.v, vadd.vi -16, no multiply.  */
+ vnx16i v
+ = {0-16, 1-16,2-16,3-16,4-16,5-16,6-16,7-16,8-16,9-16,10-16,11-16,12-16,13-16,14-16,15-16};
+ *(vnx16i *) out = v; /* Store the constant vector through OUT.  */
+}