RISC-V: Support const series vector for RVV auto-vectorization

Message ID 20230510040035.2972636-1-juzhe.zhong@rivai.ai
State Accepted
Headers
Series RISC-V: Support const series vector for RVV auto-vectorization |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

juzhe.zhong@rivai.ai May 10, 2023, 4 a.m. UTC
  From: Juzhe-Zhong <juzhe.zhong@rivai.ai>

This patch is a prerequisite for further RVV auto-vectorization
support.

When we enable even very simple binary operations, we end up
with the following ICE:

during RTL pass: expand
add_run-1.c: In function 'main':
add_run-1.c:28:1: internal compiler error: Segmentation fault
0x1618ea3 crash_signal
        ../../../riscv-gcc/gcc/toplev.cc:314
0xe76cd9 single_set(rtx_insn const*)
        ../../../riscv-gcc/gcc/rtl.h:3602
0x1080f8a emit_move_insn(rtx_def*, rtx_def*)
        ../../../riscv-gcc/gcc/expr.cc:4342
0x170c458 insert_value_copy_on_edge
        ../../../riscv-gcc/gcc/tree-outof-ssa.cc:352
0x170d58e eliminate_phi
        ../../../riscv-gcc/gcc/tree-outof-ssa.cc:785
0x170df17 expand_phi_nodes(ssaexpand*)
        ../../../riscv-gcc/gcc/tree-outof-ssa.cc:1024
0xef27e2 execute
        ../../../riscv-gcc/gcc/cfgexpand.cc:6818

This is because the loop vectorizer assumes the target is able to
handle const series vectors once binary operations are enabled,
which easily causes an ICE like the one above.

gcc/ChangeLog:

        * config/riscv/autovec.md (@vec_series<mode>): New pattern.
        * config/riscv/riscv-protos.h (expand_vec_series): New function.
        * config/riscv/riscv-v.cc (emit_binop): Ditto.
        (emit_indexop): Ditto.
        (expand_vec_series): Ditto.
        (expand_const_vector): Add series vector handling.
        * config/riscv/riscv.cc (riscv_const_insns): Enable series vector for testing.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/series-1.c: New test.
        * gcc.target/riscv/rvv/autovec/series_run-1.c: New test.

---
 gcc/config/riscv/autovec.md                   |  24 ++++
 gcc/config/riscv/riscv-protos.h               |   1 +
 gcc/config/riscv/riscv-v.cc                   | 118 +++++++++++++++++-
 gcc/config/riscv/riscv.cc                     |  27 +++-
 .../gcc.target/riscv/rvv/autovec/series-1.c   |  50 ++++++++
 .../riscv/rvv/autovec/series_run-1.c          |  20 +++
 6 files changed, 236 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/series-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/series_run-1.c
  

Comments

Kito Cheng May 11, 2023, 2:05 a.m. UTC | #1
O
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index c0293a306f9..e8a728ae226 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -219,6 +219,7 @@ rtx gen_avl_for_scalar_move (rtx);
>  void expand_tuple_move (machine_mode, rtx *);
>  machine_mode preferred_simd_mode (scalar_mode);
>  opt_machine_mode get_mask_mode (machine_mode);
> +void expand_vec_series (rtx, rtx, rtx);
>  }
>
>  /* We classify builtin types into two classes:
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 7ca49ca67c1..0c3b1b4c40b 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -248,6 +248,111 @@ emit_nonvlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
>    emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, false);
>  }
>
> +/* Emit binary operations.  */
> +
> +static void
> +emit_binop (unsigned icode, rtx *ops, machine_mode mask_mode,
> +           machine_mode scalar_mode)
> +{
> +  insn_expander<9> e;
> +  machine_mode mode = GET_MODE (ops[0]);
> +  e.add_output_operand (ops[0], mode);
> +  e.add_all_one_mask_operand (mask_mode);
> +  e.add_vundef_operand (mode);
> +  if (VECTOR_MODE_P (GET_MODE (ops[1])))
> +    e.add_input_operand (ops[1], GET_MODE (ops[1]));
> +  else
> +    e.add_input_operand (ops[1], scalar_mode);
> +  if (VECTOR_MODE_P (GET_MODE (ops[2])))
> +    e.add_input_operand (ops[2], GET_MODE (ops[2]));
> +  else
> +    e.add_input_operand (ops[2], scalar_mode);
> +  rtx vlmax = gen_reg_rtx (Pmode);
> +  emit_vlmax_vsetvl (mode, vlmax);
> +  e.add_input_operand (vlmax, Pmode);
> +  e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
> +  e.add_avl_type_operand (avl_type::VLMAX);
> +  e.expand ((enum insn_code) icode, false);
> +}
> +
> +/* Emit vid.v instruction.  */
> +
> +static void
> +emit_indexop (rtx target, machine_mode mask_mode)

nit: rename to emit_index_op


> +void
> +expand_vec_series (rtx dest, rtx base, rtx step)
> +{
> +  machine_mode mode = GET_MODE (dest);
> +  machine_mode inner_mode = GET_MODE_INNER (mode);
> +  machine_mode mask_mode;
> +  gcc_assert (get_mask_mode (mode).exists (&mask_mode));
> +
> +  /* VECT_IV = BASE + I * STEP.  */
> +
> +  /* Step 1: Generate I = { 0, 1, 2, ... } by vid.v.  */
> +  rtx tmp = gen_reg_rtx (mode);
> +  emit_indexop (tmp, mask_mode);
> +  if (rtx_equal_p (step, const1_rtx) && rtx_equal_p (base, const0_rtx))
> +    {
> +      emit_move_insn (dest, tmp);
> +      return;
> +    }
> +
> +  /* Step 2: Generate I * STEP.
> +     - STEP is 1, we don't emit any instructions.
> +     - STEP is power of 2, we use vsll.vi/vsll.vx.
> +     - STEP is non-power of 2, we use vmul.vx.  */

The comment does not seem to match the structure of the code; I would
prefer restructuring the code to match the comment.

> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/series-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/series-1.c
> new file mode 100644
> index 00000000000..a01f6ce7411
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/series-1.c
> @@ -0,0 +1,50 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m4" } */
> +
> +#include <stdint.h>

Use stdint-gcc.h instead
  

Patch

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f1c5ff5951b..99dc4f046b0 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -58,3 +58,27 @@ 
     DONE;
   }
 )
+
+;; =========================================================================
+;; == Vector creation
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Linear series
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vid.v
+;; - vmul.vx
+;; - vadd.vx/vadd.vi
+;; -------------------------------------------------------------------------
+
+(define_expand "@vec_series<mode>"
+  [(match_operand:VI 0 "register_operand")
+   (match_operand:<VEL> 1 "reg_or_int_operand")
+   (match_operand:<VEL> 2 "reg_or_int_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_series (operands[0], operands[1], operands[2]);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index c0293a306f9..e8a728ae226 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -219,6 +219,7 @@  rtx gen_avl_for_scalar_move (rtx);
 void expand_tuple_move (machine_mode, rtx *);
 machine_mode preferred_simd_mode (scalar_mode);
 opt_machine_mode get_mask_mode (machine_mode);
+void expand_vec_series (rtx, rtx, rtx);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 7ca49ca67c1..0c3b1b4c40b 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -248,6 +248,111 @@  emit_nonvlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
   emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, false);
 }
 
+/* Emit binary operations.  */
+
+static void
+emit_binop (unsigned icode, rtx *ops, machine_mode mask_mode,
+	    machine_mode scalar_mode)
+{
+  insn_expander<9> e;
+  machine_mode mode = GET_MODE (ops[0]);
+  e.add_output_operand (ops[0], mode);
+  e.add_all_one_mask_operand (mask_mode);
+  e.add_vundef_operand (mode);
+  if (VECTOR_MODE_P (GET_MODE (ops[1])))
+    e.add_input_operand (ops[1], GET_MODE (ops[1]));
+  else
+    e.add_input_operand (ops[1], scalar_mode);
+  if (VECTOR_MODE_P (GET_MODE (ops[2])))
+    e.add_input_operand (ops[2], GET_MODE (ops[2]));
+  else
+    e.add_input_operand (ops[2], scalar_mode);
+  rtx vlmax = gen_reg_rtx (Pmode);
+  emit_vlmax_vsetvl (mode, vlmax);
+  e.add_input_operand (vlmax, Pmode);
+  e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
+  e.add_avl_type_operand (avl_type::VLMAX);
+  e.expand ((enum insn_code) icode, false);
+}
+
+/* Emit vid.v instruction.  */
+
+static void
+emit_indexop (rtx target, machine_mode mask_mode)
+{
+  insn_expander<7> e;
+  machine_mode mode = GET_MODE (target);
+  e.add_output_operand (target, mode);
+  e.add_all_one_mask_operand (mask_mode);
+  e.add_vundef_operand (mode);
+  rtx vlmax = gen_reg_rtx (Pmode);
+  emit_vlmax_vsetvl (mode, vlmax);
+  e.add_input_operand (vlmax, Pmode);
+  e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
+  e.add_avl_type_operand (avl_type::VLMAX);
+  e.expand (code_for_pred_series (mode), false);
+}
+
+/* Expand series const vector.  */
+
+void
+expand_vec_series (rtx dest, rtx base, rtx step)
+{
+  machine_mode mode = GET_MODE (dest);
+  machine_mode inner_mode = GET_MODE_INNER (mode);
+  machine_mode mask_mode;
+  gcc_assert (get_mask_mode (mode).exists (&mask_mode));
+
+  /* VECT_IV = BASE + I * STEP.  */
+
+  /* Step 1: Generate I = { 0, 1, 2, ... } by vid.v.  */
+  rtx tmp = gen_reg_rtx (mode);
+  emit_indexop (tmp, mask_mode);
+  if (rtx_equal_p (step, const1_rtx) && rtx_equal_p (base, const0_rtx))
+    {
+      emit_move_insn (dest, tmp);
+      return;
+    }
+
+  /* Step 2: Generate I * STEP.
+     - STEP is 1, we don't emit any instructions.
+     - STEP is power of 2, we use vsll.vi/vsll.vx.
+     - STEP is non-power of 2, we use vmul.vx.  */
+  rtx tmp2 = gen_reg_rtx (mode);
+  if (!rtx_equal_p (step, const1_rtx))
+    {
+      if (CONST_INT_P (step) && pow2p_hwi (INTVAL (step)))
+	{
+	  /* Emit logical left shift operation.  */
+	  int shift = exact_log2 (INTVAL (step));
+	  rtx shift_amount = gen_int_mode (shift, Pmode);
+	  rtx ops[3] = {tmp2, tmp, shift_amount};
+	  insn_code icode = code_for_pred_scalar (ASHIFT, mode);
+	  emit_binop (icode, ops, mask_mode, Pmode);
+	}
+      else
+	{
+	  rtx ops[3] = {tmp2, tmp, step};
+	  insn_code icode = code_for_pred_scalar (MULT, mode);
+	  emit_binop (icode, ops, mask_mode, inner_mode);
+	}
+      if (rtx_equal_p (base, const0_rtx))
+	{
+	  emit_move_insn (dest, tmp2);
+	  return;
+	}
+    }
+
+  /* Step 3: Generate BASE + I * STEP.
+     - BASE is 0, we don't emit any instructions.
+     - BASE is not 0, we use vadd.vx/vadd.vi.  */
+  rtx tmp3 = gen_reg_rtx (mode);
+  rtx ops[3] = {tmp3, rtx_equal_p (step, const1_rtx) ? tmp : tmp2, base};
+  insn_code icode = code_for_pred_scalar (PLUS, mode);
+  emit_binop (icode, ops, mask_mode, inner_mode);
+  emit_move_insn (dest, tmp3);
+}
+
 static void
 expand_const_vector (rtx target, rtx src, machine_mode mask_mode)
 {
@@ -280,12 +385,19 @@  expand_const_vector (rtx target, rtx src, machine_mode mask_mode)
       return;
     }
 
+  /* Support scalable const series vector.  */
+  rtx base, step;
+  if (const_vec_series_p (src, &base, &step))
+    {
+      emit_insn (gen_vec_series (mode, target, base, step));
+      return;
+    }
+
   /* TODO: We only support const duplicate vector for now. More cases
      will be supported when we support auto-vectorization:
 
-       1. series vector.
-       2. multiple elts duplicate vector.
-       3. multiple patterns with multiple elts.  */
+       1. multiple elts duplicate vector.
+       2. multiple patterns with multiple elts.  */
 }
 
 /* Expand a pre-RA RVV data move from SRC to DEST.
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index ff90c44d811..84e9267bcb2 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1266,9 +1266,34 @@  riscv_const_insns (rtx x)
       }
 
     case CONST_DOUBLE:
-    case CONST_VECTOR:
       /* We can use x0 to load floating-point zero.  */
       return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
+    case CONST_VECTOR:
+      {
+	/* TODO: This is not accurate, we will need to
+	   adapt the COST of CONST_VECTOR in the future
+	   for the following cases:
+
+	  - 1. const duplicate vector with element value
+	       in range of [-16, 15].
+	  - 2. const duplicate vector with element value
+	       out range of [-16, 15].
+	  - 3. const series vector.
+	  ...etc.  */
+	if (riscv_v_ext_vector_mode_p (GET_MODE (x)))
+	  {
+	    /* const series vector.  */
+	    rtx base, step;
+	    if (const_vec_series_p (x, &base, &step))
+	      {
+		/* This is not accurate, we will need to adapt the COST
+		 * accurately according to BASE && STEP.  */
+		return 1;
+	      }
+	  }
+	/* TODO: We may support more const vector in the future.  */
+	return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
+      }
 
     case CONST:
       /* See if we can refer to X directly.  */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/series-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/series-1.c
new file mode 100644
index 00000000000..a01f6ce7411
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/series-1.c
@@ -0,0 +1,50 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m4" } */
+
+#include <stdint.h>
+
+#define NUM_ELEMS(TYPE) (64 / sizeof (TYPE))
+
+#define DEF_LOOP(TYPE, BASE, STEP, SUFFIX)                                     \
+  void __attribute__ ((noinline, noclone))                                     \
+  loop_##TYPE##_##SUFFIX (TYPE *restrict a)                                    \
+  {                                                                            \
+    for (int i = 0; i < NUM_ELEMS (TYPE); ++i)                                 \
+      a[i] = (BASE) + i * (STEP);                                              \
+  }
+
+#define TEST_SEW32_TYPES(T, BASE, STEP, SUFFIX)                                \
+  T (uint32_t, BASE, STEP, SUFFIX)                                             \
+  T (int32_t, BASE, STEP, SUFFIX)
+
+#define TEST_ALL(T)                                                            \
+  TEST_SEW32_TYPES (T, 0, 1, b0s1)                                             \
+  TEST_SEW32_TYPES (T, 0, 2, b0s2)                                             \
+  TEST_SEW32_TYPES (T, 0, 3, b0s3)                                             \
+  TEST_SEW32_TYPES (T, 0, 8, b0s8)                                             \
+  TEST_SEW32_TYPES (T, 0, 9, b0s9)                                             \
+  TEST_SEW32_TYPES (T, 0, 16, b0s16)                                           \
+  TEST_SEW32_TYPES (T, 0, 17, b0s17)                                           \
+  TEST_SEW32_TYPES (T, 0, 32, b0s32)                                           \
+  TEST_SEW32_TYPES (T, 0, 33, b0s33)                                           \
+  TEST_SEW32_TYPES (T, -16, 1, bm16s1)                                         \
+  TEST_SEW32_TYPES (T, 15, 1, b15s1)                                           \
+  TEST_SEW32_TYPES (T, -17, 1, bm17s1)                                         \
+  TEST_SEW32_TYPES (T, 16, 1, b16s1)                                           \
+  TEST_SEW32_TYPES (T, -16, 128, bm16s128)                                     \
+  TEST_SEW32_TYPES (T, 15, 128, b15s128)                                       \
+  TEST_SEW32_TYPES (T, -17, 128, bm17s128)                                     \
+  TEST_SEW32_TYPES (T, 16, 128, b16s128)                                       \
+  TEST_SEW32_TYPES (T, -16, 179, bm16s179)                                     \
+  TEST_SEW32_TYPES (T, 15, 179, b15s179)                                       \
+  TEST_SEW32_TYPES (T, -17, 179, bm17s179)                                     \
+  TEST_SEW32_TYPES (T, 16, 179, b16s179)                                       \
+  TEST_SEW32_TYPES (T, -16, 65536, bm16s65536)                                 \
+  TEST_SEW32_TYPES (T, 15, 65536, b15s65536)                                   \
+  TEST_SEW32_TYPES (T, -17, 65536, bm17s65536)                                 \
+  TEST_SEW32_TYPES (T, 16, 65536, b16s65536)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {vid\.v\s+v[0-9]+} 50 } } */
+/* { dg-final { scan-assembler-times {vsll\.vi\s+v[0-9]+} 24 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/series_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/series_run-1.c
new file mode 100644
index 00000000000..09a20809c65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/series_run-1.c
@@ -0,0 +1,20 @@ 
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m4" } */
+
+#include "series-1.c"
+
+#define TEST_LOOP(TYPE, BASE, STEP, SUFFIX)	\
+  {						\
+    TYPE array[NUM_ELEMS (TYPE)] = {};		\
+    loop_##TYPE##_##SUFFIX (array);		\
+    for (int i = 0; i < NUM_ELEMS (TYPE); i++)	\
+      if (array[i] != (TYPE) (BASE + i * STEP))	\
+	__builtin_abort ();			\
+  }
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}