diff mbox series

[2/3,V2] RISC-V: Enable basic auto-vectorization for RVV

Message ID	20230419164214.1032017-3-juzhe.zhong@rivai.ai
State	Accepted
Headers	Received-SPF: pass (google.com: domain of gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org designates 8.43.85.97 as permitted sender) client-ip=8.43.85.97; DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 66CB3385356C From: juzhe.zhong@rivai.ai To: gcc-patches@gcc.gnu.org Cc: kito.cheng@gmail.com, palmer@dabbelt.com, jeffreyalaw@gmail.com, Ju-Zhe Zhong <juzhe.zhong@rivai.ai> Subject: [PATCH 2/3 V2] RISC-V: Enable basic auto-vectorization for RVV Date: Thu, 20 Apr 2023 00:42:13 +0800 Message-Id: <20230419164214.1032017-3-juzhe.zhong@rivai.ai> In-Reply-To: <20230419164214.1032017-1-juzhe.zhong@rivai.ai> References: <20230419164214.1032017-1-juzhe.zhong@rivai.ai> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Feedback-ID: bizesmtp:rivai.ai:qybglogicsvr:qybglogicsvr7 Precedence: list Errors-To: gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org Sender: "Gcc-patches" <gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org> X-getmail-retrieved-from-mailbox: =?utf-8?q?INBOX?=
Series	RISC-V: Basic enable RVV auto-vectorizaiton \| [0/3,V2] RISC-V: Basic enable RVV auto-vectorizaiton [1/3,V2] RISC-V: Add auto-vectorization compile option for RVV [2/3,V2] RISC-V: Enable basic auto-vectorization for RVV [3/3,V2] RISC-V: Add sanity testcases for RVV auto-vectorization

Checks

Context	Check	Description
snail/gcc-patch-check	success	Github commit url

Commit Message

juzhe.zhong@rivai.ai April 19, 2023, 4:42 p.m. UTC

  From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>

This patch enables auto-vectorization accurately according to '-march'
and adds the len_load/len_store patterns.

For example, for -march=rv32gc_zve32x, we should allow SEW = 64 RVV 
auto-vectorization.

gcc/ChangeLog:

        * config/riscv/riscv-protos.h (preferred_simd_mode): Enable basic auto-vectorization support.
        * config/riscv/riscv-v.cc (autovec_use_vlmax_p): New function.
        (preferred_simd_mode): Ditto.
        * config/riscv/riscv.cc (riscv_convert_vector_bits): Enable basic auto-vectorization support.
        (riscv_preferred_simd_mode): New function.
        (TARGET_VECTORIZE_PREFERRED_SIMD_MODE): New target hook.
        * config/riscv/vector.md: include autovec.md
        * config/riscv/autovec.md: New file.

---
 gcc/config/riscv/autovec.md     | 49 ++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv-v.cc     | 53 +++++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv.cc       | 24 ++++++++++++++-
 gcc/config/riscv/vector.md      |  4 ++-
 5 files changed, 129 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/riscv/autovec.md

Comments

Kito Cheng April 20, 2023, 2:26 a.m. UTC | #1

> +/* Return the vectorization machine mode for RVV according to LMUL.  */
> +machine_mode
> +preferred_simd_mode (scalar_mode mode)
> +{
> +  /* We only enable auto-vectorization when TARGET_MIN_VLEN >= 128
> +     which is -march=rv64gcv. Since GCC loop vectorizer report ICE
> +     when we enable -march=rv64gc_zve32* and -march=rv32gc_zve64*.
> +     in the 'can_duplicate_and_interleave_p' of tree-vect-slp.cc. Since we have
> +     VNx1SImode in -march=*zve32* and VNx1DImode in -march=*zve64*, they are
> +     enabled in targetm. vector_mode_supported_p and SLP vectorizer will try to
> +     use them. Currently, we can support auto-vectorization in
> +     -march=rv32_zve32x_zvl128b. Wheras, -march=rv32_zve32x_zvl32b or
> +     -march=rv32_zve32x_zvl64b are disabled.
> + */

The comment above might not sync with your implementation?

> +  if (autovec_use_vlmax_p ())
> +    {
> +      /* If TARGET_MIN_VLEN < 128, we don't allow LMUL < 2
> +        auto-vectorization since Loop Vectorizer may use VNx1SImode or
> +        VNx1DImode to vectorize which will create ICE in the
> +        'can_duplicate_and_interleave_p' of tree-vect-slp.cc.  */
> +      if (TARGET_MIN_VLEN < 128 && riscv_autovec_lmul < RVV_M2)
> +       return word_mode;

Actually, you've allowed TARGET_MIN_VLEN < 128 && riscv_autovec_lmul < RVV_M2

> +      /* We use LMUL = 1 as base bytesize which is BYTES_PER_RISCV_VECTOR and
> +        riscv_autovec_lmul as multiply factor to calculate the the NUNITS to
> +        get the auto-vectorization mode.  */
> +      poly_uint64 nunits;
> +      poly_uint64 vector_size
> +       = BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul);
> +      poly_uint64 scalar_size = GET_MODE_SIZE (mode);
> +      if (!multiple_p (vector_size, scalar_size, &nunits))
> +       return word_mode;

Could you put a gcc_unreachable or assertion here? I assume this
should never be false?
if (!multiple_p (vector_size, scalar_size, &nunits))
  {
    gcc_unreachable ();
    return word_mode;
  }

> +      machine_mode rvv_mode;
> +      if (get_vector_mode (mode, nunits).exists (&rvv_mode))
> +       return rvv_mode;
> +    }
> +  /* TODO: We will support minimum length VLS auto-vectorization in the future.
> +   */
> +  return word_mode;
> +}
> +
> +
>  } // namespace riscv_vector
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 5d2550871c7..c601389b540 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -6228,7 +6228,15 @@ riscv_convert_vector_bits (void)
>       to set RVV mode size. The RVV machine modes size are run-time constant if
>       TARGET_VECTOR is enabled. The RVV machine modes size remains default
>       compile-time constant if TARGET_VECTOR is disabled.  */
> -  return TARGET_VECTOR ? poly_uint16 (1, 1) : 1;
> +  if (TARGET_VECTOR)
> +    {
> +      if (riscv_autovec_preference == RVV_FIXED_VLMAX)
> +       return (int) TARGET_MIN_VLEN / (riscv_bytes_per_vector_chunk * 8);

I realized this will also affect the intrinsics.

So I would prefer to drop the RVV_FIXED_VLMAX support for now.

e.g.
$ riscv64-unknown-linux-gnu-gcc
--param=riscv-autovec-preference=fixed-vlmax
gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c -O2 -march=rv64gcv
-S
../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:
In function 'stach_check_alloca_1':
../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:41:1:
error: insn does not satisfy its constraints:
   41 | }
      | ^
(insn 37 26 40 2 (set (reg:VNx8QI 120 v24 [orig:158 data ] [158])
        (reg:VNx8QI 10 a0 [ data ]))
"../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c":28:1
727 {*movvnx8qi_whole}
     (nil))
during RTL pass: reload
../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:41:1:
internal compiler error: in extract_constrain_insn, at recog.cc:2692

juzhe.zhong@rivai.ai April 20, 2023, 2:55 a.m. UTC | #2

>> The comment above might not sync with your implementation?
Address comment.

>> Actually, you've allowed TARGET_MIN_VLEN < 128 && riscv_autovec_lmul < RVV_M2
Not sure I am on the same page with you. I return word_mode in this situation, so auto-vectorization
will be disabled. I have a testcase covering it and I didn't see any issue. Would you mind giving me more information?

>> Could you put a gcc_unreachable or assertion here? I assume this
>>should never false?
>>if (!multiple_p (vector_size, scalar_size, &nunits))
>>  {
  >>  gcc_unreachable ();
  >>  return word_mode;
 >> }
ok.

>> ../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:
>>In function 'stach_check_alloca_1':
>>../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:41:1:
>>error: insn does not satisfy its constraints:
>>   41 | }
>>      | ^
>>(insn 37 26 40 2 (set (reg:VNx8QI 120 v24 [orig:158 data ] [158])
>>        (reg:VNx8QI 10 a0 [ data ]))
>>"../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c":28:1
>>727 {*movvnx8qi_whole}
 >>    (nil))

Oh, I see. In your situation, LMUL = 1 is 128 bits, and VNx8QImode is MF2, which is 64 bits in size.
GCC ties VNx8QI to a scalar register. I think it can be easily fixed in the backend, but yes, I agree with you that we should drop
this option for now.



juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2023-04-20 10:26
To: juzhe.zhong
CC: gcc-patches; palmer; jeffreyalaw
Subject: Re: [PATCH 2/3 V2] RISC-V: Enable basic auto-vectorization for RVV
> +/* Return the vectorization machine mode for RVV according to LMUL.  */
> +machine_mode
> +preferred_simd_mode (scalar_mode mode)
> +{
> +  /* We only enable auto-vectorization when TARGET_MIN_VLEN >= 128
> +     which is -march=rv64gcv. Since GCC loop vectorizer report ICE
> +     when we enable -march=rv64gc_zve32* and -march=rv32gc_zve64*.
> +     in the 'can_duplicate_and_interleave_p' of tree-vect-slp.cc. Since we have
> +     VNx1SImode in -march=*zve32* and VNx1DImode in -march=*zve64*, they are
> +     enabled in targetm. vector_mode_supported_p and SLP vectorizer will try to
> +     use them. Currently, we can support auto-vectorization in
> +     -march=rv32_zve32x_zvl128b. Wheras, -march=rv32_zve32x_zvl32b or
> +     -march=rv32_zve32x_zvl64b are disabled.
> + */
 
The comment above might not sync with your implementation?
 
> +  if (autovec_use_vlmax_p ())
> +    {
> +      /* If TARGET_MIN_VLEN < 128, we don't allow LMUL < 2
> +        auto-vectorization since Loop Vectorizer may use VNx1SImode or
> +        VNx1DImode to vectorize which will create ICE in the
> +        'can_duplicate_and_interleave_p' of tree-vect-slp.cc.  */
> +      if (TARGET_MIN_VLEN < 128 && riscv_autovec_lmul < RVV_M2)
> +       return word_mode;
 
Actually, you've allowed TARGET_MIN_VLEN < 128 && riscv_autovec_lmul < RVV_M2
 
> +      /* We use LMUL = 1 as base bytesize which is BYTES_PER_RISCV_VECTOR and
> +        riscv_autovec_lmul as multiply factor to calculate the the NUNITS to
> +        get the auto-vectorization mode.  */
> +      poly_uint64 nunits;
> +      poly_uint64 vector_size
> +       = BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul);
> +      poly_uint64 scalar_size = GET_MODE_SIZE (mode);
> +      if (!multiple_p (vector_size, scalar_size, &nunits))
> +       return word_mode;
 
Could you put a gcc_unreachable or assertion here? I assume this
should never false?
if (!multiple_p (vector_size, scalar_size, &nunits))
  {
    gcc_unreachable ();
    return word_mode;
  }
 
> +      machine_mode rvv_mode;
> +      if (get_vector_mode (mode, nunits).exists (&rvv_mode))
> +       return rvv_mode;
> +    }
> +  /* TODO: We will support minimum length VLS auto-vectorization in the future.
> +   */
> +  return word_mode;
> +}
> +
> +
>  } // namespace riscv_vector
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 5d2550871c7..c601389b540 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -6228,7 +6228,15 @@ riscv_convert_vector_bits (void)
>       to set RVV mode size. The RVV machine modes size are run-time constant if
>       TARGET_VECTOR is enabled. The RVV machine modes size remains default
>       compile-time constant if TARGET_VECTOR is disabled.  */
> -  return TARGET_VECTOR ? poly_uint16 (1, 1) : 1;
> +  if (TARGET_VECTOR)
> +    {
> +      if (riscv_autovec_preference == RVV_FIXED_VLMAX)
> +       return (int) TARGET_MIN_VLEN / (riscv_bytes_per_vector_chunk * 8);
 
I realized this will also effect intrinsic stuffs.
 
So I would prefer to drop RVV_FIXED_VLMAX stuffs at this moment.
 
e.g.
$ riscv64-unknown-linux-gnu-gcc
--param=riscv-autovec-preference=fixed-vlmax
gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c -O2 -march=rv64gcv
-S
../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:
In function 'stach_check_alloca_1':
../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:41:1:
error: insn does not satisfy its constraints:
   41 | }
      | ^
(insn 37 26 40 2 (set (reg:VNx8QI 120 v24 [orig:158 data ] [158])
        (reg:VNx8QI 10 a0 [ data ]))
"../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c":28:1
727 {*movvnx8qi_whole}
     (nil))
during RTL pass: reload
../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:41:1:
internal compiler error: in extract_constrain_insn, at recog.cc:2692

Kito Cheng April 20, 2023, 2:59 a.m. UTC | #3

On Thu, Apr 20, 2023 at 10:56 AM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> >> The comment above might not sync with your implementation?
> Address comment.
>
> >> Actually, you've allowed TARGET_MIN_VLEN < 128 && riscv_autovec_lmul < RVV_M2
> Not sure I am on the same page with you. I return word_mode for this situation, the auto-vectorization
> will be disabled. I have testcase to test it and I didn't see issue. Would you mind giving me more informations?

Oh, I just said the opposite of what I meant. I mean you allow TARGET_MIN_VLEN <
128 with LMUL >= 2 for vectorization,
which is inconsistent with "We only enable auto-vectorization when
TARGET_MIN_VLEN >= 128".

Robin Dapp April 20, 2023, 8:58 a.m. UTC | #4

> $ riscv64-unknown-linux-gnu-gcc
> --param=riscv-autovec-preference=fixed-vlmax
> gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c -O2 -march=rv64gcv
> -S
> ../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:
> In function 'stach_check_alloca_1':
> ../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:41:1:
> error: insn does not satisfy its constraints:
>    41 | }
>       | ^
> (insn 37 26 40 2 (set (reg:VNx8QI 120 v24 [orig:158 data ] [158])
>         (reg:VNx8QI 10 a0 [ data ]))
> "../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c":28:1
> 727 {*movvnx8qi_whole}
>      (nil))
> during RTL pass: reload
> ../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:41:1:
> internal compiler error: in extract_constrain_insn, at recog.cc:2692

For a slightly adjusted testcase

void
foo0 (int32_t *__restrict f, int32_t *__restrict d, int n)
{
  for (int i = 0; i < n; ++i)
    {
      f[i * 2 + 0] = 1;
      f[i * 2 + 1] = 2;
      d[i] = 3;
    }
}

compiled with -fno-vect-cost-model --param=riscv-autovec-preference=scalable
I see an ICE:

during GIMPLE pass: vect
dump file: foo3.c.172t.vect
foo3.c: In function 'foo0':
foo3.c:4:1: internal compiler error: in exact_div, at poly-int.h:2232
    4 | foo0 (int32_t *__restrict f, int32_t *__restrict d, int n)
      | ^~~~
0x7bb237 poly_int<2u, poly_result<unsigned long, if_nonpoly<int, int, poly_int_traits<int>::is_poly>::type, poly_coeff_pair_traits<unsigned long, if_nonpoly<int, int, poly_int_traits<int>::is_poly>::type>::result_kind>::type> exact_div<2u, unsigned long, int>(poly_int_pod<2u, unsigned long> const&, int)
        ../../gcc/poly-int.h:2232
0x7bbf91 poly_int<2u, poly_result<unsigned long, if_nonpoly<int, int, poly_int_traits<int>::is_poly>::type, poly_coeff_pair_traits<unsigned long, if_nonpoly<int, int, poly_int_traits<int>::is_poly>::type>::result_kind>::type> exact_div<2u, unsigned long, int>(poly_int_pod<2u, unsigned long> const&, int)
        ../../gcc/tree.h:3663
0x7bbf91 can_duplicate_and_interleave_p(vec_info*, unsigned int, tree_node*, unsigned int*, tree_node**, tree_node**)
        ../../gcc/tree-vect-slp.cc:437
[..]

With --param=riscv-autovec-preference=fixed-vlmax, however, the output is
reasonable.  BTW please use --param instead of -param in the description to
avoid confusion.

Now the patches don't explicitly note that they only work for certain -march
values or configurations, but they certainly shouldn't introduce ICEs for
unsupported configurations.

Are the "fixed-vlmax" vs "scalable" names based on ARM's SVE?  I haven't thought
this through but I think I'd prefer "fixed" vs "varying" or more explicitly
"fixed vector size" vs "dynamic vector size".  Certainly room for discussion here.
What about the -mriscv-vector-bits=... (which would be vlen in v-spec parlance)
from your "rvv-next" branch?  Is this orthogonal to the new parameter here? Are you
thinking of introducing this as well?

Regards
 Robin

juzhe.zhong@rivai.ai April 20, 2023, 9:07 a.m. UTC | #5

>> With --param=riscv-autovec-preference=fixed-vlmax, however, the output is
>> reasonable.  BTW please use --param instead of -param in the description to
>> avoid confusion.
>>Now the patches don't explicitly note that they only work for certain marchs,
>>configurations or so but they certainly shouldn't introduce ICEs for
>>unsupported configurations.

Address comments.  And fix that soon. Thank you so much.

>>Are the "fixed-vlmax" vs "scalable" names based on ARM's SVE?  I haven't thought
>>this through but I think I'd prefer "fixed" vs "varying" or more explicitly
>>"fixed vector size" vs "dynamic vector size".  Certainly room for discussion here.
>>What about the -mriscv-vector-bits=... (which would be vlen in v-spec parlance)
>>from your "rvv-next" branch?  Is this orthogonal to the new parameter here? Are you
>>thinking of introducing this as well?

The current compile options were suggested by Kito. They are internal GCC compile options.
I was trying to add -mriscv-vector-bits=..., but it was objected to by the LLVM community.
https://github.com/riscv-non-isa/riscv-toolchain-conventions/issues/33 

I think Kito may give more comments on the compile options, since he is the RISC-V ABI and convention maintainer.
I developed this patch following his guidance.

Thanks.


juzhe.zhong@rivai.ai
 
From: Robin Dapp
Date: 2023-04-20 16:58
To: Kito Cheng; juzhe.zhong
CC: gcc-patches; palmer; jeffreyalaw
Subject: Re: [PATCH 2/3 V2] RISC-V: Enable basic auto-vectorization for RVV
> $ riscv64-unknown-linux-gnu-gcc
> --param=riscv-autovec-preference=fixed-vlmax
> gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c -O2 -march=rv64gcv
> -S
> ../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:
> In function 'stach_check_alloca_1':
> ../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:41:1:
> error: insn does not satisfy its constraints:
>    41 | }
>       | ^
> (insn 37 26 40 2 (set (reg:VNx8QI 120 v24 [orig:158 data ] [158])
>         (reg:VNx8QI 10 a0 [ data ]))
> "../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c":28:1
> 727 {*movvnx8qi_whole}
>      (nil))
> during RTL pass: reload
> ../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:41:1:
> internal compiler error: in extract_constrain_insn, at recog.cc:2692
 
For a slightly adjusted testcase
 
void
foo0 (int32_t *__restrict f, int32_t *__restrict d, int n)
{
  for (int i = 0; i < n; ++i)
    {
      f[i * 2 + 0] = 1;
      f[i * 2 + 1] = 2;
      d[i] = 3;
    }
}
 
compiled with -fno-vect-cost-model --param=riscv-autovec-preference=scalable
I see an ICE:
 
during GIMPLE pass: vect
dump file: foo3.c.172t.vect
foo3.c: In function 'foo0':
foo3.c:4:1: internal compiler error: in exact_div, at poly-int.h:2232
    4 | foo0 (int32_t *__restrict f, int32_t *__restrict d, int n)
      | ^~~~
0x7bb237 poly_int<2u, poly_result<unsigned long, if_nonpoly<int, int, poly_int_traits<int>::is_poly>::type, poly_coeff_pair_traits<unsigned long, if_nonpoly<int, int, poly_int_traits<int>::is_poly>::type>::result_kind>::type> exact_div<2u, unsigned long, int>(poly_int_pod<2u, unsigned long> const&, int)
        ../../gcc/poly-int.h:2232
0x7bbf91 poly_int<2u, poly_result<unsigned long, if_nonpoly<int, int, poly_int_traits<int>::is_poly>::type, poly_coeff_pair_traits<unsigned long, if_nonpoly<int, int, poly_int_traits<int>::is_poly>::type>::result_kind>::type> exact_div<2u, unsigned long, int>(poly_int_pod<2u, unsigned long> const&, int)
        ../../gcc/tree.h:3663
0x7bbf91 can_duplicate_and_interleave_p(vec_info*, unsigned int, tree_node*, unsigned int*, tree_node**, tree_node**)
        ../../gcc/tree-vect-slp.cc:437
[..]
 
With --param=riscv-autovec-preference=fixed-vlmax, however, the output is
reasonable.  BTW please use --param instead of -param in the description to
avoid confusion.
 
Now the patches don't explicitly note that they only work for certain marchs,
configurations or so but they certainly shouldn't introduce ICEs for
unsupported configurations.
 
Are the "fixed-vlmax" vs "scalable" names based on ARM's SVE?  I haven't thought
this through but I think I'd prefer "fixed" vs "varying" or more explicitly
"fixed vector size" vs "dynamic vector size".  Certainly room for discussion here.
What about the -mriscv-vector-bits=... (which would be vlen in v-spec parlance)
from your "rvv-next" branch?  Is this orthogonal to the new parameter here? Are you
thinking of introducing this as well?
 
Regards
Robin

juzhe.zhong@rivai.ai April 20, 2023, 9:30 a.m. UTC | #6

Hi, Kito. Can you give us more comments on the compile options?
I think I should fix this patch after we have finished all discussions about the compile options
for choosing vector-length && LMUL && auto-vectorization mode (VLA/VLS).

I just received Richard Sandiford's comments on the "WHILE_LEN" pattern.
Overall, the global reviewers accept our RVV loop control mechanism in the middle-end, so
I am going to support the RVV loop control mechanism in the middle-end first. Then we can
have perfect codegen like the RVV ISA example soon.

Thanks.


juzhe.zhong@rivai.ai
 
From: Robin Dapp
Date: 2023-04-20 16:58
To: Kito Cheng; juzhe.zhong
CC: gcc-patches; palmer; jeffreyalaw
Subject: Re: [PATCH 2/3 V2] RISC-V: Enable basic auto-vectorization for RVV
> $ riscv64-unknown-linux-gnu-gcc
> --param=riscv-autovec-preference=fixed-vlmax
> gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c -O2 -march=rv64gcv
> -S
> ../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:
> In function 'stach_check_alloca_1':
> ../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:41:1:
> error: insn does not satisfy its constraints:
>    41 | }
>       | ^
> (insn 37 26 40 2 (set (reg:VNx8QI 120 v24 [orig:158 data ] [158])
>         (reg:VNx8QI 10 a0 [ data ]))
> "../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c":28:1
> 727 {*movvnx8qi_whole}
>      (nil))
> during RTL pass: reload
> ../riscv-gnu-toolchain-trunk/riscv-gcc/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c:41:1:
> internal compiler error: in extract_constrain_insn, at recog.cc:2692
 
For a slightly adjusted testcase
 
void
foo0 (int32_t *__restrict f, int32_t *__restrict d, int n)
{
  for (int i = 0; i < n; ++i)
    {
      f[i * 2 + 0] = 1;
      f[i * 2 + 1] = 2;
      d[i] = 3;
    }
}
 
compiled with -fno-vect-cost-model --param=riscv-autovec-preference=scalable
I see an ICE:
 
during GIMPLE pass: vect
dump file: foo3.c.172t.vect
foo3.c: In function 'foo0':
foo3.c:4:1: internal compiler error: in exact_div, at poly-int.h:2232
    4 | foo0 (int32_t *__restrict f, int32_t *__restrict d, int n)
      | ^~~~
0x7bb237 poly_int<2u, poly_result<unsigned long, if_nonpoly<int, int, poly_int_traits<int>::is_poly>::type, poly_coeff_pair_traits<unsigned long, if_nonpoly<int, int, poly_int_traits<int>::is_poly>::type>::result_kind>::type> exact_div<2u, unsigned long, int>(poly_int_pod<2u, unsigned long> const&, int)
        ../../gcc/poly-int.h:2232
0x7bbf91 poly_int<2u, poly_result<unsigned long, if_nonpoly<int, int, poly_int_traits<int>::is_poly>::type, poly_coeff_pair_traits<unsigned long, if_nonpoly<int, int, poly_int_traits<int>::is_poly>::type>::result_kind>::type> exact_div<2u, unsigned long, int>(poly_int_pod<2u, unsigned long> const&, int)
        ../../gcc/tree.h:3663
0x7bbf91 can_duplicate_and_interleave_p(vec_info*, unsigned int, tree_node*, unsigned int*, tree_node**, tree_node**)
        ../../gcc/tree-vect-slp.cc:437
[..]
 
With --param=riscv-autovec-preference=fixed-vlmax, however, the output is
reasonable.  BTW please use --param instead of -param in the description to
avoid confusion.
 
Now the patches don't explicitly note that they only work for certain marchs,
configurations or so but they certainly shouldn't introduce ICEs for
unsupported configurations.
 
Are the "fixed-vlmax" vs "scalable" names based on ARM's SVE?  I haven't thought
this through but I think I'd prefer "fixed" vs "varying" or more explicitly
"fixed vector size" vs "dynamic vector size".  Certainly room for discussion here.
What about the -mriscv-vector-bits=... (which would be vlen in v-spec parlance)
from your "rvv-next" branch?  Is this orthogonal to the new parameter here? Are you
thinking of introducing this as well?
 
Regards
Robin

Kito Cheng April 20, 2023, 9:31 a.m. UTC | #7

On Thu, Apr 20, 2023 at 5:07 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> >> With --param=riscv-autovec-preference=fixed-vlmax, however, the output is
> >> reasonable.  BTW please use --param instead of -param in the description to
> >> avoid confusion.
> >>Now the patches don't explicitly note that they only work for certain marchs,
> >>configurations or so but they certainly shouldn't introduce ICEs for
> >>unsupported configurations.
>
> Address comments.  And fix that soon. Thank you so much.
>
> >>Are the "fixed-vlmax" vs "scalable" names based on ARM's SVE?  I haven't thought
> >>this through but I think I'd prefer "fixed" vs "varying" or more explicitly
> >>"fixed vector size" vs "dynamic vector size".  Certainly room for discussion here.
> >>What about the -mriscv-vector-bits=... (which would be vlen in v-spec parlance)
> >>from your "rvv-next" branch?  Is this orthogonal to the new parameter here? Are you
> >>thinking of introducing this as well?
>
> The current compile options are suggested by Kito. They are internal GCC compile option.
> I was trying to add -mriscv-vector-bits-...., However, it was objected by LLVM community.
> https://github.com/riscv-non-isa/riscv-toolchain-conventions/issues/33


Wait, -mriscv-vector-bits= isn't objected to by LLVM; what they objected
to is the LMUL option.
The LLVM community has tried to implement that:
https://reviews.llvm.org/D145088

But personally I would prefer not to rush to implement that feature upstream;
we could implement it, have more conversation with the LLVM community, and then
document it in https://github.com/riscv-non-isa/rvv-intrinsic-doc
or https://github.com/riscv-non-isa/riscv-toolchain-conventions

> I think in case of compile options, Kito may give more comments since he is the RISC-V ABI and convention maintainer.
> I develop this patch following his order.

juzhe.zhong@rivai.ai April 20, 2023, 9:34 a.m. UTC | #8

Ahhh.  Thanks kito.

Can you give more comments about Robin's suggestion to rename the options to
"fixed" vs "varying" or "fixed vector size" vs "dynamic vector size"?

I am OK with any of them.



juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2023-04-20 17:31
To: juzhe.zhong@rivai.ai
CC: Robin Dapp; gcc-patches; palmer; jeffreyalaw
Subject: Re: Re: [PATCH 2/3 V2] RISC-V: Enable basic auto-vectorization for RVV
On Thu, Apr 20, 2023 at 5:07 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> >> With --param=riscv-autovec-preference=fixed-vlmax, however, the output is
> >> reasonable.  BTW please use --param instead of -param in the description to
> >> avoid confusion.
> >>Now the patches don't explicitly note that they only work for certain marchs,
> >>configurations or so but they certainly shouldn't introduce ICEs for
> >>unsupported configurations.
>
> Address comments.  And fix that soon. Thank you so much.
>
> >>Are the "fixed-vlmax" vs "scalable" names based on ARM's SVE?  I haven't thought
> >>this through but I think I'd prefer "fixed" vs "varying" or more explicitly
> >>"fixed vector size" vs "dynamic vector size".  Certainly room for discussion here.
> >>What about the -mriscv-vector-bits=... (which would be vlen in v-spec parlance)
> >>from your "rvv-next" branch?  Is this orthogonal to the new parameter here? Are you
> >>thinking of introducing this as well?
>
> The current compile options are suggested by Kito. They are internal GCC compile option.
> I was trying to add -mriscv-vector-bits-...., However, it was objected by LLVM community.
> https://github.com/riscv-non-isa/riscv-toolchain-conventions/issues/33
 
 
Wait, -mriscv-vector-bits= isn't objected by LLVM, what they objected
to is lmul option.
LLVM community has try to implmenat that:
https://reviews.llvm.org/D145088
 
But personally I would prefer not to rush to implement that feature on upstream,
we could implement that and have more conversion with LLVM community and then
document that into https://github.com/riscv-non-isa/rvv-intrinsic-doc
or https://github.com/riscv-non-isa/riscv-toolchain-conventions
 
> I think in case of compile options, Kito may give more comments since he is the RISC-V ABI and convention maintainer.
> I develop this patch following his order.

Robin Dapp April 20, 2023, 9:42 a.m. UTC | #9

> Can you give more comments about Robin's opinion that he want to change into
> "fixed" vs "varying" or "fixed vector size" vs "dynamic vector size" ?

It's not necessary to decide on this now as --params are not supposed
to be stable and can be changed quickly.  I was just curious if this had
already been discussed or finalized elsewhere.

Regards
 Robin

juzhe.zhong@rivai.ai April 20, 2023, 9:47 a.m. UTC | #10

Ahhh. These compile options are not finalized.
I just asked Kito to provide me some compile options with which I can specify LMUL && auto-vectorization mode && vector-length (scalable or fixed-length),
in order to be able to test auto-vectorization fully, for example: fully testing LMUL = 1/2/4/8 auto-vectorization (you can see the code in rvv.exp).
Then he had me add the --param options.

I can change the compile options as you suggested.

Thanks.


juzhe.zhong@rivai.ai
 
From: Robin Dapp
Date: 2023-04-20 17:42
To: juzhe.zhong@rivai.ai; kito.cheng
CC: gcc-patches; palmer; jeffreyalaw
Subject: Re: [PATCH 2/3 V2] RISC-V: Enable basic auto-vectorization for RVV
> Can you give more comments about Robin's opinion that he want to change into
> "fixed" vs "varying" or "fixed vector size" vs "dynamic vector size" ?
 
It's not necessary to decide on this now as --params are not supposed
to be stable and can be changed quickly.  I was just curious if this had
already been discussed or finalized elsewhere.
 
Regards
Robin

Kito Cheng April 20, 2023, 10:37 a.m. UTC | #11

Hi Robin:

Let me share more context that I've discussed with Ju-Zhe; I look
forward to your comments :)

There are 3 different auto-vectorization flavors:
- VLA
- VLS fixed-vlmax (Name TBD)
- (Traditional) VLS

I think I don't need to explain too much about VLA,
so let's focus on the second and third:

VLS fixed-vlmax is something like -mriscv-vector-bits= or
-msve-vector-bits: it assumes VLEN is a static length and evaluates
scalable vector modes as fixed-length vector modes (e.g. evaluates (8x +
8) bytes to 16 bytes), so that stack allocation can be done statically
instead of being computed from the vlenb register, and vlmax can be
evaluated to a static value too. However, the code generated by this mode
is not portable: when you compile with -mriscv-vector-bits=128, the code
can't run on a machine whose VLEN is not exactly equal to 128.

(Traditional) VLS vectorizes to something like int32x4_t. Stack
allocation can be determined statically too since the size is fixed,
but the size of a vector register is still a poly_int16 value (scalable
vector), not evaluated to a fixed-length vector as in VLS fixed-vlmax
mode. This mode could be useful to handle loops that can't be vectorized
in VLA mode, or be used by the SLP vectorizer, and it is more portable
than VLS fixed-vlmax mode since it only requires that VLEN be at least a
specific length, rather than exactly equal to a specific length.





On Thu, Apr 20, 2023 at 5:47 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> Ahhh. These compile options are not finalized.
> I just asked Kito to provide me with some compile options that let me specify LMUL, the auto-vectorization mode, and the vector length (scalable or fixed-length),
> so that I can test auto-vectorization fully, for example by testing LMUL = 1/2/4/8 auto-vectorization (you can see the code in rvv.exp).
> He then asked me to add --param options.
>
> I can change the compile options as you suggested.
>
> Thanks.
>
>
> juzhe.zhong@rivai.ai
>
> From: Robin Dapp
> Date: 2023-04-20 17:42
> To: juzhe.zhong@rivai.ai; kito.cheng
> CC: gcc-patches; palmer; jeffreyalaw
> Subject: Re: [PATCH 2/3 V2] RISC-V: Enable basic auto-vectorization for RVV
> > Can you give more comments about Robin's opinion that he want to change into
> > "fixed" vs "varying" or "fixed vector size" vs "dynamic vector size" ?
>
> It's not necessary to decide on this now as --params are not supposed
> to be stable and can be changed quickly.  I was just curious if this had
> already been discussed or finalized elsewhere.
>
> Regards
> Robin
>

diff mbox series

Patch

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
new file mode 100644
index 00000000000..b5d46ff57ab
--- /dev/null
+++ b/gcc/config/riscv/autovec.md
@@ -0,0 +1,49 @@ 
+;; Machine description for auto-vectorization using RVV for GNU compiler.
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+;; Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; =========================================================================
+;; == Loads/Stores
+;; =========================================================================
+
+;; len_load/len_store is a sub-optimal pattern for RVV auto-vectorization support.
+;; We will replace them when len_maskload/len_maskstore is supported in loop vectorizer.
+(define_expand "len_load_<mode>"
+  [(match_operand:V 0 "register_operand")
+   (match_operand:V 1 "memory_operand")
+   (match_operand 2 "vector_length_operand")
+   (match_operand 3 "const_0_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_nonvlmax_op (code_for_pred_mov (<MODE>mode), operands[0],
+				  operands[1], operands[2], <VM>mode);
+  DONE;
+})
+
+(define_expand "len_store_<mode>"
+  [(match_operand:V 0 "memory_operand")
+   (match_operand:V 1 "register_operand")
+   (match_operand 2 "vector_length_operand")
+   (match_operand 3 "const_0_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_nonvlmax_op (code_for_pred_mov (<MODE>mode), operands[0],
+				  operands[1], operands[2], <VM>mode);
+  DONE;
+})
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5244e8dcbf0..2de9d40be46 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -207,6 +207,7 @@  enum vlen_enum
 bool slide1_sew64_helper (int, machine_mode, machine_mode,
 			  machine_mode, rtx *);
 rtx gen_avl_for_scalar_move (rtx);
+machine_mode preferred_simd_mode (scalar_mode);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 99c414cc910..5e69427ac54 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -43,6 +43,7 @@ 
 #include "optabs.h"
 #include "tm-constrs.h"
 #include "rtx-vector-builder.h"
+#include "targhooks.h"
 
 using namespace riscv_vector;
 
@@ -742,4 +743,56 @@  gen_avl_for_scalar_move (rtx avl)
     }
 }
 
+/* SCALABLE means that the vector-length is agnostic (run-time invariant and
+   compile-time unknown). FIXED means that the vector-length is specific
+   (compile-time known). Both RVV_SCALABLE and RVV_FIXED_VLMAX are doing
+   auto-vectorization using VLMAX vsetvl configuration.  */
+static bool
+autovec_use_vlmax_p (void)
+{
+  return riscv_autovec_preference == RVV_SCALABLE
+	 || riscv_autovec_preference == RVV_FIXED_VLMAX;
+}
+
+/* Return the vectorization machine mode for RVV according to LMUL.  */
+machine_mode
+preferred_simd_mode (scalar_mode mode)
+{
+  /* We only enable auto-vectorization when TARGET_MIN_VLEN >= 128,
+     e.g. -march=rv64gcv, because the GCC loop vectorizer reports an ICE
+     when we enable -march=rv64gc_zve32* and -march=rv32gc_zve64*
+     in 'can_duplicate_and_interleave_p' of tree-vect-slp.cc. Since we have
+     VNx1SImode in -march=*zve32* and VNx1DImode in -march=*zve64*, they are
+     enabled in targetm.vector_mode_supported_p and the SLP vectorizer will
+     try to use them. Currently, we can support auto-vectorization in
+     -march=rv32_zve32x_zvl128b, whereas -march=rv32_zve32x_zvl32b and
+     -march=rv32_zve32x_zvl64b are disabled.
+ */
+  if (autovec_use_vlmax_p ())
+    {
+      /* If TARGET_MIN_VLEN < 128, we don't allow LMUL < 2
+	 auto-vectorization since Loop Vectorizer may use VNx1SImode or
+	 VNx1DImode to vectorize which will create ICE in the
+	 'can_duplicate_and_interleave_p' of tree-vect-slp.cc.  */
+      if (TARGET_MIN_VLEN < 128 && riscv_autovec_lmul < RVV_M2)
+	return word_mode;
+      /* We use LMUL = 1 as base bytesize which is BYTES_PER_RISCV_VECTOR and
+	 riscv_autovec_lmul as multiply factor to calculate the NUNITS to
+	 get the auto-vectorization mode.  */
+      poly_uint64 nunits;
+      poly_uint64 vector_size
+	= BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul);
+      poly_uint64 scalar_size = GET_MODE_SIZE (mode);
+      if (!multiple_p (vector_size, scalar_size, &nunits))
+	return word_mode;
+      machine_mode rvv_mode;
+      if (get_vector_mode (mode, nunits).exists (&rvv_mode))
+	return rvv_mode;
+    }
+  /* TODO: We will support minimum length VLS auto-vectorization in the future.
+   */
+  return word_mode;
+}
+
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5d2550871c7..c601389b540 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -6228,7 +6228,15 @@  riscv_convert_vector_bits (void)
      to set RVV mode size. The RVV machine modes size are run-time constant if
      TARGET_VECTOR is enabled. The RVV machine modes size remains default
      compile-time constant if TARGET_VECTOR is disabled.  */
-  return TARGET_VECTOR ? poly_uint16 (1, 1) : 1;
+  if (TARGET_VECTOR)
+    {
+      if (riscv_autovec_preference == RVV_FIXED_VLMAX)
+	return (int) TARGET_MIN_VLEN / (riscv_bytes_per_vector_chunk * 8);
+      else
+	return poly_uint16 (1, 1);
+    }
+  else
+    return 1;
 }
 
 /* Implement TARGET_OPTION_OVERRIDE.  */
@@ -7158,6 +7166,17 @@  riscv_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
 							& ~zeroed_hardregs);
 }
 
+/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.  */
+
+static machine_mode
+riscv_preferred_simd_mode (scalar_mode mode)
+{
+  if (TARGET_VECTOR)
+    return riscv_vector::preferred_simd_mode (mode);
+
+  return word_mode;
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -7412,6 +7431,9 @@  riscv_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
 #undef TARGET_ZERO_CALL_USED_REGS
 #define TARGET_ZERO_CALL_USED_REGS riscv_zero_call_used_regs
 
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-riscv.h"
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 0fda11ed67d..3f06ab574c1 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -23,7 +23,7 @@ 
 ;; This file include :
 ;;
 ;; - Intrinsics (https://github.com/riscv/rvv-intrinsic-doc)
-;; - Auto-vectorization (TBD)
+;; - Auto-vectorization (autovec.md)
 ;; - Combine optimization (TBD)
 
 (include "vector-iterators.md")
@@ -7419,3 +7419,5 @@ 
   "vle<sew>ff.v\t%0,%3%p1"
   [(set_attr "type" "vldff")
    (set_attr "mode" "<MODE>")])
+
+(include "autovec.md")