RISC-V: Support TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT to optimize codegen of RVV auto-vectorization
Checks
Commit Message
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
This patch supports vector alignment for RVV auto-vectorization.
Consider this following case:
void __attribute__((noinline, noclone))
f (int * __restrict dst, int * __restrict op1, int * __restrict op2, int count)
{
for (int i = 0; i < count; ++i)
dst[i] = op1[i] + op2[i];
}
Before this patch:
ble a3,zero,.L1
srli a4,a1,2
negw a4,a4
andi a5,a4,3
sext.w a3,a3
beq a5,zero,.L3
lw a7,0(a1)
lw a6,0(a2)
andi a4,a4,2
addw a6,a6,a7
sw a6,0(a0)
beq a4,zero,.L3
lw a7,4(a1)
lw a4,4(a2)
li a6,3
addw a4,a4,a7
sw a4,4(a0)
bne a5,a6,.L3
lw a6,8(a2)
lw a4,8(a1)
addw a4,a4,a6
sw a4,8(a0)
.L3:
subw a3,a3,a5
slli a4,a3,32
csrr a6,vlenb
srli a4,a4,32
srli a6,a6,2
slli a3,a5,2
mv a5,a4
bgtu a4,a6,.L17
.L5:
csrr a6,vlenb
add a1,a1,a3
add a2,a2,a3
add a0,a0,a3
srli a7,a6,2
li a3,0
.L8:
vsetvli zero,a5,e32,m1,ta,ma
vle32.v v1,0(a1)
vle32.v v2,0(a2)
vsetvli t1,zero,e32,m1,ta,ma
add a3,a3,a7
vadd.vv v1,v1,v2
vsetvli zero,a5,e32,m1,ta,ma
vse32.v v1,0(a0)
mv a5,a4
bleu a4,a3,.L6
mv a5,a3
.L6:
sub a5,a4,a5
bleu a5,a7,.L7
mv a5,a7
.L7:
add a1,a1,a6
add a2,a2,a6
add a0,a0,a6
bne a5,zero,.L8
.L1:
ret
.L17:
mv a5,a6
j .L5
After this patch:
f:
ble a3,zero,.L1
csrr a4,vlenb
srli a4,a4,2
mv a5,a3
bgtu a3,a4,.L9
.L3:
csrr a6,vlenb
li a4,0
srli a7,a6,2
.L6:
vsetvli zero,a5,e32,m1,ta,ma
vle32.v v2,0(a1)
vle32.v v1,0(a2)
vsetvli t1,zero,e32,m1,ta,ma
add a4,a4,a7
vadd.vv v1,v1,v2
vsetvli zero,a5,e32,m1,ta,ma
vse32.v v1,0(a0)
mv a5,a3
bleu a3,a4,.L4
mv a5,a4
.L4:
sub a5,a3,a5
bleu a5,a7,.L5
mv a5,a7
.L5:
add a0,a0,a6
add a2,a2,a6
add a1,a1,a6
bne a5,zero,.L6
.L1:
ret
.L9:
mv a5,a4
j .L3
gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_vectorize_preferred_vector_alignment): New function.
(TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT): New target hook.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/binop/shift-rv32gcv.c: Adapt testcase.
* gcc.target/riscv/rvv/autovec/align-1.c: New test.
---
gcc/config/riscv/riscv.cc | 22 +++++++++++++++++++
.../gcc.target/riscv/rvv/autovec/align-1.c | 12 ++++++++++
.../riscv/rvv/autovec/binop/shift-rv32gcv.c | 10 +++++----
3 files changed, 40 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/align-1.c
Comments
The implementation is copied directly from ARM SVE.
I applied in my downstream GCC for a year and there is no issue so far.
Ok for trunk ?
juzhe.zhong@rivai.ai
From: juzhe.zhong
Date: 2023-05-13 19:44
To: gcc-patches
CC: kito.cheng; palmer; rdapp.gcc; jeffreyalaw; Juzhe-Zhong
Subject: [PATCH] RISC-V: Support TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT to optimize codegen of RVV auto-vectorization
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
This patch supports vector alignment for RVV auto-vectorization.
Consider this following case:
void __attribute__((noinline, noclone))
f (int * __restrict dst, int * __restrict op1, int * __restrict op2, int count)
{
for (int i = 0; i < count; ++i)
dst[i] = op1[i] + op2[i];
}
Before this patch:
ble a3,zero,.L1
srli a4,a1,2
negw a4,a4
andi a5,a4,3
sext.w a3,a3
beq a5,zero,.L3
lw a7,0(a1)
lw a6,0(a2)
andi a4,a4,2
addw a6,a6,a7
sw a6,0(a0)
beq a4,zero,.L3
lw a7,4(a1)
lw a4,4(a2)
li a6,3
addw a4,a4,a7
sw a4,4(a0)
bne a5,a6,.L3
lw a6,8(a2)
lw a4,8(a1)
addw a4,a4,a6
sw a4,8(a0)
.L3:
subw a3,a3,a5
slli a4,a3,32
csrr a6,vlenb
srli a4,a4,32
srli a6,a6,2
slli a3,a5,2
mv a5,a4
bgtu a4,a6,.L17
.L5:
csrr a6,vlenb
add a1,a1,a3
add a2,a2,a3
add a0,a0,a3
srli a7,a6,2
li a3,0
.L8:
vsetvli zero,a5,e32,m1,ta,ma
vle32.v v1,0(a1)
vle32.v v2,0(a2)
vsetvli t1,zero,e32,m1,ta,ma
add a3,a3,a7
vadd.vv v1,v1,v2
vsetvli zero,a5,e32,m1,ta,ma
vse32.v v1,0(a0)
mv a5,a4
bleu a4,a3,.L6
mv a5,a3
.L6:
sub a5,a4,a5
bleu a5,a7,.L7
mv a5,a7
.L7:
add a1,a1,a6
add a2,a2,a6
add a0,a0,a6
bne a5,zero,.L8
.L1:
ret
.L17:
mv a5,a6
j .L5
After this patch:
f:
ble a3,zero,.L1
csrr a4,vlenb
srli a4,a4,2
mv a5,a3
bgtu a3,a4,.L9
.L3:
csrr a6,vlenb
li a4,0
srli a7,a6,2
.L6:
vsetvli zero,a5,e32,m1,ta,ma
vle32.v v2,0(a1)
vle32.v v1,0(a2)
vsetvli t1,zero,e32,m1,ta,ma
add a4,a4,a7
vadd.vv v1,v1,v2
vsetvli zero,a5,e32,m1,ta,ma
vse32.v v1,0(a0)
mv a5,a3
bleu a3,a4,.L4
mv a5,a4
.L4:
sub a5,a3,a5
bleu a5,a7,.L5
mv a5,a7
.L5:
add a0,a0,a6
add a2,a2,a6
add a1,a1,a6
bne a5,zero,.L6
.L1:
ret
.L9:
mv a5,a4
j .L3
gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_vectorize_preferred_vector_alignment): New function.
(TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT): New target hook.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/binop/shift-rv32gcv.c: Adapt testcase.
* gcc.target/riscv/rvv/autovec/align-1.c: New test.
---
gcc/config/riscv/riscv.cc | 22 +++++++++++++++++++
.../gcc.target/riscv/rvv/autovec/align-1.c | 12 ++++++++++
.../riscv/rvv/autovec/binop/shift-rv32gcv.c | 10 +++++----
3 files changed, 40 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/align-1.c
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index de578b5b899..a5776a550b2 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7499,6 +7499,24 @@ riscv_preferred_simd_mode (scalar_mode mode)
return word_mode;
}
+/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
+
+static poly_uint64
+riscv_vectorize_preferred_vector_alignment (const_tree type)
+{
+ if (riscv_v_ext_vector_mode_p (TYPE_MODE (type)))
+ {
+ /* If the length of the vector is a fixed power of 2, try to align
+ to that length, otherwise don't try to align at all. */
+ HOST_WIDE_INT result;
+ if (!GET_MODE_BITSIZE (TYPE_MODE (type)).is_constant (&result)
+ || !pow2p_hwi (result))
+ result = TYPE_ALIGN (TREE_TYPE (type));
+ return result;
+ }
+ return TYPE_ALIGN (type);
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -7771,6 +7789,10 @@ riscv_preferred_simd_mode (scalar_mode mode)
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode
+#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
+#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
+ riscv_vectorize_preferred_vector_alignment
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-riscv.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/align-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/align-1.c
new file mode 100644
index 00000000000..14201e1f7e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/align-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 --param riscv-autovec-preference=scalable" } */
+
+void __attribute__((noinline, noclone))
+f (int * __restrict dst, int * __restrict op1, int * __restrict op2, int count)
+{
+ for (int i = 0; i < count; ++i)
+ dst[i] = op1[i] + op2[i];
+}
+
+/* { dg-final { scan-assembler-not "lw" } } */
+/* { dg-final { scan-assembler-not "sw" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/shift-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/shift-rv32gcv.c
index da0f79a1cf0..d98100b3276 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/shift-rv32gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/shift-rv32gcv.c
@@ -4,8 +4,10 @@
#include "shift-template.h"
/* TODO: For int16_t and uint16_t we need widening/promotion patterns.
- Therefore, expect only 4 vsll.vv instead of 6 for now. */
+ We don't check the assembler number since lacking patterns make
+ auto-vectorization inconsistent in LMUL = 1/2/4/8. */
+
+/* { dg-final { scan-assembler {\tvsll\.vv} } } */
+/* { dg-final { scan-assembler {\tvsrl\.vv} } } */
+/* { dg-final { scan-assembler {\tvsra\.vv} } } */
-/* { dg-final { scan-assembler-times {\tvsll\.vv} 4 } } */
-/* { dg-final { scan-assembler-times {\tvsrl\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvsra\.vv} 3 } } */
--
2.36.1
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index de578b5b899..a5776a550b2 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -7499,6 +7499,24 @@ riscv_preferred_simd_mode (scalar_mode mode)
> return word_mode;
> }
> +/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
> +
> +static poly_uint64
> +riscv_vectorize_preferred_vector_alignment (const_tree type)
> +{
> + if (riscv_v_ext_vector_mode_p (TYPE_MODE (type)))
> + {
> + /* If the length of the vector is a fixed power of 2, try to align
> + to that length, otherwise don't try to align at all. */
> + HOST_WIDE_INT result;
> + if (!GET_MODE_BITSIZE (TYPE_MODE (type)).is_constant (&result)
> + || !pow2p_hwi (result))
> + result = TYPE_ALIGN (TREE_TYPE (type));
> + return result;
Why we block VLS here and then relaxed by different way in another patch?
Oh, I see. I try this way:
static poly_uint64
riscv_vectorize_preferred_vector_alignment (const_tree type)
{
if (riscv_v_ext_vector_mode_p (TYPE_MODE (type)))
return TYPE_ALIGN (TREE_TYPE (type));
return TYPE_ALIGN (type);
}
And disable both TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
and TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
Both VLA and VLS works well after the testing of align-1.c && align-2.c
Is the way you prefer ?
But I am not sure whether it's safe since both my downstream GCC and ARM SVE didn't use this approach.
I am Ok of both for now. So do you prefer the second approach ? Then I can send V2 patch.
juzhe.zhong@rivai.ai
From: Kito Cheng
Date: 2023-05-15 10:38
To: juzhe.zhong@rivai.ai
CC: gcc-patches; palmer; Robin Dapp; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Support TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT to optimize codegen of RVV auto-vectorization
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index de578b5b899..a5776a550b2 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -7499,6 +7499,24 @@ riscv_preferred_simd_mode (scalar_mode mode)
> return word_mode;
> }
> +/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
> +
> +static poly_uint64
> +riscv_vectorize_preferred_vector_alignment (const_tree type)
> +{
> + if (riscv_v_ext_vector_mode_p (TYPE_MODE (type)))
> + {
> + /* If the length of the vector is a fixed power of 2, try to align
> + to that length, otherwise don't try to align at all. */
> + HOST_WIDE_INT result;
> + if (!GET_MODE_BITSIZE (TYPE_MODE (type)).is_constant (&result)
> + || !pow2p_hwi (result))
> + result = TYPE_ALIGN (TREE_TYPE (type));
> + return result;
Why we block VLS here and then relaxed by different way in another patch?
On Mon, May 15, 2023 at 10:48 AM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> Oh, I see. I try this way:
> static poly_uint64
> riscv_vectorize_preferred_vector_alignment (const_tree type)
> {
> if (riscv_v_ext_vector_mode_p (TYPE_MODE (type)))
> return TYPE_ALIGN (TREE_TYPE (type));
> return TYPE_ALIGN (type);
> }
>
> And disable both TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
> and TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
>
> Both VLA and VLS works well after the testing of align-1.c && align-2.c
> Is the way you prefer ?
>
> But I am not sure whether it's safe since both my downstream GCC and ARM SVE didn't use this approach.
> I am Ok of both for now. So do you prefer the second approach ? Then I can send V2 patch.
I am not sure it's safe either, but I am not convinced we need
TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE for improving the
VLS-VLMAX alignment, so I would prefer just using
TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT for now unless we found
it's not safe or we know why it's better way.
Hi, address comment and send V2 patch here:
https://gcc.gnu.org/pipermail/gcc-patches/2023-May/618477.html
Thanks.
juzhe.zhong@rivai.ai
From: Kito Cheng
Date: 2023-05-15 10:56
To: juzhe.zhong@rivai.ai
CC: gcc-patches; palmer; Robin Dapp; jeffreyalaw
Subject: Re: Re: [PATCH] RISC-V: Support TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT to optimize codegen of RVV auto-vectorization
On Mon, May 15, 2023 at 10:48 AM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> Oh, I see. I try this way:
> static poly_uint64
> riscv_vectorize_preferred_vector_alignment (const_tree type)
> {
> if (riscv_v_ext_vector_mode_p (TYPE_MODE (type)))
> return TYPE_ALIGN (TREE_TYPE (type));
> return TYPE_ALIGN (type);
> }
>
> And disable both TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
> and TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
>
> Both VLA and VLS works well after the testing of align-1.c && align-2.c
> Is the way you prefer ?
>
> But I am not sure whether it's safe since both my downstream GCC and ARM SVE didn't use this approach.
> I am Ok of both for now. So do you prefer the second approach ? Then I can send V2 patch.
I am not sure it's safe either, but I am not convinced we need
TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE for improving the
VLS-VLMAX alignment, so I would prefer just using
TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT for now unless we found
it's not safe or we know why it's better way.
Hi,
we need to discern what we want to achieve here. The goal might
be to prevent the vectorizer from performing peeling or versioning
for alignment. I realize the peeling code looks ugly but it's
actually for a good cause when the target does not support
misaligned vector access or only with severe penalty.
So while I was still commenting on the other V1 patch I realized
that V2 is already in now :)
> return TYPE_ALIGN (TREE_TYPE (type));
I'm not sure this is what we want or rather what the hardware wants.
The preferred alignment of a VNx4SI (or larger) is 32 bit/4 bytes
then - even 1 byte for a QI vector?
Of course, we don't peel for alignment then anymore but we
might actually want to, depending on the hardware.
I mean as a temporary workaround it might be acceptable but do we
actually need to get rid of the peeling code? If the hardware
supports unaligned access without penalty we should specify in
the vectorization costs
e.g.
case unaligned_load:
case unaligned_store:
return 1;
(Your "return 1" for riscv_builtin_vectorization_cost will do
that as well of course).
and the peeling cost check should do the rest (i.e. nothing :) ).
So I'd much rather prefer that over the current approach as it
is more localized and will need an mtune-related approach later
anyway.
Regards
Robin
> we need to discern what we want to achieve here. The goal might
> be to prevent the vectorizer from performing peeling or versioning
> for alignment. I realize the peeling code looks ugly but it's
> actually for a good cause when the target does not support
> misaligned vector access or only with severe penalty.
Vector spec says it should support element alignment, so my
understanding is default behavior should be just aligned to
vector-spec said :)
I guess Ju-Zhe might have different thoughts on that, we might need
some more comment from him.
> So I'd much rather prefer that over the current approach as it
> is more localized and will need an mtune-related approach later
> anyway.
I know there is some HW implementation that might be faster if the
address is aligned to 128 bit or 256 bit, and some HW implementation
might only get a few penalties from the first iteration if not aligned
to some alignment.
Anyway those are all mtune-related, so I guess eventually both
riscv_builtin_vectorization_cost and
riscv_vectorize_preferred_vector_alignment should get info from mtune.
>
> Regards
> Robin
After this patch, RVV GCC by default support alignment of RVV modes according to riscv-modes.def.
In riscv-modes.def, we define each RVV modes are element align which is aligned to RVV ISA spec.
If you want to support other alignment, you should add tunning info for this in the future.
And the default behavior in case of alignment which is already in this patch should not be changed in the future.
Thanks.
juzhe.zhong@rivai.ai
From: Kito Cheng
Date: 2023-05-15 16:02
To: Robin Dapp
CC: juzhe.zhong@rivai.ai; gcc-patches; palmer; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Support TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT to optimize codegen of RVV auto-vectorization
> we need to discern what we want to achieve here. The goal might
> be to prevent the vectorizer from performing peeling or versioning
> for alignment. I realize the peeling code looks ugly but it's
> actually for a good cause when the target does not support
> misaligned vector access or only with severe penalty.
Vector spec says it should support element alignment, so my
understanding is default behavior should be just aligned to
vector-spec said :)
I guess Ju-Zhe might have different thoughts on that, we might need
some more comment from him.
> So I'd much rather prefer that over the current approach as it
> is more localized and will need an mtune-related approach later
> anyway.
I know there is some HW implementation that might be faster if the
address is aligned to 128 bit or 256 bit, and some HW implementation
might only get a few penalties from the first iteration if not aligned
to some alignment.
Anyway those are all mtune-related, so I guess eventually both
riscv_builtin_vectorization_cost and
riscv_vectorize_preferred_vector_alignment should get info from mtune.
>
> Regards
> Robin
> After this patch, RVV GCC by default support alignment of RVV modes
> according to riscv-modes.def. In riscv-modes.def, we define each RVV
> modes are element align which is aligned to RVV ISA spec.
>
> If you want to support other alignment, you should add tunning info
> for this in the future. And the default behavior in case of alignment
> which is already in this patch should not be changed in the future.
We're changing the preferred_vector_alignment hook, not the
vector_alignment hook. The way I see it, we will never peel for alignment
when setting the preferred_alignment to the alignment of a single
element, no matter the cost of misaligned loads/stores. Is the idea
then to adjust the preferred_alignment hook depending on mtune?
IMHO it's much more "natural" to set the vectorization costs according to
mtune (done anyway at some point), not change the preferred_alignment and
let the vectorizer decide whether is has enough information on whether
to peel or not.
Maybe Richard can comment here on why the vector_alignment_preferred hook
was originally introduced or what wasn't possible without it (i.e. why not
go via costs). At some point the vectorizer would peel even if the costs
were low for misaligned accesses but that had already been fixed before the
hook was introduced.
And ABI-wise (and that's what the V spec documents) we are already safe
by using MIN (128, TYPE_SIZE (type)). We could probably even lower that?
Regards
Robin
No, ARM SVE is 128bit alignment instead of element align (in aarch64-modes.def).
If you want to tune the alignment, you should add tunning info into riscv-modes.def
instead of this target hook.
Thanks.
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-05-15 16:58
To: juzhe.zhong@rivai.ai; Kito.cheng; richard.sandiford
CC: rdapp.gcc; gcc-patches; palmer; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Support TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT to optimize codegen of RVV auto-vectorization
> After this patch, RVV GCC by default support alignment of RVV modes
> according to riscv-modes.def. In riscv-modes.def, we define each RVV
> modes are element align which is aligned to RVV ISA spec.
>
> If you want to support other alignment, you should add tunning info
> for this in the future. And the default behavior in case of alignment
> which is already in this patch should not be changed in the future.
We're changing the preferred_vector_alignment hook, not the
vector_alignment hook. The way I see it, we will never peel for alignment
when setting the preferred_alignment to the alignment of a single
element, no matter the cost of misaligned loads/stores. Is the idea
then to adjust the preferred_alignment hook depending on mtune?
IMHO it's much more "natural" to set the vectorization costs according to
mtune (done anyway at some point), not change the preferred_alignment and
let the vectorizer decide whether is has enough information on whether
to peel or not.
Maybe Richard can comment here on why the vector_alignment_preferred hook
was originally introduced or what wasn't possible without it (i.e. why not
go via costs). At some point the vectorizer would peel even if the costs
were low for misaligned accesses but that had already been fixed before the
hook was introduced.
And ABI-wise (and that's what the V spec documents) we are already safe
by using MIN (128, TYPE_SIZE (type)). We could probably even lower that?
Regards
Robin
@@ -7499,6 +7499,24 @@ riscv_preferred_simd_mode (scalar_mode mode)
return word_mode;
}
+/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
+
+static poly_uint64
+riscv_vectorize_preferred_vector_alignment (const_tree type)
+{
+ if (riscv_v_ext_vector_mode_p (TYPE_MODE (type)))
+ {
+ /* If the length of the vector is a fixed power of 2, try to align
+ to that length, otherwise don't try to align at all. */
+ HOST_WIDE_INT result;
+ if (!GET_MODE_BITSIZE (TYPE_MODE (type)).is_constant (&result)
+ || !pow2p_hwi (result))
+ result = TYPE_ALIGN (TREE_TYPE (type));
+ return result;
+ }
+ return TYPE_ALIGN (type);
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -7771,6 +7789,10 @@ riscv_preferred_simd_mode (scalar_mode mode)
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode
+#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
+#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
+ riscv_vectorize_preferred_vector_alignment
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-riscv.h"
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 --param riscv-autovec-preference=scalable" } */
+
+void __attribute__((noinline, noclone))
+f (int * __restrict dst, int * __restrict op1, int * __restrict op2, int count)
+{
+ for (int i = 0; i < count; ++i)
+ dst[i] = op1[i] + op2[i];
+}
+
+/* { dg-final { scan-assembler-not "lw" } } */
+/* { dg-final { scan-assembler-not "sw" } } */
@@ -4,8 +4,10 @@
#include "shift-template.h"
/* TODO: For int16_t and uint16_t we need widening/promotion patterns.
- Therefore, expect only 4 vsll.vv instead of 6 for now. */
+ We don't check the assembler number since lacking patterns make
+ auto-vectorization inconsistent in LMUL = 1/2/4/8. */
+
+/* { dg-final { scan-assembler {\tvsll\.vv} } } */
+/* { dg-final { scan-assembler {\tvsrl\.vv} } } */
+/* { dg-final { scan-assembler {\tvsra\.vv} } } */
-/* { dg-final { scan-assembler-times {\tvsll\.vv} 4 } } */
-/* { dg-final { scan-assembler-times {\tvsrl\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvsra\.vv} 3 } } */