Middle-end: Do not model address cost for SELECT_VL style vectorization
Checks
Commit Message
Following Richard's suggestion, we should not model address cost in the loop
vectorizer for SELECT_VL or decrement IV style vectorization, since the other
vectorization styles do not model it either.
This makes the cost model comparison apples to apples.
This patch changes the length update cost (length_update_cost) from 2 to 1,
which turns out to give better codegen in various cases for RVV.
OK for trunk?
PR target/111153
gcc/ChangeLog:
* tree-vect-loop.cc (vect_estimate_min_profitable_iters): Remove address cost for select_vl/decrement IV.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/costmodel/riscv/rvv/pr111153.c: Moved to...
* gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c: ...here.
* gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c: New test.
---
.../vect/costmodel/riscv/rvv/pr111153-1.c | 18 ++++++++++++++++++
.../riscv/rvv/{pr111153.c => pr11153-2.c} | 4 ++--
gcc/tree-vect-loop.cc | 10 ++++------
3 files changed, 24 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
rename gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/{pr111153.c => pr11153-2.c} (93%)
Comments
On Thu, 14 Dec 2023, Juzhe-Zhong wrote:
> Follow Richard's suggestions, we should not model address cost in the loop
> vectorizer for select_vl or decrement IV since other style vectorization doesn't
> do that.
>
> To make cost model comparison apple to apple.
> This patch set COST from 2 to 1 which turns out have better codegen
> in various codegen for RVV.
>
> Ok for trunk ?
OK with me.
Richard.
> PR target/111153
>
> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vect_estimate_min_profitable_iters): Remove address cost for select_vl/decrement IV.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/costmodel/riscv/rvv/pr111153.c: Moved to...
> * gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c: ...here.
> * gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c: New test.
>
> ---
> .../vect/costmodel/riscv/rvv/pr111153-1.c | 18 ++++++++++++++++++
> .../riscv/rvv/{pr111153.c => pr11153-2.c} | 4 ++--
> gcc/tree-vect-loop.cc | 10 ++++------
> 3 files changed, 24 insertions(+), 8 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
> rename gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/{pr111153.c => pr11153-2.c} (93%)
>
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
> new file mode 100644
> index 00000000000..51c91f7410c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo -ffast-math" } */
> +
> +#define DEF_REDUC_PLUS(TYPE) \
> + TYPE __attribute__ ((noinline, noclone)) \
> + reduc_plus_##TYPE (TYPE *__restrict a, int n) \
> + { \
> + TYPE r = 0; \
> + for (int i = 0; i < n; ++i) \
> + r += a[i]; \
> + return r; \
> + }
> +
> +#define TEST_PLUS(T) T (int) T (float)
> +
> +TEST_PLUS (DEF_REDUC_PLUS)
> +
> +/* { dg-final { scan-assembler-not {vsetivli\s+zero,\s*4} } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
> similarity index 93%
> rename from gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
> rename to gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
> index 06e08ec5f2e..d361f1fc7fa 100644
> --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo" } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -ffast-math" } */
>
> #define DEF_REDUC_PLUS(TYPE) \
> TYPE __attribute__ ((noinline, noclone)) \
> @@ -11,7 +11,7 @@
> return r; \
> }
>
> -#define TEST_PLUS(T) T (int)
> +#define TEST_PLUS(T) T (int) T (float)
>
> TEST_PLUS (DEF_REDUC_PLUS)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 19e38b8637b..7a3db5f098b 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -4872,12 +4872,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>
> unsigned int length_update_cost = 0;
> if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
> - /* For decrement IV style, we use a single SELECT_VL since
> - beginning to calculate the number of elements need to be
> - processed in current iteration, and a SHIFT operation to
> - compute the next memory address instead of adding vectorization
> - factor. */
> - length_update_cost = 2;
> + /* For decrement IV style, Each only need a single SELECT_VL
> + or MIN since beginning to calculate the number of elements
> + need to be processed in current iteration. */
> + length_update_cost = 1;
> else
> /* For increment IV stype, Each may need two MINs and one MINUS to
> update lengths in body for next iteration. */
>
Thanks Richard. Committed.
juzhe.zhong@rivai.ai
From: Richard Biener
Date: 2023-12-14 23:10
To: Juzhe-Zhong
CC: gcc-patches; richard.sandiford
Subject: Re: [PATCH] Middle-end: Do not model address cost for SELECT_VL style vectorization
On Thu, 14 Dec 2023, Juzhe-Zhong wrote:
> Follow Richard's suggestions, we should not model address cost in the loop
> vectorizer for select_vl or decrement IV since other style vectorization doesn't
> do that.
>
> To make cost model comparison apple to apple.
> This patch set COST from 2 to 1 which turns out have better codegen
> in various codegen for RVV.
>
> Ok for trunk ?
OK with me.
Richard.
> PR target/111153
>
> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vect_estimate_min_profitable_iters): Remove address cost for select_vl/decrement IV.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/costmodel/riscv/rvv/pr111153.c: Moved to...
> * gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c: ...here.
> * gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c: New test.
>
> ---
> .../vect/costmodel/riscv/rvv/pr111153-1.c | 18 ++++++++++++++++++
> .../riscv/rvv/{pr111153.c => pr11153-2.c} | 4 ++--
> gcc/tree-vect-loop.cc | 10 ++++------
> 3 files changed, 24 insertions(+), 8 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
> rename gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/{pr111153.c => pr11153-2.c} (93%)
>
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
> new file mode 100644
> index 00000000000..51c91f7410c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo -ffast-math" } */
> +
> +#define DEF_REDUC_PLUS(TYPE) \
> + TYPE __attribute__ ((noinline, noclone)) \
> + reduc_plus_##TYPE (TYPE *__restrict a, int n) \
> + { \
> + TYPE r = 0; \
> + for (int i = 0; i < n; ++i) \
> + r += a[i]; \
> + return r; \
> + }
> +
> +#define TEST_PLUS(T) T (int) T (float)
> +
> +TEST_PLUS (DEF_REDUC_PLUS)
> +
> +/* { dg-final { scan-assembler-not {vsetivli\s+zero,\s*4} } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
> similarity index 93%
> rename from gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
> rename to gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
> index 06e08ec5f2e..d361f1fc7fa 100644
> --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo" } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -ffast-math" } */
>
> #define DEF_REDUC_PLUS(TYPE) \
> TYPE __attribute__ ((noinline, noclone)) \
> @@ -11,7 +11,7 @@
> return r; \
> }
>
> -#define TEST_PLUS(T) T (int)
> +#define TEST_PLUS(T) T (int) T (float)
>
> TEST_PLUS (DEF_REDUC_PLUS)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 19e38b8637b..7a3db5f098b 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -4872,12 +4872,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>
> unsigned int length_update_cost = 0;
> if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
> - /* For decrement IV style, we use a single SELECT_VL since
> - beginning to calculate the number of elements need to be
> - processed in current iteration, and a SHIFT operation to
> - compute the next memory address instead of adding vectorization
> - factor. */
> - length_update_cost = 2;
> + /* For decrement IV style, Each only need a single SELECT_VL
> + or MIN since beginning to calculate the number of elements
> + need to be processed in current iteration. */
> + length_update_cost = 1;
> else
> /* For increment IV stype, Each may need two MINs and one MINUS to
> update lengths in body for next iteration. */
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo -ffast-math" } */
+
+#define DEF_REDUC_PLUS(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ reduc_plus_##TYPE (TYPE *__restrict a, int n) \
+ { \
+ TYPE r = 0; \
+ for (int i = 0; i < n; ++i) \
+ r += a[i]; \
+ return r; \
+ }
+
+#define TEST_PLUS(T) T (int) T (float)
+
+TEST_PLUS (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-not {vsetivli\s+zero,\s*4} } } */
similarity index 93%
rename from gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
rename to gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -ffast-math" } */
#define DEF_REDUC_PLUS(TYPE) \
TYPE __attribute__ ((noinline, noclone)) \
@@ -11,7 +11,7 @@
return r; \
}
-#define TEST_PLUS(T) T (int)
+#define TEST_PLUS(T) T (int) T (float)
TEST_PLUS (DEF_REDUC_PLUS)
@@ -4872,12 +4872,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
unsigned int length_update_cost = 0;
if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
- /* For decrement IV style, we use a single SELECT_VL since
- beginning to calculate the number of elements need to be
- processed in current iteration, and a SHIFT operation to
- compute the next memory address instead of adding vectorization
- factor. */
- length_update_cost = 2;
+ /* For decrement IV style, Each only need a single SELECT_VL
+ or MIN since beginning to calculate the number of elements
+ need to be processed in current iteration. */
+ length_update_cost = 1;
else
/* For increment IV stype, Each may need two MINs and one MINUS to
update lengths in body for next iteration. */