[vect] Enhance NARROW FLOAT_EXPR vectorization by truncating integer to lower precision.
Checks
Commit Message
Similarly to WIDEN FLOAT_EXPR, when no direct optab exists, try an
intermediate integer type whenever the gimple ranger can tell it's safe.
i.e.
When there's no direct optab for vector long long -> vector float, but
the value range of the integer can be represented as int, try vector int
-> vector float if available.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?
gcc/ChangeLog:
PR tree-optimization/108804
* tree-vect-patterns.cc (vect_get_range_info): Remove static.
* tree-vect-stmts.cc (vect_create_vectorized_demotion_stmts):
Add new parameter last_stmt_p.
(vectorizable_conversion): Enhance NARROW FLOAT_EXPR
vectorization by truncating to lower precision.
* tree-vectorizer.h (vect_get_range_info): New declaration.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr108804.c: New test.
---
gcc/testsuite/gcc.target/i386/pr108804.c | 15 ++++
gcc/tree-vect-patterns.cc | 2 +-
gcc/tree-vect-stmts.cc | 106 ++++++++++++++++++-----
gcc/tree-vectorizer.h | 1 +
4 files changed, 100 insertions(+), 24 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr108804.c
Comments
On Wed, Apr 26, 2023 at 9:36 AM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Similarly to WIDEN FLOAT_EXPR, when no direct optab exists, try an
> intermediate integer type whenever the gimple ranger can tell it's safe.
>
> i.e.
> When there's no direct optab for vector long long -> vector float, but
> the value range of the integer can be represented as int, try vector int
> -> vector float if available.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
> PR tree-optimization/108804
> * tree-vect-patterns.cc (vect_get_range_info): Remove static.
> * tree-vect-stmts.cc (vect_create_vectorized_demotion_stmts):
> Add new parameter last_stmt_p.
> (vectorizable_conversion): Enhance NARROW FLOAT_EXPR
> vectorization by truncating to lower precision.
> * tree-vectorizer.h (vect_get_range_info): New declaration.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr108804.c: New test.
> ---
> gcc/testsuite/gcc.target/i386/pr108804.c | 15 ++++
> gcc/tree-vect-patterns.cc | 2 +-
> gcc/tree-vect-stmts.cc | 106 ++++++++++++++++++-----
> gcc/tree-vectorizer.h | 1 +
> 4 files changed, 100 insertions(+), 24 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr108804.c
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr108804.c b/gcc/testsuite/gcc.target/i386/pr108804.c
> new file mode 100644
> index 00000000000..2a43c1e1848
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr108804.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -Ofast -fdump-tree-vect-details" } */
> +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 1 "vect" } } */
> +
> +typedef unsigned long long uint64_t;
> +uint64_t d[512];
> +float f[1024];
> +
> +void foo() {
> + for (int i=0; i<512; ++i) {
> + uint64_t k = d[i];
> + f[i]=(k & 0x3F30FFFF);
> + }
> +}
> +
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index a49b0953977..dd546b488a4 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -61,7 +61,7 @@ along with GCC; see the file COPYING3. If not see
> /* Return true if we have a useful VR_RANGE range for VAR, storing it
> in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
>
> -static bool
> +bool
> vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
> {
> value_range vr;
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 6b7dbfd4a23..d79a1409d24 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. If not see
> #include "internal-fn.h"
> #include "tree-vector-builder.h"
> #include "vec-perm-indices.h"
> +#include "gimple-range.h"
> #include "tree-ssa-loop-niter.h"
> #include "gimple-fold.h"
> #include "regs.h"
> @@ -4799,7 +4800,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
> stmt_vec_info stmt_info,
> vec<tree> &vec_dsts,
> gimple_stmt_iterator *gsi,
> - slp_tree slp_node, enum tree_code code)
> + slp_tree slp_node, enum tree_code code,
> + bool last_stmt_p)
Can you please document this new parameter?
> {
> unsigned int i;
> tree vop0, vop1, new_tmp, vec_dest;
> @@ -4815,9 +4817,9 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
> new_tmp = make_ssa_name (vec_dest, new_stmt);
> gimple_assign_set_lhs (new_stmt, new_tmp);
> vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
> -
> - if (multi_step_cvt)
> - /* Store the resulting vector for next recursive call. */
> + if (multi_step_cvt || !last_stmt_p)
> + /* Store the resulting vector for next recursive call,
> + or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
> (*vec_oprnds)[i/2] = new_tmp;
> else
> {
> @@ -4843,7 +4845,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
> vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
> multi_step_cvt - 1,
> stmt_info, vec_dsts, gsi,
> - slp_node, VEC_PACK_TRUNC_EXPR);
> + slp_node, VEC_PACK_TRUNC_EXPR,
> + last_stmt_p);
> }
>
> vec_dsts.quick_push (vec_dest);
> @@ -5248,22 +5251,53 @@ vectorizable_conversion (vec_info *vinfo,
> &interm_types))
> break;
>
> - if (code != FIX_TRUNC_EXPR
> - || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
> + if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
> goto unsupported;
>
> - cvt_type
> - = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
> - cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
> - if (cvt_type == NULL_TREE)
> - goto unsupported;
> - if (!supportable_convert_operation (code, cvt_type, vectype_in,
> - &codecvt1))
> - goto unsupported;
> - if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
> - &code1, &multi_step_cvt,
> - &interm_types))
> - break;
> + if (code == FIX_TRUNC_EXPR)
> + {
> + cvt_type
> + = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
> + cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
> + if (cvt_type == NULL_TREE)
> + goto unsupported;
> + if (!supportable_convert_operation (code, cvt_type, vectype_in,
> + &codecvt1))
> + goto unsupported;
> + if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
> + &code1, &multi_step_cvt,
> + &interm_types))
> + break;
> + }
> + /* If op0 can be represented with low precision integer,
> + truncate it to cvt_type and the do FLOAT_EXPR. */
> + else if (code == FLOAT_EXPR)
> + {
> + wide_int op_min_value, op_max_value;
> + if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
> + goto unsupported;
> +
> + cvt_type
> + = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
> + if (cvt_type == NULL_TREE
> + || (wi::min_precision (op_max_value, SIGNED)
> + > TYPE_PRECISION (cvt_type))
> + || (wi::min_precision (op_min_value, SIGNED)
> + > TYPE_PRECISION (cvt_type)))
> + goto unsupported;
> +
> + cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
> + if (cvt_type == NULL_TREE)
> + goto unsupported;
> + if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
> + &code1, &multi_step_cvt,
> + &interm_types))
> + goto unsupported;
> + if (supportable_convert_operation (code, vectype_out,
> + cvt_type, &codecvt1))
> + break;
> + }
> +
> goto unsupported;
>
> default:
> @@ -5335,8 +5369,11 @@ vectorizable_conversion (vec_info *vinfo,
> from supportable_*_operation, and store them in the correct order
> for future use in vect_create_vectorized_*_stmts (). */
> auto_vec<tree> vec_dsts (multi_step_cvt + 1);
> + bool widen_or_narrow_float_p
> + = cvt_type && (modifier == WIDEN
> + || (modifier == NARROW && code == FLOAT_EXPR));
> vec_dest = vect_create_destination_var (scalar_dest,
> - (cvt_type && modifier == WIDEN)
> + widen_or_narrow_float_p
> ? cvt_type : vectype_out);
> vec_dsts.quick_push (vec_dest);
>
> @@ -5353,7 +5390,7 @@ vectorizable_conversion (vec_info *vinfo,
>
> if (cvt_type)
> vec_dest = vect_create_destination_var (scalar_dest,
> - modifier == WIDEN
> + widen_or_narrow_float_p
> ? vectype_out : cvt_type);
>
> int ninputs = 1;
> @@ -5456,7 +5493,7 @@ vectorizable_conversion (vec_info *vinfo,
> vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
> op0, &vec_oprnds0);
> /* Arguments are ready. Create the new vector stmts. */
> - if (cvt_type)
> + if (cvt_type && code != FLOAT_EXPR)
> FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
> {
> gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
> @@ -5470,7 +5507,30 @@ vectorizable_conversion (vec_info *vinfo,
> vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
> multi_step_cvt,
> stmt_info, vec_dsts, gsi,
> - slp_node, code1);
> + slp_node, code1,
> + !cvt_type || code != FLOAT_EXPR);
I understand what you are doing, but somehow it looks a bit awkward?
Maybe we should split the NARROW case into NARROW_SRC and NARROW_DST?
The case of narrowing the source because we know its range isn't a
good fit for the flow.
> + /* After demoting op0 to cvt_type, convert it to dest. */
> + if (cvt_type && code == FLOAT_EXPR)
> + {
> + for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
> + {
> + /* Arguments are ready, create the new vector stmt. */
> + gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
> + gassign *new_stmt
> + = gimple_build_assign (vec_dest, codecvt1, vec_oprnds0[i]);
> + new_temp = make_ssa_name (vec_dest, new_stmt);
> + gimple_assign_set_lhs (new_stmt, new_temp);
> + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
> +
> + /* This is the last step of the conversion sequence. Store the
> + vectors in SLP_NODE or in vector info of the scalar statement
> + (or in STMT_VINFO_RELATED_STMT chain). */
> + if (slp_node)
> + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
> + else
> + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
> + }
> + }
> break;
> }
> if (!slp_node)
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 9cf2fb23fe3..27fa2306a5e 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2384,6 +2384,7 @@ extern bool compatible_calls_p (gcall *, gcall *);
> /* In tree-vect-patterns.cc. */
> extern void
> vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree);
> +extern bool vect_get_range_info (tree, wide_int*, wide_int*);
>
> /* Pattern recognition functions.
> Additional pattern recognition functions can (and will) be added
> --
> 2.39.1.388.g2fc9e9ca3c
>
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -Ofast -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 1 "vect" } } */
+
+typedef unsigned long long uint64_t;
+uint64_t d[512];
+float f[1024];
+
+void foo() {
+ for (int i=0; i<512; ++i) {
+ uint64_t k = d[i];
+ f[i]=(k & 0x3F30FFFF);
+ }
+}
+
@@ -61,7 +61,7 @@ along with GCC; see the file COPYING3. If not see
/* Return true if we have a useful VR_RANGE range for VAR, storing it
in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
-static bool
+bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
value_range vr;
@@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. If not see
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
+#include "gimple-range.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
@@ -4799,7 +4800,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
stmt_vec_info stmt_info,
vec<tree> &vec_dsts,
gimple_stmt_iterator *gsi,
- slp_tree slp_node, enum tree_code code)
+ slp_tree slp_node, enum tree_code code,
+ bool last_stmt_p)
{
unsigned int i;
tree vop0, vop1, new_tmp, vec_dest;
@@ -4815,9 +4817,9 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
new_tmp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_tmp);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
-
- if (multi_step_cvt)
- /* Store the resulting vector for next recursive call. */
+ if (multi_step_cvt || !last_stmt_p)
+ /* Store the resulting vector for next recursive call,
+ or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
(*vec_oprnds)[i/2] = new_tmp;
else
{
@@ -4843,7 +4845,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
multi_step_cvt - 1,
stmt_info, vec_dsts, gsi,
- slp_node, VEC_PACK_TRUNC_EXPR);
+ slp_node, VEC_PACK_TRUNC_EXPR,
+ last_stmt_p);
}
vec_dsts.quick_push (vec_dest);
@@ -5248,22 +5251,53 @@ vectorizable_conversion (vec_info *vinfo,
&interm_types))
break;
- if (code != FIX_TRUNC_EXPR
- || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
+ if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
goto unsupported;
- cvt_type
- = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
- cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
- if (cvt_type == NULL_TREE)
- goto unsupported;
- if (!supportable_convert_operation (code, cvt_type, vectype_in,
- &codecvt1))
- goto unsupported;
- if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
- &code1, &multi_step_cvt,
- &interm_types))
- break;
+ if (code == FIX_TRUNC_EXPR)
+ {
+ cvt_type
+ = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
+ cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
+ if (cvt_type == NULL_TREE)
+ goto unsupported;
+ if (!supportable_convert_operation (code, cvt_type, vectype_in,
+ &codecvt1))
+ goto unsupported;
+ if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
+ &code1, &multi_step_cvt,
+ &interm_types))
+ break;
+ }
+ /* If op0 can be represented with low precision integer,
+ truncate it to cvt_type and the do FLOAT_EXPR. */
+ else if (code == FLOAT_EXPR)
+ {
+ wide_int op_min_value, op_max_value;
+ if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
+ goto unsupported;
+
+ cvt_type
+ = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
+ if (cvt_type == NULL_TREE
+ || (wi::min_precision (op_max_value, SIGNED)
+ > TYPE_PRECISION (cvt_type))
+ || (wi::min_precision (op_min_value, SIGNED)
+ > TYPE_PRECISION (cvt_type)))
+ goto unsupported;
+
+ cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
+ if (cvt_type == NULL_TREE)
+ goto unsupported;
+ if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
+ &code1, &multi_step_cvt,
+ &interm_types))
+ goto unsupported;
+ if (supportable_convert_operation (code, vectype_out,
+ cvt_type, &codecvt1))
+ break;
+ }
+
goto unsupported;
default:
@@ -5335,8 +5369,11 @@ vectorizable_conversion (vec_info *vinfo,
from supportable_*_operation, and store them in the correct order
for future use in vect_create_vectorized_*_stmts (). */
auto_vec<tree> vec_dsts (multi_step_cvt + 1);
+ bool widen_or_narrow_float_p
+ = cvt_type && (modifier == WIDEN
+ || (modifier == NARROW && code == FLOAT_EXPR));
vec_dest = vect_create_destination_var (scalar_dest,
- (cvt_type && modifier == WIDEN)
+ widen_or_narrow_float_p
? cvt_type : vectype_out);
vec_dsts.quick_push (vec_dest);
@@ -5353,7 +5390,7 @@ vectorizable_conversion (vec_info *vinfo,
if (cvt_type)
vec_dest = vect_create_destination_var (scalar_dest,
- modifier == WIDEN
+ widen_or_narrow_float_p
? vectype_out : cvt_type);
int ninputs = 1;
@@ -5456,7 +5493,7 @@ vectorizable_conversion (vec_info *vinfo,
vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
op0, &vec_oprnds0);
/* Arguments are ready. Create the new vector stmts. */
- if (cvt_type)
+ if (cvt_type && code != FLOAT_EXPR)
FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
{
gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
@@ -5470,7 +5507,30 @@ vectorizable_conversion (vec_info *vinfo,
vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
multi_step_cvt,
stmt_info, vec_dsts, gsi,
- slp_node, code1);
+ slp_node, code1,
+ !cvt_type || code != FLOAT_EXPR);
+ /* After demoting op0 to cvt_type, convert it to dest. */
+ if (cvt_type && code == FLOAT_EXPR)
+ {
+ for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
+ {
+ /* Arguments are ready, create the new vector stmt. */
+ gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
+ gassign *new_stmt
+ = gimple_build_assign (vec_dest, codecvt1, vec_oprnds0[i]);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+
+ /* This is the last step of the conversion sequence. Store the
+ vectors in SLP_NODE or in vector info of the scalar statement
+ (or in STMT_VINFO_RELATED_STMT chain). */
+ if (slp_node)
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ else
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ }
+ }
break;
}
if (!slp_node)
@@ -2384,6 +2384,7 @@ extern bool compatible_calls_p (gcall *, gcall *);
/* In tree-vect-patterns.cc. */
extern void
vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree);
+extern bool vect_get_range_info (tree, wide_int*, wide_int*);
/* Pattern recognition functions.
Additional pattern recognition functions can (and will) be added