[1/2] Mid engine setup [SU]ABDL
Checks
Commit Message
From: oluade01 <oluwatamilore.adebayo@arm.com>
This updates vect_recog_abd_pattern to recognize the widening
variant of absolute difference (ABDL, ABDL2).
gcc/ChangeLog:
* internal-fn.cc (widening_fn_p, decomposes_to_hilo_fn_p):
Add IFN_VEC_WIDEN_ABD to the switch statement.
* internal-fn.def (VEC_WIDEN_ABD): New internal hilo optab.
* optabs.def (vec_widen_sabd_optab,
vec_widen_sabd_hi_optab, vec_widen_sabd_lo_optab,
vec_widen_sabd_odd_optab, vec_widen_sabd_even_optab,
vec_widen_uabd_optab,
vec_widen_uabd_hi_optab, vec_widen_uabd_lo_optab,
vec_widen_uabd_odd_optab, vec_widen_uabd_even_optab):
New optabs.
* tree-vect-patterns.cc (vect_recog_abd_pattern): Update to
build a VEC_WIDEN_ABD call if the input precision is smaller
than the precision of the output.
(vect_recog_widen_abd_pattern): Should an ABD expression be
found preceding an extension, replace the two with a
VEC_WIDEN_ABD.
---
gcc/doc/md.texi | 11 ++
gcc/internal-fn.def | 5 +
gcc/optabs.def | 10 ++
gcc/tree-vect-patterns.cc | 205 +++++++++++++++++++++++++++++---------
4 files changed, 183 insertions(+), 48 deletions(-)
Comments
Oluwatamilore Adebayo <oluwatamilore.adebayo@arm.com> writes:
> From: oluade01 <oluwatamilore.adebayo@arm.com>
>
> This updates vect_recog_abd_pattern to recognize the widening
> variant of absolute difference (ABDL, ABDL2).
>
> gcc/ChangeLog:
>
> * internal-fn.cc (widening_fn_p, decomposes_to_hilo_fn_p):
> Add IFN_VEC_WIDEN_ABD to the switch statement.
> * internal-fn.def (VEC_WIDEN_ABD): New internal hilo optab.
> * optabs.def (vec_widen_sabd_optab,
> vec_widen_sabd_hi_optab, vec_widen_sabd_lo_optab,
> vec_widen_sabd_odd_even, vec_widen_sabd_even_optab,
> vec_widen_uabd_optab,
> vec_widen_uabd_hi_optab, vec_widen_uabd_lo_optab,
> vec_widen_uabd_odd_even, vec_widen_uabd_even_optab):
> New optabs.
> * tree-vect-patterns.cc (vect_recog_abd_pattern): Update to
> to build a VEC_WIDEN_ABD call if the input precision is smaller
> than the precision of the output.
> (vect_recog_widen_abd_pattern): Should an ABD expression be
> found preceeding an extension, replace the two with a
> VEC_WIDEN_ABD.
> ---
> gcc/doc/md.texi | 11 ++
> gcc/internal-fn.def | 5 +
> gcc/optabs.def | 10 ++
> gcc/tree-vect-patterns.cc | 205 +++++++++++++++++++++++++++++---------
> 4 files changed, 183 insertions(+), 48 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index e11b10d2fca11016232921bc85e47975f700e6c6..2ae6182b925d0cf8950dc830d083cf93baf2eaa1 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5617,6 +5617,17 @@ signed/unsigned elements of size S@. Subtract the high/low elements of 2 from
> 1 and widen the resulting elements. Put the N/2 results of size 2*S in the
> output vector (operand 0).
>
> +@cindex @code{vec_widen_sabdl_hi_@var{m}} instruction pattern
> +@cindex @code{vec_widen_sabdl_lo_@var{m}} instruction pattern
> +@cindex @code{vec_widen_uabdl_hi_@var{m}} instruction pattern
> +@cindex @code{vec_widen_uabdl_lo_@var{m}} instruction pattern
> +@item @samp{vec_widen_uabdl_hi_@var{m}}, @samp{vec_widen_uabdl_lo_@var{m}}
> +@itemx @samp{vec_widen_sabdl_hi_@var{m}}, @samp{vec_widen_sabdl_lo_@var{m}}
The optabs don't have the trailing “l” (long). (Which is a good thing!)
The list should include the even/odd patterns as well.
> +Signed/Unsigned widening absolute difference long. Operands 1 and 2 are
Similarly no “long” here.
> +vectors with N signed/unsigned elements of size S@. Find the absolute
> +difference between 1 and 2 and widen the resulting elements. Put the N/2
Maybe “operands 1 and 2”, or just “them”.
> +results of size 2*S in the output vector (operand 0).
> +
> @cindex @code{vec_addsub@var{m}3} instruction pattern
> @item @samp{vec_addsub@var{m}3}
> Alternating subtract, add with even lanes doing subtract and odd
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 116965f4830cec8f60642ff011a86b6562e2c509..d67274d68b49943a88c531e903fd03b42343ab97 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -352,6 +352,11 @@ DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_MINUS,
> first,
> vec_widen_ssub, vec_widen_usub,
> binary)
> +DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD,
> + ECF_CONST | ECF_NOTHROW,
> + first,
> + vec_widen_sabd, vec_widen_uabd,
> + binary)
> DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
> DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
>
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 35b835a6ac56d72417dac8ddfd77a8a7e2475e65..68dfa1550f791a2fe833012157601ecfa68f1e09 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -418,6 +418,11 @@ OPTAB_D (vec_widen_sadd_hi_optab, "vec_widen_sadd_hi_$a")
> OPTAB_D (vec_widen_sadd_lo_optab, "vec_widen_sadd_lo_$a")
> OPTAB_D (vec_widen_sadd_odd_optab, "vec_widen_sadd_odd_$a")
> OPTAB_D (vec_widen_sadd_even_optab, "vec_widen_sadd_even_$a")
> +OPTAB_D (vec_widen_sabd_optab, "vec_widen_sabd_$a")
> +OPTAB_D (vec_widen_sabd_hi_optab, "vec_widen_sabd_hi_$a")
> +OPTAB_D (vec_widen_sabd_lo_optab, "vec_widen_sabd_lo_$a")
> +OPTAB_D (vec_widen_sabd_odd_optab, "vec_widen_sabd_odd_$a")
> +OPTAB_D (vec_widen_sabd_even_optab, "vec_widen_sabd_even_$a")
> OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a")
> OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a")
> OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a")
> @@ -436,6 +441,11 @@ OPTAB_D (vec_widen_uadd_hi_optab, "vec_widen_uadd_hi_$a")
> OPTAB_D (vec_widen_uadd_lo_optab, "vec_widen_uadd_lo_$a")
> OPTAB_D (vec_widen_uadd_odd_optab, "vec_widen_uadd_odd_$a")
> OPTAB_D (vec_widen_uadd_even_optab, "vec_widen_uadd_even_$a")
> +OPTAB_D (vec_widen_uabd_optab, "vec_widen_uabd_$a")
> +OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a")
> +OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a")
> +OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a")
> +OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a")
> OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
> OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4")
> OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4")
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index e2392113bff4065c909aefc760b4c48978b73a5a..281d7bc2e9945ee415be051f5ec1cce19251fbbf 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -1404,15 +1404,28 @@ vect_recog_sad_pattern (vec_info *vinfo,
> gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
> if (!abd_stmt
> || !gimple_call_internal_p (abd_stmt)
> - || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> + || gimple_call_num_args (abd_stmt) != 2)
> return NULL;
>
> tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
>
> - if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
> - || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> - &unprom[1]))
> + if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
> + {
> + if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
> + &unprom[0])
> + || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> + &unprom[1]))
> + return NULL;
> + }
> + else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
> + {
> + unprom[0].op = abd_oprnd0;
> + unprom[0].type = TREE_TYPE (abd_oprnd0);
> + unprom[1].op = abd_oprnd1;
> + unprom[1].type = TREE_TYPE (abd_oprnd1);
> + }
> + else
> return NULL;
>
> half_type = unprom[0].type;
> @@ -1442,16 +1455,19 @@ vect_recog_sad_pattern (vec_info *vinfo,
>
> /* Function vect_recog_abd_pattern
>
> - Try to find the following ABsolute Difference (ABD) pattern:
> + Try to find the following ABsolute Difference (ABD) or
> + widening ABD (WIDEN_ABD) pattern:
>
> - VTYPE x, y, out;
> - type diff;
> - loop i in range:
> - S1 diff = x[i] - y[i]
> - S2 out[i] = ABS_EXPR <diff>;
> + TYPE1 x;
> + TYPE2 y;
> + TYPE3 x_cast = (TYPE3) x; // widening or no-op
> + TYPE3 y_cast = (TYPE3) y; // widening or no-op
> + TYPE3 diff = x_cast - y_cast;
> + TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
> + TYPE5 abs = ABS(U)_EXPR <diff_cast>;
>
> - where 'type' is a integer and 'VTYPE' is a vector of integers
> - the same size as 'type'
> + WIDEN_ABD exists to optimize the case where WTYPE is at least
> + twice as wide as VTYPE.
Sorry, my fault. I was using the original type names in this
suggestion, rather than the TYPE1…TYPE5 ones. Should be:
WIDEN_ABD exists to optimize the case where TYPE4 is at least
twice as wide as TYPE3.
>
> Input:
>
> @@ -1459,30 +1475,18 @@ vect_recog_sad_pattern (vec_info *vinfo,
>
> Output:
>
> - * TYPE_out: The type of the output of this pattern
> + * TYPE_OUT: The type of the output of this pattern
>
> * Return value: A new stmt that will be used to replace the sequence of
> - stmts that constitute the pattern; either SABD or UABD:
> - SABD_EXPR<x, y, out>
> - UABD_EXPR<x, y, out>
> + stmts that constitute the pattern; either SABD, UABD, SABDL or UABDL:
> + IFN_ABD<x, y, out>
> + IFN_WIDEN_ABD<x, y, out>
Lingering use of “L” suffixes here. Maybe:
stmts that constitute the pattern, principally:
out = IFN_ABD (x, y)
out = IFN_WIDEN_ABD (x, y)
> */
>
> static gimple *
> vect_recog_abd_pattern (vec_info *vinfo,
> stmt_vec_info stmt_vinfo, tree *type_out)
> {
> - /* Look for the following patterns
> - X = x[i]
> - Y = y[i]
> - DIFF = X - Y
> - DAD = ABS_EXPR<DIFF>
> - out[i] = DAD
> -
> - In which
> - - X, Y, DIFF, DAD all have the same type
> - - x, y, out are all vectors of the same type
> - */
> -
> gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> if (!last_stmt)
> return NULL;
> @@ -1496,54 +1500,83 @@ vect_recog_abd_pattern (vec_info *vinfo,
> unprom, &diff_stmt))
> return NULL;
>
> - tree abd_type = out_type, vectype;
> - tree abd_oprnds[2];
> - bool extend = false;
> + tree abd_in_type, abd_out_type;
> +
> if (half_type)
> {
> - vectype = get_vectype_for_scalar_type (vinfo, half_type);
> - abd_type = half_type;
> - extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
> + abd_in_type = half_type;
> + abd_out_type = abd_in_type;
> }
> else
> {
> unprom[0].op = gimple_assign_rhs1 (diff_stmt);
> unprom[1].op = gimple_assign_rhs2 (diff_stmt);
> - tree signed_out = signed_type_for (out_type);
> - vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> + abd_in_type = signed_type_for (out_type);
> + abd_out_type = abd_in_type;
> }
>
> - vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> + tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
> + if (!vectype_in)
> + return NULL;
>
> - if (!vectype
> - || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> + internal_fn ifn = IFN_ABD;
> + tree vectype_out = vectype_in;
> +
> + if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
> + && TYPE_PRECISION (abd_out_type) != stmt_vinfo->min_output_precision)
Sorry for not noticing last time, but I think the second condition
would be more natural as:
&& stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
(There's no distinction between abd_in_type and abd_out_type at this point,
so it seems clearer to use the same value in both conditions.)
> + {
> + tree mid_type
> + = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
> + TYPE_UNSIGNED (abd_in_type));
> + tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
> +
> + code_helper dummy_code;
> + int dummy_int;
> + auto_vec<tree> dummy_vec;
> + if (mid_vectype
> + && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
> + stmt_vinfo, mid_vectype,
> + vectype_in,
> + &dummy_code, &dummy_code,
> + &dummy_int, &dummy_vec))
> + {
> + ifn = IFN_VEC_WIDEN_ABD;
> + abd_out_type = mid_type;
> + vectype_out = mid_vectype;
> + }
> + }
> +
> + if (ifn == IFN_ABD
> + && !direct_internal_fn_supported_p (ifn, vectype_in,
> OPTIMIZE_FOR_SPEED))
> return NULL;
>
> + vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +
> + tree abd_oprnds[2];
> vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> - TREE_TYPE (vectype), unprom, vectype);
> + abd_in_type, unprom, vectype_in);
>
> *type_out = get_vectype_for_scalar_type (vinfo, out_type);
>
> - tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
> - gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> + tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
> + gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
> abd_oprnds[0], abd_oprnds[1]);
> gimple_call_set_lhs (abd_stmt, abd_result);
> gimple_set_location (abd_stmt, gimple_location (last_stmt));
>
> - if (!extend)
> - return abd_stmt;
> -
> gimple *stmt = abd_stmt;
> - if (!TYPE_UNSIGNED (abd_type))
> + if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
> + && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
> + && !TYPE_UNSIGNED (abd_out_type))
> {
> - tree unsign = unsigned_type_for (abd_type);
> + tree unsign = unsigned_type_for (abd_out_type);
> tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
> stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
> unsign_vectype);
> }
>
> - return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
> + return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
> }
>
> /* Recognize an operation that performs ORIG_CODE on widened inputs,
> @@ -1703,6 +1736,81 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
> &subtype);
> }
>
> +/* Try to detect abd on widened inputs, converting IFN_ABD
> + to IFN_VEC_WIDEN_ABD. */
> +static gimple *
> +vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
> + tree *type_out)
> +{
> + gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> + if (!last_stmt || !gimple_assign_cast_p (last_stmt))
I think this should be:
if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
gimple_assign_cast_p is more general, and allows conversions
between integral and non-integral types.
> + return NULL;
> +
> + tree last_rhs = gimple_assign_rhs1 (last_stmt);
> +
> + tree in_type = TREE_TYPE (last_rhs);
> + tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> + if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type))
> + return NULL;
I think this also needs to require TYPE_UNSIGNED (in_type):
if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
|| !TYPE_UNSIGNED (in_type))
return NULL;
That is, the extension has to be a zero extension rather than
a sign extension.
For example:
int32_t a, b, c;
int64_t d;
c = IFN_ABD (a, b);
d = (int64_t) c;
sign-extends the ABD result to 64 bits, and so a == INT_MAX
&& b == INT_MIN gives:
c = -1 (UINT_MAX converted to signed)
d = -1
But IFN_WIDEN_ABD would give d == UINT_MAX instead.
> +
> + stmt_vec_info abs_vinfo = vect_get_internal_def (vinfo, last_rhs);
> + if (!abs_vinfo)
> + return NULL;
> +
> + stmt_vec_info abd_pattern_vinfo = STMT_VINFO_RELATED_STMT (abs_vinfo);
> + if (!abd_pattern_vinfo)
> + return NULL;
> +
> + gimple *pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo);
> + if (gimple_assign_cast_p (pattern_stmt))
> + {
> + tree op = gimple_assign_rhs1 (pattern_stmt);
> + vect_unpromoted_value unprom;
> + op = vect_look_through_possible_promotion (vinfo, op, &unprom);
> +
> + if (!op)
> + return NULL;
> +
> + abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
> + if (!abd_pattern_vinfo)
> + return NULL;
> +
> + pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo);
> + }
I think the code quoted above reduces to:
vect_unpromoted_value unprom;
tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
return NULL;
stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
if (!abd_pattern_vinfo)
return NULL;
abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
> +
> + gcall *abd_stmt = dyn_cast <gcall *> (pattern_stmt);
> + if (!abd_stmt || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> + return NULL;
> +
> + tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> + tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
> + if (TYPE_PRECISION (TREE_TYPE (abd_oprnd0)) != TYPE_PRECISION (in_type))
> + return NULL;
With the changes above, this check would not be necessary.
LGTM otherwise, thanks.
Richard
> +
> + tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
> + tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
> +
> + code_helper dummy_code;
> + int dummy_int;
> + auto_vec<tree> dummy_vec;
> + if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
> + vectype_out, vectype_in,
> + &dummy_code, &dummy_code,
> + &dummy_int, &dummy_vec))
> + return NULL;
> +
> + vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
> +
> + *type_out = vectype_out;
> +
> + tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
> + gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
> + abd_oprnd0, abd_oprnd1);
> + gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
> + gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
> + return widen_abd_stmt;
> +}
> +
> /* Function vect_recog_ctz_ffs_pattern
>
> Try to find the following pattern:
> @@ -6670,6 +6778,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
> { vect_recog_mask_conversion_pattern, "mask_conversion" },
> { vect_recog_widen_plus_pattern, "widen_plus" },
> { vect_recog_widen_minus_pattern, "widen_minus" },
> + { vect_recog_widen_abd_pattern, "widen_abd" },
> /* These must come after the double widening ones. */
> };
> Sorry, my fault. I was using the original type names in this
> suggestion, rather than the TYPE1…TYPE5 ones. Should be:
>
> WIDEN_ABD exists to optimize the case where TYPE4 is at least
> twice as wide as TYPE3.
Change made.
> Lingering use of “L” suffixes here. Maybe:
>
> stmts that constitute the pattern, principally:
> out = IFN_ABD (x, y)
> out = IFN_WIDEN_ABD (x, y)
Change made.
> > + if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
> > + && TYPE_PRECISION (abd_out_type) != stmt_vinfo->min_output_precision)
>
> Sorry for not noticing last time, but I think the second condition
> would be more natural as:
>
> && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
>
> (There's no distinction between abs_in_type and abs_out_type at this point,
> so it seems clearer to use the same value in both conditions.)
Change made.
> > + gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> > + if (!last_stmt || !gimple_assign_cast_p (last_stmt))
>
> I think this should be:
>
> if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
>
> gimple_assign_cast_p is more general, and allows conversions
> between integral and non-integral types.
Change made.
> > + tree in_type = TREE_TYPE (last_rhs);
> > + tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> > + if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type))
> > + return NULL;
>
> I think this also needs to require TYPE_UNSIGNED (in_type):
>
> if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
> || !TYPE_UNSIGNED (in_type))
> return NULL;
>
> That is, the extension has to be a zero extension rather than
> a sign extension.
>
> For example:
>
> int32_t a, b, c;
> int64_t d;
>
> c = IFN_ABD (a, b);
> d = (int64_t) c;
>
> sign-extends the ABD result to 64 bits, and so a == INT_MAX
> && b == INT_MIN gives:
>
> c = -1 (UINT_MAX converted to signed)
> d = -1
>
> But IFN_WIDEN_ABD would give d == UINT_MAX instead.
Change made.
> > + gimple *pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo);
> > + if (gimple_assign_cast_p (pattern_stmt))
> > + {
> > + tree op = gimple_assign_rhs1 (pattern_stmt);
> > + vect_unpromoted_value unprom;
> > + op = vect_look_through_possible_promotion (vinfo, op, &unprom);
> > +
> > + if (!op)
> > + return NULL;
> > +
> > + abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
> > + if (!abd_pattern_vinfo)
> > + return NULL;
> > +
> > + pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo);
> > + }
>
> I think the code quoted above reduces to:
>
> vect_unpromoted_value unprom;
> tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
> if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
> return NULL;
>
> stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
> if (!abd_pattern_vinfo)
> return NULL;
> abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
>
> ...
>
> > + tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> > + tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
> > + if (TYPE_PRECISION (TREE_TYPE (abd_oprnd0)) != TYPE_PRECISION (in_type))
> > + return NULL;
>
> With the changes above, this check would not be necessary.
Both changes made.
Updated patch will be in the next email.
@@ -5617,6 +5617,17 @@ signed/unsigned elements of size S@. Subtract the high/low elements of 2 from
1 and widen the resulting elements. Put the N/2 results of size 2*S in the
output vector (operand 0).
+@cindex @code{vec_widen_sabd_hi_@var{m}} instruction pattern
+@cindex @code{vec_widen_sabd_lo_@var{m}} instruction pattern
+@cindex @code{vec_widen_uabd_hi_@var{m}} instruction pattern
+@cindex @code{vec_widen_uabd_lo_@var{m}} instruction pattern
+@item @samp{vec_widen_uabd_hi_@var{m}}, @samp{vec_widen_uabd_lo_@var{m}}
+@itemx @samp{vec_widen_sabd_hi_@var{m}}, @samp{vec_widen_sabd_lo_@var{m}}
+Signed/Unsigned widening absolute difference. Operands 1 and 2 are
+vectors with N signed/unsigned elements of size S@. Find the absolute
+difference between operands 1 and 2 and widen the resulting elements.
+Put the N/2 results of size 2*S in the output vector (operand 0).
+
@cindex @code{vec_addsub@var{m}3} instruction pattern
@item @samp{vec_addsub@var{m}3}
Alternating subtract, add with even lanes doing subtract and odd
@@ -352,6 +352,11 @@ DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_MINUS,
first,
vec_widen_ssub, vec_widen_usub,
binary)
+DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD,
+ ECF_CONST | ECF_NOTHROW,
+ first,
+ vec_widen_sabd, vec_widen_uabd,
+ binary)
DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
@@ -418,6 +418,11 @@ OPTAB_D (vec_widen_sadd_hi_optab, "vec_widen_sadd_hi_$a")
OPTAB_D (vec_widen_sadd_lo_optab, "vec_widen_sadd_lo_$a")
OPTAB_D (vec_widen_sadd_odd_optab, "vec_widen_sadd_odd_$a")
OPTAB_D (vec_widen_sadd_even_optab, "vec_widen_sadd_even_$a")
+OPTAB_D (vec_widen_sabd_optab, "vec_widen_sabd_$a")
+OPTAB_D (vec_widen_sabd_hi_optab, "vec_widen_sabd_hi_$a")
+OPTAB_D (vec_widen_sabd_lo_optab, "vec_widen_sabd_lo_$a")
+OPTAB_D (vec_widen_sabd_odd_optab, "vec_widen_sabd_odd_$a")
+OPTAB_D (vec_widen_sabd_even_optab, "vec_widen_sabd_even_$a")
OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a")
OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a")
OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a")
@@ -436,6 +441,11 @@ OPTAB_D (vec_widen_uadd_hi_optab, "vec_widen_uadd_hi_$a")
OPTAB_D (vec_widen_uadd_lo_optab, "vec_widen_uadd_lo_$a")
OPTAB_D (vec_widen_uadd_odd_optab, "vec_widen_uadd_odd_$a")
OPTAB_D (vec_widen_uadd_even_optab, "vec_widen_uadd_even_$a")
+OPTAB_D (vec_widen_uabd_optab, "vec_widen_uabd_$a")
+OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a")
+OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a")
+OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a")
+OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a")
OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4")
OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4")
@@ -1404,15 +1404,28 @@ vect_recog_sad_pattern (vec_info *vinfo,
gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
if (!abd_stmt
|| !gimple_call_internal_p (abd_stmt)
- || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+ || gimple_call_num_args (abd_stmt) != 2)
return NULL;
tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
- if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
- || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
- &unprom[1]))
+ if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
+ {
+ if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
+ &unprom[0])
+ || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
+ &unprom[1]))
+ return NULL;
+ }
+ else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
+ {
+ unprom[0].op = abd_oprnd0;
+ unprom[0].type = TREE_TYPE (abd_oprnd0);
+ unprom[1].op = abd_oprnd1;
+ unprom[1].type = TREE_TYPE (abd_oprnd1);
+ }
+ else
return NULL;
half_type = unprom[0].type;
@@ -1442,16 +1455,19 @@ vect_recog_sad_pattern (vec_info *vinfo,
/* Function vect_recog_abd_pattern
- Try to find the following ABsolute Difference (ABD) pattern:
+ Try to find the following ABsolute Difference (ABD) or
+ widening ABD (WIDEN_ABD) pattern:
- VTYPE x, y, out;
- type diff;
- loop i in range:
- S1 diff = x[i] - y[i]
- S2 out[i] = ABS_EXPR <diff>;
+ TYPE1 x;
+ TYPE2 y;
+ TYPE3 x_cast = (TYPE3) x; // widening or no-op
+ TYPE3 y_cast = (TYPE3) y; // widening or no-op
+ TYPE3 diff = x_cast - y_cast;
+ TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
+ TYPE5 abs = ABS(U)_EXPR <diff_cast>;
- where 'type' is a integer and 'VTYPE' is a vector of integers
- the same size as 'type'
+ WIDEN_ABD exists to optimize the case where TYPE4 is at least
+ twice as wide as TYPE3.
Input:
@@ -1459,30 +1475,18 @@ vect_recog_sad_pattern (vec_info *vinfo,
Output:
- * TYPE_out: The type of the output of this pattern
+ * TYPE_OUT: The type of the output of this pattern
* Return value: A new stmt that will be used to replace the sequence of
- stmts that constitute the pattern; either SABD or UABD:
- SABD_EXPR<x, y, out>
- UABD_EXPR<x, y, out>
+ stmts that constitute the pattern, principally:
+ out = IFN_ABD (x, y)
+ out = IFN_WIDEN_ABD (x, y)
*/
static gimple *
vect_recog_abd_pattern (vec_info *vinfo,
stmt_vec_info stmt_vinfo, tree *type_out)
{
- /* Look for the following patterns
- X = x[i]
- Y = y[i]
- DIFF = X - Y
- DAD = ABS_EXPR<DIFF>
- out[i] = DAD
-
- In which
- - X, Y, DIFF, DAD all have the same type
- - x, y, out are all vectors of the same type
- */
-
gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
if (!last_stmt)
return NULL;
@@ -1496,54 +1500,83 @@ vect_recog_abd_pattern (vec_info *vinfo,
unprom, &diff_stmt))
return NULL;
- tree abd_type = out_type, vectype;
- tree abd_oprnds[2];
- bool extend = false;
+ tree abd_in_type, abd_out_type;
+
if (half_type)
{
- vectype = get_vectype_for_scalar_type (vinfo, half_type);
- abd_type = half_type;
- extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
+ abd_in_type = half_type;
+ abd_out_type = abd_in_type;
}
else
{
unprom[0].op = gimple_assign_rhs1 (diff_stmt);
unprom[1].op = gimple_assign_rhs2 (diff_stmt);
- tree signed_out = signed_type_for (out_type);
- vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+ abd_in_type = signed_type_for (out_type);
+ abd_out_type = abd_in_type;
}
- vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+ tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
+ if (!vectype_in)
+ return NULL;
- if (!vectype
- || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+ internal_fn ifn = IFN_ABD;
+ tree vectype_out = vectype_in;
+
+ if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
+ && TYPE_PRECISION (abd_out_type) != stmt_vinfo->min_output_precision)
+ {
+ tree mid_type
+ = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
+ TYPE_UNSIGNED (abd_in_type));
+ tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
+
+ code_helper dummy_code;
+ int dummy_int;
+ auto_vec<tree> dummy_vec;
+ if (mid_vectype
+ && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
+ stmt_vinfo, mid_vectype,
+ vectype_in,
+ &dummy_code, &dummy_code,
+ &dummy_int, &dummy_vec))
+ {
+ ifn = IFN_VEC_WIDEN_ABD;
+ abd_out_type = mid_type;
+ vectype_out = mid_vectype;
+ }
+ }
+
+ if (ifn == IFN_ABD
+ && !direct_internal_fn_supported_p (ifn, vectype_in,
OPTIMIZE_FOR_SPEED))
return NULL;
+ vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+ tree abd_oprnds[2];
vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
- TREE_TYPE (vectype), unprom, vectype);
+ abd_in_type, unprom, vectype_in);
*type_out = get_vectype_for_scalar_type (vinfo, out_type);
- tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
- gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+ tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
+ gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
abd_oprnds[0], abd_oprnds[1]);
gimple_call_set_lhs (abd_stmt, abd_result);
gimple_set_location (abd_stmt, gimple_location (last_stmt));
- if (!extend)
- return abd_stmt;
-
gimple *stmt = abd_stmt;
- if (!TYPE_UNSIGNED (abd_type))
+ if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
+ && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
+ && !TYPE_UNSIGNED (abd_out_type))
{
- tree unsign = unsigned_type_for (abd_type);
+ tree unsign = unsigned_type_for (abd_out_type);
tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
unsign_vectype);
}
- return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
+ return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
}
/* Recognize an operation that performs ORIG_CODE on widened inputs,
@@ -1703,6 +1736,81 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
&subtype);
}
+/* Try to detect abd on widened inputs, converting IFN_ABD
+ to IFN_VEC_WIDEN_ABD. */
+static gimple *
+vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+ tree *type_out)
+{
+ gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+ if (!last_stmt || !gimple_assign_cast_p (last_stmt))
+ return NULL;
+
+ tree last_rhs = gimple_assign_rhs1 (last_stmt);
+
+ tree in_type = TREE_TYPE (last_rhs);
+ tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+ if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type))
+ return NULL;
+
+ stmt_vec_info abs_vinfo = vect_get_internal_def (vinfo, last_rhs);
+ if (!abs_vinfo)
+ return NULL;
+
+ stmt_vec_info abd_pattern_vinfo = STMT_VINFO_RELATED_STMT (abs_vinfo);
+ if (!abd_pattern_vinfo)
+ return NULL;
+
+ gimple *pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo);
+ if (gimple_assign_cast_p (pattern_stmt))
+ {
+ tree op = gimple_assign_rhs1 (pattern_stmt);
+ vect_unpromoted_value unprom;
+ op = vect_look_through_possible_promotion (vinfo, op, &unprom);
+
+ if (!op)
+ return NULL;
+
+ abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
+ if (!abd_pattern_vinfo)
+ return NULL;
+
+ pattern_stmt = STMT_VINFO_STMT (abd_pattern_vinfo);
+ }
+
+ gcall *abd_stmt = dyn_cast <gcall *> (pattern_stmt);
+ if (!abd_stmt || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+ return NULL;
+
+ tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
+ tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
+ if (TYPE_PRECISION (TREE_TYPE (abd_oprnd0)) != TYPE_PRECISION (in_type))
+ return NULL;
+
+ tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
+ tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
+
+ code_helper dummy_code;
+ int dummy_int;
+ auto_vec<tree> dummy_vec;
+ if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
+ vectype_out, vectype_in,
+ &dummy_code, &dummy_code,
+ &dummy_int, &dummy_vec))
+ return NULL;
+
+ vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
+
+ *type_out = vectype_out;
+
+ tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
+ gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
+ abd_oprnd0, abd_oprnd1);
+ gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
+ gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
+ return widen_abd_stmt;
+}
+
/* Function vect_recog_ctz_ffs_pattern
Try to find the following pattern:
@@ -6670,6 +6778,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
{ vect_recog_mask_conversion_pattern, "mask_conversion" },
{ vect_recog_widen_plus_pattern, "widen_plus" },
{ vect_recog_widen_minus_pattern, "widen_minus" },
+ { vect_recog_widen_abd_pattern, "widen_abd" },
/* These must come after the double widening ones. */
};