Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE.
Checks
Commit Message
r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
TYPE_MIN, but PABSB will store unsigned result into dst. The patch
uses ABSU_EXPR + VCE instead of ABS_EXPR.
Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
vector absm2 is guarded with TARGET_MMX_WITH_SSE.
Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
Ok for trunk?
gcc/ChangeLog:
PR target/110108
* config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
_mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
TARGET_64BIT.
* config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
real codename for __builtin_ia32_pabs{b,w,d}.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr110108.c: New test.
---
gcc/config/i386/i386-builtin.def | 6 ++--
gcc/config/i386/i386.cc | 44 ++++++++++++++++++++----
gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++
3 files changed, 56 insertions(+), 10 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c
Comments
On Mon, Jun 5, 2023 at 9:34 PM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> uses ABSU_EXPR + VCE instead of ABS_EXPR.
>
> Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> vector absm2 is guarded with TARGET_MMX_WITH_SSE.
>
> Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> Ok for trunk?
>
>
> gcc/ChangeLog:
>
> PR target/110108
> * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
> _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
> ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
> TARGET_64BIT.
> * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
> real codename for __builtin_ia32_pabs{b,w,d}.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr110108.c: New test.
> ---
> gcc/config/i386/i386-builtin.def | 6 ++--
> gcc/config/i386/i386.cc | 44 ++++++++++++++++++++----
> gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++
> 3 files changed, 56 insertions(+), 10 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c
>
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index 383b68a9bb8..7ba5b6a9d11 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
>
> /* SSSE3 */
> BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
>
> BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
> BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index d4ff56ee8dd..b09b3c79e99 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -18433,6 +18433,7 @@ bool
> ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> {
> gimple *stmt = gsi_stmt (*gsi), *g;
> + gimple_seq stmts = NULL;
> tree fndecl = gimple_call_fndecl (stmt);
> gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
> int n_args = gimple_call_num_args (stmt);
> @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> {
> loc = gimple_location (stmt);
> tree type = TREE_TYPE (arg2);
> - gimple_seq stmts = NULL;
> if (VECTOR_FLOAT_TYPE_P (type))
> {
> tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
> @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> tree zero_vec = build_zero_cst (type);
> tree minus_one_vec = build_minus_one_cst (type);
> tree cmp_type = truth_type_for (type);
> - gimple_seq stmts = NULL;
> tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
> gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> g = gimple_build_assign (gimple_call_lhs (stmt),
> @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> break;
>
> case IX86_BUILTIN_PABSB:
> + case IX86_BUILTIN_PABSW:
> + case IX86_BUILTIN_PABSD:
> + /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
> + if (!TARGET_64BIT)
> + break;
> + /* FALLTHRU. */
> case IX86_BUILTIN_PABSB128:
> case IX86_BUILTIN_PABSB256:
> case IX86_BUILTIN_PABSB512:
> - case IX86_BUILTIN_PABSW:
> case IX86_BUILTIN_PABSW128:
> case IX86_BUILTIN_PABSW256:
> case IX86_BUILTIN_PABSW512:
> - case IX86_BUILTIN_PABSD:
> case IX86_BUILTIN_PABSD128:
> case IX86_BUILTIN_PABSD256:
> case IX86_BUILTIN_PABSD512:
> @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> if (n_args > 1
> && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
> break;
> - loc = gimple_location (stmt);
> - g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
> - gsi_replace (gsi, g, false);
> + {
> + tree utype, ures, vce;
> + switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))))
> + {
> + case E_QImode:
> + utype = unsigned_intQI_type_node;
> + break;
> + case E_HImode:
> + utype = unsigned_intHI_type_node;
> + break;
> + case E_SImode:
> + utype = unsigned_intSI_type_node;
> + break;
> + case E_DImode:
> + utype = long_long_unsigned_type_node;
> + break;
> + default:
> + gcc_unreachable ();
> + }
> + utype = get_same_sized_vectype (utype, TREE_TYPE (arg0));
The above switch can be replaced with just simply
utype = unsigned_type_for (TREE_TYPE (arg0));
> + /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
> + instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */
> + ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
> + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> + loc = gimple_location (stmt);
> + vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
> + g = gimple_build_assign (gimple_call_lhs (stmt),
> + VIEW_CONVERT_EXPR, vce);
> + gsi_replace (gsi, g, false);
> + }
> return true;
>
> default:
> diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
> new file mode 100644
> index 00000000000..cd05763b9bf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110108.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2" } */
> +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> +#include <immintrin.h>
> +
> +__m128i do_stuff_128(__m128i X0, __m128i X1) {
> + __m128i AbsX0 = _mm_abs_epi8(X0);
> + __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
> + return Result;
> +}
> +
> +__m256i do_stuff_256(__m256i X0, __m256i X1) {
> + __m256i AbsX0 = _mm256_abs_epi8(X0);
> + __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
> + return Result;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>
On Tue, Jun 6, 2023 at 12:49 PM Andrew Pinski <pinskia@gmail.com> wrote:
>
> On Mon, Jun 5, 2023 at 9:34 PM liuhongt via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> > TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> > uses ABSU_EXPR + VCE instead of ABS_EXPR.
> >
> > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> > vector absm2 is guarded with TARGET_MMX_WITH_SSE.
> >
> > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > Ok for trunk?
> >
> >
> > gcc/ChangeLog:
> >
> > PR target/110108
> > * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
> > _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
> > ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
> > TARGET_64BIT.
> > * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
> > real codename for __builtin_ia32_pabs{b,w,d}.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/pr110108.c: New test.
> > ---
> > gcc/config/i386/i386-builtin.def | 6 ++--
> > gcc/config/i386/i386.cc | 44 ++++++++++++++++++++----
> > gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++
> > 3 files changed, 56 insertions(+), 10 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c
> >
> > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> > index 383b68a9bb8..7ba5b6a9d11 100644
> > --- a/gcc/config/i386/i386-builtin.def
> > +++ b/gcc/config/i386/i386-builtin.def
> > @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
> >
> > /* SSSE3 */
> > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
> > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
> > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
> > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> >
> > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
> > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index d4ff56ee8dd..b09b3c79e99 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -18433,6 +18433,7 @@ bool
> > ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> > {
> > gimple *stmt = gsi_stmt (*gsi), *g;
> > + gimple_seq stmts = NULL;
> > tree fndecl = gimple_call_fndecl (stmt);
> > gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
> > int n_args = gimple_call_num_args (stmt);
> > @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> > {
> > loc = gimple_location (stmt);
> > tree type = TREE_TYPE (arg2);
> > - gimple_seq stmts = NULL;
> > if (VECTOR_FLOAT_TYPE_P (type))
> > {
> > tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
> > @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> > tree zero_vec = build_zero_cst (type);
> > tree minus_one_vec = build_minus_one_cst (type);
> > tree cmp_type = truth_type_for (type);
> > - gimple_seq stmts = NULL;
> > tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
> > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > g = gimple_build_assign (gimple_call_lhs (stmt),
> > @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> > break;
> >
> > case IX86_BUILTIN_PABSB:
> > + case IX86_BUILTIN_PABSW:
> > + case IX86_BUILTIN_PABSD:
> > + /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
> > + if (!TARGET_64BIT)
> > + break;
> > + /* FALLTHRU. */
> > case IX86_BUILTIN_PABSB128:
> > case IX86_BUILTIN_PABSB256:
> > case IX86_BUILTIN_PABSB512:
> > - case IX86_BUILTIN_PABSW:
> > case IX86_BUILTIN_PABSW128:
> > case IX86_BUILTIN_PABSW256:
> > case IX86_BUILTIN_PABSW512:
> > - case IX86_BUILTIN_PABSD:
> > case IX86_BUILTIN_PABSD128:
> > case IX86_BUILTIN_PABSD256:
> > case IX86_BUILTIN_PABSD512:
> > @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> > if (n_args > 1
> > && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
> > break;
> > - loc = gimple_location (stmt);
> > - g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
> > - gsi_replace (gsi, g, false);
> > + {
> > + tree utype, ures, vce;
> > + switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))))
> > + {
> > + case E_QImode:
> > + utype = unsigned_intQI_type_node;
> > + break;
> > + case E_HImode:
> > + utype = unsigned_intHI_type_node;
> > + break;
> > + case E_SImode:
> > + utype = unsigned_intSI_type_node;
> > + break;
> > + case E_DImode:
> > + utype = long_long_unsigned_type_node;
> > + break;
> > + default:
> > + gcc_unreachable ();
> > + }
> > + utype = get_same_sized_vectype (utype, TREE_TYPE (arg0));
>
> The above switch can be replaced with just simply
> utype = unsigned_type_for (TREE_TYPE (arg0));
Yes, thanks.
>
> > + /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
> > + instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */
> > + ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
> > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > + loc = gimple_location (stmt);
> > + vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
> > + g = gimple_build_assign (gimple_call_lhs (stmt),
> > + VIEW_CONVERT_EXPR, vce);
> > + gsi_replace (gsi, g, false);
> > + }
> > return true;
> >
> > default:
> > diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
> > new file mode 100644
> > index 00000000000..cd05763b9bf
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr110108.c
> > @@ -0,0 +1,16 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-mavx2 -O2" } */
> > +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> > +#include <immintrin.h>
> > +
> > +__m128i do_stuff_128(__m128i X0, __m128i X1) {
> > + __m128i AbsX0 = _mm_abs_epi8(X0);
> > + __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
> > + return Result;
> > +}
> > +
> > +__m256i do_stuff_256(__m256i X0, __m256i X1) {
> > + __m256i AbsX0 = _mm256_abs_epi8(X0);
> > + __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
> > + return Result;
> > +}
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >
On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> uses ABSU_EXPR + VCE instead of ABS_EXPR.
>
> Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> vector absm2 is guarded with TARGET_MMX_WITH_SSE.
>
> Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> Ok for trunk?
>
>
> gcc/ChangeLog:
>
> PR target/110108
> * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
> _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
> ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
> TARGET_64BIT.
> * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
> real codename for __builtin_ia32_pabs{b,w,d}.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr110108.c: New test.
> ---
> gcc/config/i386/i386-builtin.def | 6 ++--
> gcc/config/i386/i386.cc | 44 ++++++++++++++++++++----
> gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++
> 3 files changed, 56 insertions(+), 10 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c
>
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index 383b68a9bb8..7ba5b6a9d11 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
>
> /* SSSE3 */
> BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
>
> BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
> BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index d4ff56ee8dd..b09b3c79e99 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -18433,6 +18433,7 @@ bool
> ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> {
> gimple *stmt = gsi_stmt (*gsi), *g;
> + gimple_seq stmts = NULL;
> tree fndecl = gimple_call_fndecl (stmt);
> gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
> int n_args = gimple_call_num_args (stmt);
> @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> {
> loc = gimple_location (stmt);
> tree type = TREE_TYPE (arg2);
> - gimple_seq stmts = NULL;
> if (VECTOR_FLOAT_TYPE_P (type))
> {
> tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
> @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> tree zero_vec = build_zero_cst (type);
> tree minus_one_vec = build_minus_one_cst (type);
> tree cmp_type = truth_type_for (type);
> - gimple_seq stmts = NULL;
> tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
> gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> g = gimple_build_assign (gimple_call_lhs (stmt),
> @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> break;
>
> case IX86_BUILTIN_PABSB:
> + case IX86_BUILTIN_PABSW:
> + case IX86_BUILTIN_PABSD:
> + /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
> + if (!TARGET_64BIT)
This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see
the definition of T_M_W_S in i386.h. OTOH, these builtins are
available for TARGET_MMX, so I'm not sure if the above check is needed
at all.
Uros.
> + break;
> + /* FALLTHRU. */
> case IX86_BUILTIN_PABSB128:
> case IX86_BUILTIN_PABSB256:
> case IX86_BUILTIN_PABSB512:
> - case IX86_BUILTIN_PABSW:
> case IX86_BUILTIN_PABSW128:
> case IX86_BUILTIN_PABSW256:
> case IX86_BUILTIN_PABSW512:
> - case IX86_BUILTIN_PABSD:
> case IX86_BUILTIN_PABSD128:
> case IX86_BUILTIN_PABSD256:
> case IX86_BUILTIN_PABSD512:
> @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> if (n_args > 1
> && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
> break;
> - loc = gimple_location (stmt);
> - g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
> - gsi_replace (gsi, g, false);
> + {
> + tree utype, ures, vce;
> + switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))))
> + {
> + case E_QImode:
> + utype = unsigned_intQI_type_node;
> + break;
> + case E_HImode:
> + utype = unsigned_intHI_type_node;
> + break;
> + case E_SImode:
> + utype = unsigned_intSI_type_node;
> + break;
> + case E_DImode:
> + utype = long_long_unsigned_type_node;
> + break;
> + default:
> + gcc_unreachable ();
> + }
> + utype = get_same_sized_vectype (utype, TREE_TYPE (arg0));
> + /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
> + instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */
> + ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
> + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> + loc = gimple_location (stmt);
> + vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
> + g = gimple_build_assign (gimple_call_lhs (stmt),
> + VIEW_CONVERT_EXPR, vce);
> + gsi_replace (gsi, g, false);
> + }
> return true;
>
> default:
> diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
> new file mode 100644
> index 00000000000..cd05763b9bf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110108.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2" } */
> +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> +#include <immintrin.h>
> +
> +__m128i do_stuff_128(__m128i X0, __m128i X1) {
> + __m128i AbsX0 = _mm_abs_epi8(X0);
> + __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
> + return Result;
> +}
> +
> +__m256i do_stuff_256(__m256i X0, __m256i X1) {
> + __m256i AbsX0 = _mm256_abs_epi8(X0);
> + __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
> + return Result;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>
On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> uses ABSU_EXPR + VCE instead of ABS_EXPR.
>
> Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> vector absm2 is guarded with TARGET_MMX_WITH_SSE.
Please note that we are using builtins here, so we should not fold to
absm2, but to ssse3_absm2, which is also available with TARGET_MMX.
Uros.
>
> Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> Ok for trunk?
>
>
> gcc/ChangeLog:
>
> PR target/110108
> * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
> _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
> ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
> TARGET_64BIT.
> * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
> real codename for __builtin_ia32_pabs{b,w,d}.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr110108.c: New test.
> ---
> gcc/config/i386/i386-builtin.def | 6 ++--
> gcc/config/i386/i386.cc | 44 ++++++++++++++++++++----
> gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++
> 3 files changed, 56 insertions(+), 10 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c
>
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index 383b68a9bb8..7ba5b6a9d11 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
>
> /* SSSE3 */
> BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
>
> BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
> BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index d4ff56ee8dd..b09b3c79e99 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -18433,6 +18433,7 @@ bool
> ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> {
> gimple *stmt = gsi_stmt (*gsi), *g;
> + gimple_seq stmts = NULL;
> tree fndecl = gimple_call_fndecl (stmt);
> gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
> int n_args = gimple_call_num_args (stmt);
> @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> {
> loc = gimple_location (stmt);
> tree type = TREE_TYPE (arg2);
> - gimple_seq stmts = NULL;
> if (VECTOR_FLOAT_TYPE_P (type))
> {
> tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
> @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> tree zero_vec = build_zero_cst (type);
> tree minus_one_vec = build_minus_one_cst (type);
> tree cmp_type = truth_type_for (type);
> - gimple_seq stmts = NULL;
> tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
> gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> g = gimple_build_assign (gimple_call_lhs (stmt),
> @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> break;
>
> case IX86_BUILTIN_PABSB:
> + case IX86_BUILTIN_PABSW:
> + case IX86_BUILTIN_PABSD:
> + /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
> + if (!TARGET_64BIT)
> + break;
> + /* FALLTHRU. */
> case IX86_BUILTIN_PABSB128:
> case IX86_BUILTIN_PABSB256:
> case IX86_BUILTIN_PABSB512:
> - case IX86_BUILTIN_PABSW:
> case IX86_BUILTIN_PABSW128:
> case IX86_BUILTIN_PABSW256:
> case IX86_BUILTIN_PABSW512:
> - case IX86_BUILTIN_PABSD:
> case IX86_BUILTIN_PABSD128:
> case IX86_BUILTIN_PABSD256:
> case IX86_BUILTIN_PABSD512:
> @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> if (n_args > 1
> && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
> break;
> - loc = gimple_location (stmt);
> - g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
> - gsi_replace (gsi, g, false);
> + {
> + tree utype, ures, vce;
> + switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))))
> + {
> + case E_QImode:
> + utype = unsigned_intQI_type_node;
> + break;
> + case E_HImode:
> + utype = unsigned_intHI_type_node;
> + break;
> + case E_SImode:
> + utype = unsigned_intSI_type_node;
> + break;
> + case E_DImode:
> + utype = long_long_unsigned_type_node;
> + break;
> + default:
> + gcc_unreachable ();
> + }
> + utype = get_same_sized_vectype (utype, TREE_TYPE (arg0));
> + /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
> + instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */
> + ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
> + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> + loc = gimple_location (stmt);
> + vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
> + g = gimple_build_assign (gimple_call_lhs (stmt),
> + VIEW_CONVERT_EXPR, vce);
> + gsi_replace (gsi, g, false);
> + }
> return true;
>
> default:
> diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
> new file mode 100644
> index 00000000000..cd05763b9bf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110108.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2" } */
> +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> +#include <immintrin.h>
> +
> +__m128i do_stuff_128(__m128i X0, __m128i X1) {
> + __m128i AbsX0 = _mm_abs_epi8(X0);
> + __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
> + return Result;
> +}
> +
> +__m256i do_stuff_256(__m256i X0, __m256i X1) {
> + __m256i AbsX0 = _mm256_abs_epi8(X0);
> + __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
> + return Result;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>
On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> > TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> > uses ABSU_EXPR + VCE instead of ABS_EXPR.
> >
> > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> > vector absm2 is guarded with TARGET_MMX_WITH_SSE.
>
>This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see
>the definition of T_M_W_S in i386.h. OTOH, these builtins are
>available for TARGET_MMX, so I'm not sure if the above check is needed
>at all.
BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0,
CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB,
UNKNOWN, (int) V8QI_FTYPE_V8QI)
ISA requirement(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be
checked by ix86_check_builtin_isa_match which is at the beginning of
ix86_gimple_fold_builtin.
Here, we're folding those builtin into gimple ABSU_EXPR, and
ABSU_EXPR<vector> will be lowered by vec_lower pass when backend
doesn't support corressponding absm2_optab, that's why i only check
TARGET_64BIT here.
> Please note that we are using builtins here, so we should not fold to
> absm2, but to ssse3_absm2, which is also available with TARGET_MMX.
Yes, that exactly why I checked TARGET_64BIT here, w/ TARGET_64BIT,
backend suppport absm2_optab which exactly matches ssse3_absm2.
w/o TARGET_64BIT, the builtin shouldn't folding into gimple ABSU_EXPR,
but let backend expanded to ssse3_absm2.
>
> Uros.
>
> >
> > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > Ok for trunk?
> >
> >
> > gcc/ChangeLog:
> >
> > PR target/110108
> > * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
> > _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
> > ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
> > TARGET_64BIT.
> > * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
> > real codename for __builtin_ia32_pabs{b,w,d}.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/pr110108.c: New test.
> > ---
> > gcc/config/i386/i386-builtin.def | 6 ++--
> > gcc/config/i386/i386.cc | 44 ++++++++++++++++++++----
> > gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++
> > 3 files changed, 56 insertions(+), 10 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c
> >
> > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> > index 383b68a9bb8..7ba5b6a9d11 100644
> > --- a/gcc/config/i386/i386-builtin.def
> > +++ b/gcc/config/i386/i386-builtin.def
> > @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
> >
> > /* SSSE3 */
> > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
> > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
> > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
> > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> >
> > BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
> > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index d4ff56ee8dd..b09b3c79e99 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -18433,6 +18433,7 @@ bool
> > ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> > {
> > gimple *stmt = gsi_stmt (*gsi), *g;
> > + gimple_seq stmts = NULL;
> > tree fndecl = gimple_call_fndecl (stmt);
> > gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
> > int n_args = gimple_call_num_args (stmt);
> > @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> > {
> > loc = gimple_location (stmt);
> > tree type = TREE_TYPE (arg2);
> > - gimple_seq stmts = NULL;
> > if (VECTOR_FLOAT_TYPE_P (type))
> > {
> > tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
> > @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> > tree zero_vec = build_zero_cst (type);
> > tree minus_one_vec = build_minus_one_cst (type);
> > tree cmp_type = truth_type_for (type);
> > - gimple_seq stmts = NULL;
> > tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
> > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > g = gimple_build_assign (gimple_call_lhs (stmt),
> > @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> > break;
> >
> > case IX86_BUILTIN_PABSB:
> > + case IX86_BUILTIN_PABSW:
> > + case IX86_BUILTIN_PABSD:
> > + /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
> > + if (!TARGET_64BIT)
> > + break;
> > + /* FALLTHRU. */
> > case IX86_BUILTIN_PABSB128:
> > case IX86_BUILTIN_PABSB256:
> > case IX86_BUILTIN_PABSB512:
> > - case IX86_BUILTIN_PABSW:
> > case IX86_BUILTIN_PABSW128:
> > case IX86_BUILTIN_PABSW256:
> > case IX86_BUILTIN_PABSW512:
> > - case IX86_BUILTIN_PABSD:
> > case IX86_BUILTIN_PABSD128:
> > case IX86_BUILTIN_PABSD256:
> > case IX86_BUILTIN_PABSD512:
> > @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> > if (n_args > 1
> > && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
> > break;
> > - loc = gimple_location (stmt);
> > - g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
> > - gsi_replace (gsi, g, false);
> > + {
> > + tree utype, ures, vce;
> > + switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))))
> > + {
> > + case E_QImode:
> > + utype = unsigned_intQI_type_node;
> > + break;
> > + case E_HImode:
> > + utype = unsigned_intHI_type_node;
> > + break;
> > + case E_SImode:
> > + utype = unsigned_intSI_type_node;
> > + break;
> > + case E_DImode:
> > + utype = long_long_unsigned_type_node;
> > + break;
> > + default:
> > + gcc_unreachable ();
> > + }
> > + utype = get_same_sized_vectype (utype, TREE_TYPE (arg0));
> > + /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
> > + instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */
> > + ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
> > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > + loc = gimple_location (stmt);
> > + vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
> > + g = gimple_build_assign (gimple_call_lhs (stmt),
> > + VIEW_CONVERT_EXPR, vce);
> > + gsi_replace (gsi, g, false);
> > + }
> > return true;
> >
> > default:
> > diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
> > new file mode 100644
> > index 00000000000..cd05763b9bf
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr110108.c
> > @@ -0,0 +1,16 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-mavx2 -O2" } */
> > +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> > +#include <immintrin.h>
> > +
> > +__m128i do_stuff_128(__m128i X0, __m128i X1) {
> > + __m128i AbsX0 = _mm_abs_epi8(X0);
> > + __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
> > + return Result;
> > +}
> > +
> > +__m256i do_stuff_256(__m256i X0, __m256i X1) {
> > + __m256i AbsX0 = _mm256_abs_epi8(X0);
> > + __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
> > + return Result;
> > +}
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >
On Tue, Jun 6, 2023 at 1:42 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> > > TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> > > uses ABSU_EXPR + VCE instead of ABS_EXPR.
> > >
> > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> > > vector absm2 is guarded with TARGET_MMX_WITH_SSE.
> >
> >This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see
> >the definition of T_M_W_S in i386.h. OTOH, these builtins are
> >available for TARGET_MMX, so I'm not sure if the above check is needed
> >at all.
> BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0,
> CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB,
> UNKNOWN, (int) V8QI_FTYPE_V8QI)
>
> ISA requirement(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be
> checked by ix86_check_builtin_isa_match which is at the beginning of
> ix86_gimple_fold_builtin.
> Here, we're folding those builtin into gimple ABSU_EXPR, and
> ABSU_EXPR<vector> will be lowered by vec_lower pass when backend
> doesn't support corressponding absm2_optab, that's why i only check
> TARGET_64BIT here.
>
> > Please note that we are using builtins here, so we should not fold to
> > absm2, but to ssse3_absm2, which is also available with TARGET_MMX.
> Yes, that exactly why I checked TARGET_64BIT here, w/ TARGET_64BIT,
> backend suppport absm2_optab which exactly matches ssse3_absm2.
> w/o TARGET_64BIT, the builtin shouldn't folding into gimple ABSU_EXPR,
> but let backend expanded to ssse3_absm2.
Thanks for the explanation, but for consistency, I'd recommend
checking TARGET_MMX_WITH_SSE (= TARGET_64BIT && TARGET_SSE2) here. The
macro is self-explanatory, while the usage of TARGET_64BIT is not that
descriptive.
Uros.
On Tue, Jun 6, 2023 at 10:36 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Tue, Jun 6, 2023 at 1:42 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
> > > <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> > > > TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> > > > uses ABSU_EXPR + VCE instead of ABS_EXPR.
> > > >
> > > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> > > > vector absm2 is guarded with TARGET_MMX_WITH_SSE.
> > >
> > >This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see
> > >the definition of T_M_W_S in i386.h. OTOH, these builtins are
> > >available for TARGET_MMX, so I'm not sure if the above check is needed
> > >at all.
> > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0,
> > CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB,
> > UNKNOWN, (int) V8QI_FTYPE_V8QI)
> >
> > ISA requirement(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be
> > checked by ix86_check_builtin_isa_match which is at the beginning of
> > ix86_gimple_fold_builtin.
> > Here, we're folding those builtin into gimple ABSU_EXPR, and
> > ABSU_EXPR<vector> will be lowered by vec_lower pass when backend
> > doesn't support corressponding absm2_optab, that's why i only check
> > TARGET_64BIT here.
> >
> > > Please note that we are using builtins here, so we should not fold to
> > > absm2, but to ssse3_absm2, which is also available with TARGET_MMX.
> > Yes, that exactly why I checked TARGET_64BIT here, w/ TARGET_64BIT,
> > backend suppport absm2_optab which exactly matches ssse3_absm2.
> > w/o TARGET_64BIT, the builtin shouldn't folding into gimple ABSU_EXPR,
> > but let backend expanded to ssse3_absm2.
>
> Thanks for the explanation, but for consistency, I'd recommend
> checking TARGET_MMX_WITH_SSE (= TARGET_64BIT && TARGET_SSE2) here. The
> macro is self-explanatory, while the usage of TARGET_64BIT is not that
> descriptive.
Sure.
>
> Uros.
On Wed, Jun 7, 2023 at 8:31 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, Jun 6, 2023 at 10:36 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Tue, Jun 6, 2023 at 1:42 PM Hongtao Liu <crazylht@gmail.com> wrote:
> > >
> > > On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > >
> > > > On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
> > > > <gcc-patches@gcc.gnu.org> wrote:
> > > > >
> > > > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> > > > > TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> > > > > uses ABSU_EXPR + VCE instead of ABS_EXPR.
> > > > >
> > > > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> > > > > vector absm2 is guarded with TARGET_MMX_WITH_SSE.
> > > >
> > > >This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see
> > > >the definition of T_M_W_S in i386.h. OTOH, these builtins are
> > > >available for TARGET_MMX, so I'm not sure if the above check is needed
> > > >at all.
> > > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0,
> > > CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB,
> > > UNKNOWN, (int) V8QI_FTYPE_V8QI)
> > >
> > > ISA requirement(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be
> > > checked by ix86_check_builtin_isa_match which is at the beginning of
> > > ix86_gimple_fold_builtin.
> > > Here, we're folding those builtin into gimple ABSU_EXPR, and
> > > ABSU_EXPR<vector> will be lowered by vec_lower pass when backend
> > > doesn't support corressponding absm2_optab, that's why i only check
> > > TARGET_64BIT here.
> > >
> > > > Please note that we are using builtins here, so we should not fold to
> > > > absm2, but to ssse3_absm2, which is also available with TARGET_MMX.
> > > Yes, that exactly why I checked TARGET_64BIT here, w/ TARGET_64BIT,
> > > backend suppport absm2_optab which exactly matches ssse3_absm2.
> > > w/o TARGET_64BIT, the builtin shouldn't folding into gimple ABSU_EXPR,
> > > but let backend expanded to ssse3_absm2.
> >
> > Thanks for the explanation, but for consistency, I'd recommend
> > checking TARGET_MMX_WITH_SSE (= TARGET_64BIT && TARGET_SSE2) here. The
> > macro is self-explanatory, while the usage of TARGET_64BIT is not that
> > descriptive.
> Sure.
Pushed to trunk.
> >
> > Uros.
>
>
>
> --
> BR,
> Hongtao
@@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
/* SSSE3 */
BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
@@ -18433,6 +18433,7 @@ bool
ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
gimple *stmt = gsi_stmt (*gsi), *g;
+ gimple_seq stmts = NULL;
tree fndecl = gimple_call_fndecl (stmt);
gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
int n_args = gimple_call_num_args (stmt);
@@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
loc = gimple_location (stmt);
tree type = TREE_TYPE (arg2);
- gimple_seq stmts = NULL;
if (VECTOR_FLOAT_TYPE_P (type))
{
tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
@@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
tree zero_vec = build_zero_cst (type);
tree minus_one_vec = build_minus_one_cst (type);
tree cmp_type = truth_type_for (type);
- gimple_seq stmts = NULL;
tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
g = gimple_build_assign (gimple_call_lhs (stmt),
@@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
break;
case IX86_BUILTIN_PABSB:
+ case IX86_BUILTIN_PABSW:
+ case IX86_BUILTIN_PABSD:
+ /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
+ if (!TARGET_64BIT)
+ break;
+ /* FALLTHRU. */
case IX86_BUILTIN_PABSB128:
case IX86_BUILTIN_PABSB256:
case IX86_BUILTIN_PABSB512:
- case IX86_BUILTIN_PABSW:
case IX86_BUILTIN_PABSW128:
case IX86_BUILTIN_PABSW256:
case IX86_BUILTIN_PABSW512:
- case IX86_BUILTIN_PABSD:
case IX86_BUILTIN_PABSD128:
case IX86_BUILTIN_PABSD256:
case IX86_BUILTIN_PABSD512:
@@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
if (n_args > 1
&& !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
break;
- loc = gimple_location (stmt);
- g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
- gsi_replace (gsi, g, false);
+ {
+ tree utype, ures, vce;
+ switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))))
+ {
+ case E_QImode:
+ utype = unsigned_intQI_type_node;
+ break;
+ case E_HImode:
+ utype = unsigned_intHI_type_node;
+ break;
+ case E_SImode:
+ utype = unsigned_intSI_type_node;
+ break;
+ case E_DImode:
+ utype = long_long_unsigned_type_node;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ utype = get_same_sized_vectype (utype, TREE_TYPE (arg0));
+ /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
+ instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */
+ ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ loc = gimple_location (stmt);
+ vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
+ g = gimple_build_assign (gimple_call_lhs (stmt),
+ VIEW_CONVERT_EXPR, vce);
+ gsi_replace (gsi, g, false);
+ }
return true;
default:
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
+#include <immintrin.h>
+
+__m128i do_stuff_128(__m128i X0, __m128i X1) {
+ __m128i AbsX0 = _mm_abs_epi8(X0);
+ __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
+ return Result;
+}
+
+__m256i do_stuff_256(__m256i X0, __m256i X1) {
+ __m256i AbsX0 = _mm256_abs_epi8(X0);
+ __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
+ return Result;
+}