Don't fold _mm{, 256}_blendv_epi8 into (mask < 0 ? src1 : src2) when -funsigned-char.

Message ID 20230606043121.24843-2-hongtao.liu@intel.com
State Accepted
Headers
Series Don't fold _mm{, 256}_blendv_epi8 into (mask < 0 ? src1 : src2) when -funsigned-char. |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

liuhongt June 6, 2023, 4:31 a.m. UTC
  Since mask < 0 will be always false when -funsigned-char, but
vpblendvb needs to check the most significant bit.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk and backport to GCC12/GCC13 release branch?

gcc/ChangeLog:

	PR target/110108
	* config/i386/i386-builtin.def (BDESC): Replace
	CODE_FOR_nothing with real code name for blendvb builtins.
	* config/i386/i386.cc (ix86_gimple_fold_builtin): Don't fold
	_mm{,256}_blendv_epi8 into (mask < 0 ? src1 : src2) when
	-funsigned-char.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110108-2.c: New test.
---
 gcc/config/i386/i386-builtin.def           |  4 ++--
 gcc/config/i386/i386.cc                    |  7 +++++++
 gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++
 3 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c
  

Comments

Andrew Pinski June 6, 2023, 4:46 a.m. UTC | #1
On Mon, Jun 5, 2023 at 9:34 PM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Since mask < 0 will be always false when -funsigned-char, but
> vpblendvb needs to check the most significant bit.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk and backport to GCC12/GCC13 release branch?

I think this is a better patch and will always be correct and still
get folded at the gimple level (correctly):
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index d4ff56ee8dd..02bf5ba93a5 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -18561,8 +18561,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
              tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
                ? intSI_type_node : intDI_type_node;
              type = get_same_sized_vectype (itype, type);
-             arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
            }
+         else
+           type = signed_type_for (type);
+         arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
          tree zero_vec = build_zero_cst (type);
          tree cmp_type = truth_type_for (type);
          tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);


Thanks,
Andrew Pinski


>
> gcc/ChangeLog:
>
>         PR target/110108
>         * config/i386/i386-builtin.def (BDESC): Replace
>         CODE_FOR_nothing with real code name for blendvb builtins.
>         * config/i386/i386.cc (ix86_gimple_fold_builtin): Don't fold
>         _mm{,256}_blendv_epi8 into (mask < 0 ? src1 : src2) when
>         -funsigned-char.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr110108-2.c: New test.
> ---
>  gcc/config/i386/i386-builtin.def           |  4 ++--
>  gcc/config/i386/i386.cc                    |  7 +++++++
>  gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++
>  3 files changed, 23 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c
>
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index 7ba5b6a9d11..b4c99ff62a2 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -944,7 +944,7 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", I
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_insertps_v4sf, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT)
> -BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI)
> +BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI)
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
>
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI)
> @@ -1198,7 +1198,7 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256",  IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256",  IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
> -BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
> +BDESC (OPTION_MASK_ISA_AVX2, 0,  CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index b09b3c79e99..f8f6c26c8eb 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -18548,6 +18548,13 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        /* FALLTHRU.  */
>      case IX86_BUILTIN_PBLENDVB128:
>      case IX86_BUILTIN_BLENDVPS:
> +      /* Don't fold PBLENDVB when funsigned-char since mask < 0
> +        will always be false in the gimple level.  */
> +      if ((fn_code == IX86_BUILTIN_PBLENDVB128
> +          || fn_code == IX86_BUILTIN_PBLENDVB256)
> +         && !flag_signed_char)
> +       break;
> +
>        gcc_assert (n_args == 3);
>        arg0 = gimple_call_arg (stmt, 0);
>        arg1 = gimple_call_arg (stmt, 1);
> diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c
> new file mode 100644
> index 00000000000..2d1d2fd4991
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2 -funsigned-char" } */
> +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> +
> +#include <immintrin.h>
> +__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) {
> +  __m128i Result = _mm_blendv_epi8(X0, X1, X2);
> +  return Result;
> +}
> +
> +__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) {
> +  __m256i Result = _mm256_blendv_epi8(X0, X1, X2);
> +  return Result;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>
  

Patch

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 7ba5b6a9d11..b4c99ff62a2 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -944,7 +944,7 @@  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", I
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_insertps_v4sf, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
 
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI)
@@ -1198,7 +1198,7 @@  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256",  IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256",  IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
+BDESC (OPTION_MASK_ISA_AVX2, 0,  CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b09b3c79e99..f8f6c26c8eb 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -18548,6 +18548,13 @@  ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       /* FALLTHRU.  */
     case IX86_BUILTIN_PBLENDVB128:
     case IX86_BUILTIN_BLENDVPS:
+      /* Don't fold PBLENDVB when funsigned-char since mask < 0
+	 will always be false in the gimple level.  */
+      if ((fn_code == IX86_BUILTIN_PBLENDVB128
+	   || fn_code == IX86_BUILTIN_PBLENDVB256)
+	  && !flag_signed_char)
+	break;
+
       gcc_assert (n_args == 3);
       arg0 = gimple_call_arg (stmt, 0);
       arg1 = gimple_call_arg (stmt, 1);
diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c
new file mode 100644
index 00000000000..2d1d2fd4991
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2 -funsigned-char" } */
+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
+
+#include <immintrin.h>
+__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) {
+  __m128i Result = _mm_blendv_epi8(X0, X1, X2);
+  return Result;
+}
+
+__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) {
+  __m256i Result = _mm256_blendv_epi8(X0, X1, X2);
+  return Result;
+}