Generate vpblendd instead of vpblendw for V4SI under AVX2.

Message ID 20230329072126.2297953-1-hongtao.liu@intel.com
State Accepted
Headers
Series Generate vpblendd instead of vpblendw for V4SI under AVX2.

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

liuhongt March 29, 2023, 7:21 a.m. UTC
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for GCC14 stage-1 (or maybe trunk)?

gcc/ChangeLog:

	* config/i386/i386-expand.cc (expand_vec_perm_blend): Generate
	vpblendd instead of vpblendw for V4SI under AVX2.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr88828-0.c: Adjust testcase.
---
 gcc/config/i386/i386-expand.cc            | 18 ++++++++++++++----
 gcc/testsuite/gcc.target/i386/pr88828-0.c |  2 +-
 2 files changed, 15 insertions(+), 5 deletions(-)
  

Comments

Uros Bizjak March 29, 2023, 6:26 p.m. UTC | #1
On Wed, Mar 29, 2023 at 9:21 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> Ok for GCC14 stage-1(or maybe trunk)?
>
> gcc/ChangeLog:
>
>         * config/i386/i386-expand.cc (expand_vec_perm_blend): Generate
>         vpblendd instead of vpblendw for V4SI under avx2.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr88828-0.c: Adjust testcase.

Looks safe, so OK for trunk.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-expand.cc            | 18 ++++++++++++++----
>  gcc/testsuite/gcc.target/i386/pr88828-0.c |  2 +-
>  2 files changed, 15 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index c1300dc4e26..1c436262ee5 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -19069,10 +19069,20 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
>        goto do_subreg;
>
>      case E_V4SImode:
> -      for (i = 0; i < 4; ++i)
> -       mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
> -      vmode = V8HImode;
> -      goto do_subreg;
> +      if (TARGET_AVX2)
> +       {
> +         /* Use vpblendd instead of vpblendw.  */
> +         for (i = 0; i < nelt; ++i)
> +           mask |= ((unsigned HOST_WIDE_INT) (d->perm[i] >= nelt)) << i;
> +         break;
> +       }
> +      else
> +       {
> +         for (i = 0; i < 4; ++i)
> +           mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
> +         vmode = V8HImode;
> +         goto do_subreg;
> +       }
>
>      case E_V16QImode:
>        /* See if bytes move in pairs so we can use pblendw with
> diff --git a/gcc/testsuite/gcc.target/i386/pr88828-0.c b/gcc/testsuite/gcc.target/i386/pr88828-0.c
> index 3ddb2d13526..441c441b51d 100644
> --- a/gcc/testsuite/gcc.target/i386/pr88828-0.c
> +++ b/gcc/testsuite/gcc.target/i386/pr88828-0.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -msse4.2" } */
> +/* { dg-options "-O2 -msse4.2 -mno-avx2" } */
>
>  typedef int v4si __attribute__((vector_size(16)));
>  typedef float v4sf __attribute__((vector_size(16)));
> --
> 2.39.1.388.g2fc9e9ca3c
>
  

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index c1300dc4e26..1c436262ee5 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -19069,10 +19069,20 @@  expand_vec_perm_blend (struct expand_vec_perm_d *d)
       goto do_subreg;
 
     case E_V4SImode:
-      for (i = 0; i < 4; ++i)
-	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
-      vmode = V8HImode;
-      goto do_subreg;
+      if (TARGET_AVX2)
+	{
+	  /* Use vpblendd instead of vpblendw.  */
+	  for (i = 0; i < nelt; ++i)
+	    mask |= ((unsigned HOST_WIDE_INT) (d->perm[i] >= nelt)) << i;
+	  break;
+	}
+      else
+	{
+	  for (i = 0; i < 4; ++i)
+	    mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
+	  vmode = V8HImode;
+	  goto do_subreg;
+	}
 
     case E_V16QImode:
       /* See if bytes move in pairs so we can use pblendw with
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-0.c b/gcc/testsuite/gcc.target/i386/pr88828-0.c
index 3ddb2d13526..441c441b51d 100644
--- a/gcc/testsuite/gcc.target/i386/pr88828-0.c
+++ b/gcc/testsuite/gcc.target/i386/pr88828-0.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -msse4.2" } */
+/* { dg-options "-O2 -msse4.2 -mno-avx2" } */
 
 typedef int v4si __attribute__((vector_size(16)));
 typedef float v4sf __attribute__((vector_size(16)));