[x86] Add AVX512 support for STV of SI/DImode rotation by constant.

Message ID 035a01d9b2a4$f6078950$e2169bf0$@nextmovesoftware.com
State Unresolved
Headers
Series [x86] Add AVX512 support for STV of SI/DImode rotation by constant. |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Roger Sayle July 9, 2023, 8:35 p.m. UTC
  Following Uros' suggestion, this patch adds support for AVX512VL's
vpro[lr][dq] instructions to the recently added scalar-to-vector (STV)
enhancements to handle DImode and SImode rotations by a constant.

For the test cases:

unsigned long long rot1(unsigned long long x) {
  return (x>>1) | (x<<63);
}

void mem1(unsigned long long *p) {
  *p = rot1(*p);
}

with -m32 -O2 -mavx512vl, we currently generate:

rot1:   movl    4(%esp), %eax
        movl    8(%esp), %edx
        movl    %eax, %ecx
        shrdl   $1, %edx, %eax
        shrdl   $1, %ecx, %edx
        ret

mem1:   movl    4(%esp), %eax
        vmovq   (%eax), %xmm0
        vpshufd $20, %xmm0, %xmm0
        vpsrlq  $1, %xmm0, %xmm0
        vpshufd $136, %xmm0, %xmm0
        vmovq   %xmm0, (%eax)
        ret

with this patch, we now generate:

rot1:   vmovq   4(%esp), %xmm0
        vprorq  $1, %xmm0, %xmm0
        vmovd   %xmm0, %eax
        vpextrd $1, %xmm0, %edx
        ret

mem1:   movl    4(%esp), %eax
        vmovq   (%eax), %xmm0
        vprorq  $1, %xmm0, %xmm0
        vmovq   %xmm0, (%eax)
        ret


This patch has been tested on x86_64-pc-linux-gnu (cascadelake which has
avx512) with make bootstrap and make -k check, both with and without
--target_board=unix{-m32} with no new failures.  Ok for mainline?


2023-07-09  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/i386/i386-features.cc (compute_convert_gain): Tweak
        gains/costs for ROTATE/ROTATERT by integer constant on AVX512VL.
        (general_scalar_chain::convert_rotate): On TARGET_AVX512F generate
        avx512vl_rolv2di or avx512vl_rolv4si when appropriate.

gcc/testsuite/ChangeLog
        * gcc.target/i386/avx512vl-stv-rotatedi-1.c: New test case.


Cheers,
Roger
--
  

Comments

Uros Bizjak July 10, 2023, 6:18 a.m. UTC | #1
On Sun, Jul 9, 2023 at 10:35 PM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
> Following Uros' suggestion, this patch adds support for AVX512VL's
> vpro[lr][dq] instructions to the recently added scalar-to-vector (STV)
> enhancements to handle DImode and SImode rotations by a constant.
>
> For the test cases:
>
> unsigned long long rot1(unsigned long long x) {
>   return (x>>1) | (x<<63);
> }
>
> void mem1(unsigned long long *p) {
>   *p = rot1(*p);
> }
>
> with -m32 -O2 -mavx512vl, we currently generate:
>
> rot1:   movl    4(%esp), %eax
>         movl    8(%esp), %edx
>         movl    %eax, %ecx
>         shrdl   $1, %edx, %eax
>         shrdl   $1, %ecx, %edx
>         ret
>
> mem1:   movl    4(%esp), %eax
>         vmovq   (%eax), %xmm0
>         vpshufd $20, %xmm0, %xmm0
>         vpsrlq  $1, %xmm0, %xmm0
>         vpshufd $136, %xmm0, %xmm0
>         vmovq   %xmm0, (%eax)
>         ret
>
> with this patch, we now generate:
>
> rot1:   vmovq   4(%esp), %xmm0
>         vprorq  $1, %xmm0, %xmm0
>         vmovd   %xmm0, %eax
>         vpextrd $1, %xmm0, %edx
>         ret
>
> mem1:   movl    4(%esp), %eax
>         vmovq   (%eax), %xmm0
>         vprorq  $1, %xmm0, %xmm0
>         vmovq   %xmm0, (%eax)
>         ret
>
>
> This patch has been tested on x86_64-pc-linux-gnu (cascadelake which has
> avx512) with make bootstrap and make -k check, both with and without
> --target_board=unix{-m32} with no new failures.  Ok for mainline?
>
>
> 2023-07-09  Roger Sayle  <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
>         * config/i386/i386-features.cc (compute_convert_gain): Tweak
>         gains/costs for ROTATE/ROTATERT by integer constant on AVX512VL.
>         (general_scalar_chain::convert_rotate): On TARGET_AVX512F generate
>         avx512vl_rolv2di or avx512vl_rolv4si when appropriate.
>
> gcc/testsuite/ChangeLog
>         * gcc.target/i386/avx512vl-stv-rotatedi-1.c: New test case.

OK.

Thanks,
Uros.

>
>
> Cheers,
> Roger
> --
>
  

Patch

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 2e751d1..4d69251 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -585,7 +585,9 @@  general_scalar_chain::compute_convert_gain ()
 	  case ROTATE:
 	  case ROTATERT:
 	    igain += m * ix86_cost->shift_const;
-	    if (smode == DImode)
+	    if (TARGET_AVX512F)
+	      igain -= ix86_cost->sse_op;
+	    else if (smode == DImode)
 	      {
 		int bits = INTVAL (XEXP (src, 1));
 		if ((bits & 0x0f) == 0)
@@ -1225,6 +1227,8 @@  general_scalar_chain::convert_rotate (enum rtx_code code, rtx op0, rtx op1,
 	  emit_insn_before (pat, insn);
 	  result = gen_lowpart (V2DImode, tmp1);
 	}
+      else if (TARGET_AVX512F)
+	result = simplify_gen_binary (code, V2DImode, op0, op1);
       else if (bits == 16 || bits == 48)
 	{
 	  rtx tmp1 = gen_reg_rtx (V8HImode);
@@ -1269,6 +1273,8 @@  general_scalar_chain::convert_rotate (enum rtx_code code, rtx op0, rtx op1,
       emit_insn_before (pat, insn);
       result = gen_lowpart (V4SImode, tmp1);
     }
+  else if (TARGET_AVX512F)
+    result = simplify_gen_binary (code, V4SImode, op0, op1);
   else
     {
       if (code == ROTATE)
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-stv-rotatedi-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-stv-rotatedi-1.c
new file mode 100644
index 0000000..2f0ead8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-stv-rotatedi-1.c
@@ -0,0 +1,35 @@ 
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -mavx512vl" } */
+
+unsigned long long rot1(unsigned long long x) { return (x>>1) | (x<<63); }
+unsigned long long rot2(unsigned long long x) { return (x>>2) | (x<<62); }
+unsigned long long rot3(unsigned long long x) { return (x>>3) | (x<<61); }
+unsigned long long rot4(unsigned long long x) { return (x>>4) | (x<<60); }
+unsigned long long rot5(unsigned long long x) { return (x>>5) | (x<<59); }
+unsigned long long rot6(unsigned long long x) { return (x>>6) | (x<<58); }
+unsigned long long rot7(unsigned long long x) { return (x>>7) | (x<<57); }
+unsigned long long rot8(unsigned long long x) { return (x>>8) | (x<<56); }
+unsigned long long rot9(unsigned long long x) { return (x>>9) | (x<<55); }
+unsigned long long rot10(unsigned long long x) { return (x>>10) | (x<<54); }
+unsigned long long rot15(unsigned long long x) { return (x>>15) | (x<<49); }
+unsigned long long rot16(unsigned long long x) { return (x>>16) | (x<<48); }
+unsigned long long rot17(unsigned long long x) { return (x>>17) | (x<<47); }
+unsigned long long rot20(unsigned long long x) { return (x>>20) | (x<<44); }
+unsigned long long rot24(unsigned long long x) { return (x>>24) | (x<<40); }
+unsigned long long rot30(unsigned long long x) { return (x>>30) | (x<<34); }
+unsigned long long rot31(unsigned long long x) { return (x>>31) | (x<<33); }
+unsigned long long rot32(unsigned long long x) { return (x>>32) | (x<<32); }
+unsigned long long rot33(unsigned long long x) { return (x>>33) | (x<<31); }
+unsigned long long rot34(unsigned long long x) { return (x>>34) | (x<<30); }
+unsigned long long rot40(unsigned long long x) { return (x>>40) | (x<<24); }
+unsigned long long rot42(unsigned long long x) { return (x>>42) | (x<<22); }
+unsigned long long rot48(unsigned long long x) { return (x>>48) | (x<<16); }
+unsigned long long rot50(unsigned long long x) { return (x>>50) | (x<<14); }
+unsigned long long rot56(unsigned long long x) { return (x>>56) | (x<<8); }
+unsigned long long rot58(unsigned long long x) { return (x>>58) | (x<<6); }
+unsigned long long rot60(unsigned long long x) { return (x>>60) | (x<<4); }
+unsigned long long rot61(unsigned long long x) { return (x>>61) | (x<<3); }
+unsigned long long rot62(unsigned long long x) { return (x>>62) | (x<<2); }
+unsigned long long rot63(unsigned long long x) { return (x>>63) | (x<<1); }
+
+/* { dg-final { scan-assembler-times "vpro\[lr\]q" 29 } } */