i386: Prevent splitting to xmm16+ when !TARGET_AVX512VL

Message ID 20231020062050.971264-1-haochen.jiang@intel.com
State Accepted
Headers
Series i386: Prevent splitting to xmm16+ when !TARGET_AVX512VL |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Jiang, Haochen Oct. 20, 2023, 6:20 a.m. UTC
  Hi all,

Currently, the splitter may pick xmm16+/ymm16+ registers without AVX512VL,
which eventually leads to an ICE, as reported in PR111753.

This patch aims to fix that.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

Thx,
Haochen

gcc/ChangeLog:

	PR target/111753
	* config/i386/i386.cc (ix86_standard_x87sse_constant_load_p):
	Do not split to xmm16+ when !TARGET_AVX512VL.

gcc/testsuite/ChangeLog:

	PR target/111753
	* gcc.target/i386/pr111753.c: New test.
---
 gcc/config/i386/i386.cc                  |  3 ++
 gcc/testsuite/gcc.target/i386/pr111753.c | 69 ++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr111753.c
  

Comments

liuhongt Oct. 20, 2023, 6:27 a.m. UTC | #1
> -----Original Message-----
> From: Jiang, Haochen <haochen.jiang@intel.com>
> Sent: Friday, October 20, 2023 2:21 PM
> To: gcc-patches@gcc.gnu.org
> Cc: ubizjak@gmail.com; Liu, Hongtao <hongtao.liu@intel.com>
> Subject: [PATCH] i386: Prevent splitting to xmm16+ when !TARGET_AVX512VL
> 
> Hi all,
> 
> Currently, there will be a chance in split to use x/ymm16+ w/o AVX512VL,
> which finally leads to an ICE as pr111753 does.
> 
> This patch aims to fix that.
> 
> Regtested on x86_64-pc-linux-gnu. Ok for trunk?
LGTM.
> 
> Thx,
> Haochen
> 
> gcc/ChangeLog:
> 
> 	PR target/111753
> 	* config/i386/i386.cc (ix86_standard_x87sse_constant_load_p):
> 	Do not split to xmm16+ when !TARGET_AVX512VL.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR target/111753
> 	* gcc.target/i386/pr111753.c: New test.
> ---
>  gcc/config/i386/i386.cc                  |  3 ++
>  gcc/testsuite/gcc.target/i386/pr111753.c | 69 ++++++++++++++++++++++++
>  2 files changed, 72 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr111753.c
> 
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 641e7680335..5f8c5eb98a2 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -5481,6 +5481,9 @@ ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
>    if (src == NULL
>        || (SSE_REGNO_P (REGNO (dst))
>  	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
> +      || (!TARGET_AVX512VL
> +	  && EXT_REX_SSE_REGNO_P (REGNO (dst))
> +	  && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
>        || (STACK_REGNO_P (REGNO (dst))
>  	   && standard_80387_constant_p (src) < 1))
>      return false;
> diff --git a/gcc/testsuite/gcc.target/i386/pr111753.c b/gcc/testsuite/gcc.target/i386/pr111753.c
> new file mode 100644
> index 00000000000..16ceca6ddc6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr111753.c
> @@ -0,0 +1,69 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512bw -fno-tree-ter -Wno-div-by-zero" } */
> +
> +typedef int __attribute__((__vector_size__ (8))) v64u8;
> +typedef char __attribute__((__vector_size__ (16))) v128u8;
> +typedef int __attribute__((__vector_size__ (16))) v128u32;
> +typedef int __attribute__((__vector_size__ (32))) v256u8;
> +typedef int __attribute__((__vector_size__ (64))) v512u8;
> +typedef short __attribute__((__vector_size__ (4))) v32s16;
> +typedef short __attribute__((__vector_size__ (16))) v128s16;
> +typedef short __attribute__((__vector_size__ (32))) v256s16;
> +typedef _Float16 __attribute__((__vector_size__ (16))) f16;
> +typedef _Float32 f32;
> +typedef double __attribute__((__vector_size__ (64))) v512f64;
> +typedef _Decimal32 d32;
> +typedef _Decimal64 __attribute__((__vector_size__ (32))) v256d64;
> +typedef _Decimal64 __attribute__((__vector_size__ (64))) v512d64;
> +d32 foo0_d32_0, foo0_ret;
> +v256d64 foo0_v256d64_0;
> +v128s16 foo0_v128s16_0;
> +int foo0_v256d128_0;
> +
> +extern void bar(int);
> +
> +void
> +foo (v64u8, v128u8 v128u8_0, v128u8 v128s8_0,
> +     v256u8 v256u8_0, int v256s8_0, v512u8 v512u8_0, int v512s8_0,
> +     v256s16 v256s16_0,
> +     v512u8 v512s16_0,
> +     v128u32 v128u64_0,
> +     v128u32 v128s64_0,
> +     int, int, __int128 v128u128_0, __int128 v128s128_0, v128u32 v128f64_0)
> +{
> +  v512d64 v512d64_0;
> +  v256u8 v256f32_0, v256d64_1 = foo0_v256d64_0 == foo0_d32_0;
> +  f32 f32_0;
> +  f16 v128f16_0;
> +  f32_0 /= 0;
> +  v128u8 v128u8_1 = v128u8_0 != 0;
> +  int v256d32_1;
> +  v256f32_0 /= 0;
> +  v32s16 v32s16_1 = __builtin_shufflevector ((v128s16) { }, v256s16_0, 5, 10);
> +  v512f64 v512f64_1 = __builtin_convertvector (v512d64_0, v512f64);
> +  v512u8 v512d128_1 = v512s16_0;
> +  v128s16 v128s16_2 =
> +    __builtin_shufflevector ((v32s16) { }, v32s16_1, 0, 3, 2, 1,
> +			     0, 0, 0, 3), v128s16_3 = foo0_v128s16_0 > 0;
> +  v128f16_0 /= 0;
> +  __int128 v128s128_1 = 0 == v128s128_0;
> +  v512u8 v512u8_r = v512u8_0 + v512s8_0 + (v512u8) v512f64_1 + v512s16_0;
> +  v256u8 v256u8_r = ((union {
> +		      v512u8 a;
> +		      v256u8 b;}) v512u8_r).b +
> +    v256u8_0 + v256s8_0 + v256f32_0 + v256d32_1 +
> +    (v256u8) v256d64_1 + foo0_v256d128_0;
> +  v128u8 v128u8_r = ((union {
> +		      v256u8 a;
> +		      v128u8 b;}) v256u8_r).b +
> +    v128u8_0 + v128u8_1 + v128s8_0 + (v128u8) v128s16_2 +
> +    (v128u8) v128s16_3 + (v128u8) v128u64_0 + (v128u8) v128s64_0 +
> +    (v128u8) v128u128_0 + (v128u8) v128s128_1 +
> +    (v128u8) v128f16_0 + (v128u8) v128f64_0;
> +  bar (f32_0 + (int) foo0_d32_0);
> +  foo0_ret = ((union {
> +	       v64u8 a;
> +	       int b;}) ((union {
> +			  v128u8 a;
> +			  v64u8 b;}) v128u8_r).b).b;
> +}
> --
> 2.31.1
  

Patch

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 641e7680335..5f8c5eb98a2 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5481,6 +5481,9 @@  ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
   if (src == NULL
       || (SSE_REGNO_P (REGNO (dst))
 	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
+      || (!TARGET_AVX512VL
+	  && EXT_REX_SSE_REGNO_P (REGNO (dst))
+	  && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
       || (STACK_REGNO_P (REGNO (dst))
 	   && standard_80387_constant_p (src) < 1))
     return false;
diff --git a/gcc/testsuite/gcc.target/i386/pr111753.c b/gcc/testsuite/gcc.target/i386/pr111753.c
new file mode 100644
index 00000000000..16ceca6ddc6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr111753.c
@@ -0,0 +1,69 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512bw -fno-tree-ter -Wno-div-by-zero" } */
+
+typedef int __attribute__((__vector_size__ (8))) v64u8;
+typedef char __attribute__((__vector_size__ (16))) v128u8;
+typedef int __attribute__((__vector_size__ (16))) v128u32;
+typedef int __attribute__((__vector_size__ (32))) v256u8;
+typedef int __attribute__((__vector_size__ (64))) v512u8;
+typedef short __attribute__((__vector_size__ (4))) v32s16;
+typedef short __attribute__((__vector_size__ (16))) v128s16;
+typedef short __attribute__((__vector_size__ (32))) v256s16;
+typedef _Float16 __attribute__((__vector_size__ (16))) f16;
+typedef _Float32 f32;
+typedef double __attribute__((__vector_size__ (64))) v512f64;
+typedef _Decimal32 d32;
+typedef _Decimal64 __attribute__((__vector_size__ (32))) v256d64;
+typedef _Decimal64 __attribute__((__vector_size__ (64))) v512d64;
+d32 foo0_d32_0, foo0_ret;
+v256d64 foo0_v256d64_0;
+v128s16 foo0_v128s16_0;
+int foo0_v256d128_0;
+
+extern void bar(int);
+
+void
+foo (v64u8, v128u8 v128u8_0, v128u8 v128s8_0,
+     v256u8 v256u8_0, int v256s8_0, v512u8 v512u8_0, int v512s8_0,
+     v256s16 v256s16_0,
+     v512u8 v512s16_0,
+     v128u32 v128u64_0,
+     v128u32 v128s64_0,
+     int, int, __int128 v128u128_0, __int128 v128s128_0, v128u32 v128f64_0)
+{
+  v512d64 v512d64_0;
+  v256u8 v256f32_0, v256d64_1 = foo0_v256d64_0 == foo0_d32_0;
+  f32 f32_0;
+  f16 v128f16_0;
+  f32_0 /= 0;
+  v128u8 v128u8_1 = v128u8_0 != 0;
+  int v256d32_1;
+  v256f32_0 /= 0;
+  v32s16 v32s16_1 = __builtin_shufflevector ((v128s16) { }, v256s16_0, 5, 10);
+  v512f64 v512f64_1 = __builtin_convertvector (v512d64_0, v512f64);
+  v512u8 v512d128_1 = v512s16_0;
+  v128s16 v128s16_2 =
+    __builtin_shufflevector ((v32s16) { }, v32s16_1, 0, 3, 2, 1,
+			     0, 0, 0, 3), v128s16_3 = foo0_v128s16_0 > 0;
+  v128f16_0 /= 0;
+  __int128 v128s128_1 = 0 == v128s128_0;
+  v512u8 v512u8_r = v512u8_0 + v512s8_0 + (v512u8) v512f64_1 + v512s16_0;
+  v256u8 v256u8_r = ((union {
+		      v512u8 a;
+		      v256u8 b;}) v512u8_r).b +
+    v256u8_0 + v256s8_0 + v256f32_0 + v256d32_1 +
+    (v256u8) v256d64_1 + foo0_v256d128_0;
+  v128u8 v128u8_r = ((union {
+		      v256u8 a;
+		      v128u8 b;}) v256u8_r).b +
+    v128u8_0 + v128u8_1 + v128s8_0 + (v128u8) v128s16_2 +
+    (v128u8) v128s16_3 + (v128u8) v128u64_0 + (v128u8) v128s64_0 +
+    (v128u8) v128u128_0 + (v128u8) v128s128_1 +
+    (v128u8) v128f16_0 + (v128u8) v128f64_0;
+  bar (f32_0 + (int) foo0_d32_0);
+  foo0_ret = ((union {
+	       v64u8 a;
+	       int b;}) ((union {
+			  v128u8 a;
+			  v64u8 b;}) v128u8_r).b).b;
+}