[12/18] Disable zmm register and 512 bit libmvec call when !TARGET_EVEX512

Message ID 20230921072013.2124750-13-lin1.hu@intel.com
State Unresolved
Headers
Series Support -mevex512 for AVX512 |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Hu, Lin1 Sept. 21, 2023, 7:20 a.m. UTC
  From: Haochen Jiang <haochen.jiang@intel.com>

gcc/ChangeLog:

	* config/i386/i386-expand.cc (ix86_broadcast_from_constant):
	Disable zmm broadcast for !TARGET_EVEX512.
	* config/i386/i386-options.cc (ix86_option_override_internal):
	Do not use PVW_AVX512 when !TARGET_EVEX512.
	(ix86_simd_clone_adjust): Add evex512 target into string.
	* config/i386/i386.cc (type_natural_mode): Report ABI warning
	when using zmm register w/o evex512.
	(ix86_return_in_memory): Do not allow zmm when !TARGET_EVEX512.
	(ix86_hard_regno_mode_ok): Ditto.
	(ix86_set_reg_reg_cost): Ditto.
	(ix86_rtx_costs): Ditto.
	(ix86_vector_mode_supported_p): Ditto.
	(ix86_preferred_simd_mode): Ditto.
	(ix86_autovectorize_vector_modes): Ditto.
	(ix86_get_mask_mode): Ditto.
	(ix86_simd_clone_compute_vecsize_and_simdlen): Disable 512 bit
	libmvec call when !TARGET_EVEX512.
	(ix86_simd_clone_usable): Ditto.
	* config/i386/i386.h (BIGGEST_ALIGNMENT): Disable 512-bit alignment
	when !TARGET_EVEX512.
	(MOVE_MAX): Do not use PVW_AVX512 when !TARGET_EVEX512.
	(STORE_MAX_PIECES): Ditto.
---
 gcc/config/i386/i386-expand.cc  |  1 +
 gcc/config/i386/i386-options.cc | 14 +++++----
 gcc/config/i386/i386.cc         | 53 ++++++++++++++++++---------------
 gcc/config/i386/i386.h          |  7 +++--
 4 files changed, 42 insertions(+), 33 deletions(-)
  

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index e42ff27c6ef..6eedcb384c0 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -611,6 +611,7 @@  ix86_broadcast_from_constant (machine_mode mode, rtx op)
      avx512 embed broadcast is available.  */
   if (GET_MODE_INNER (mode) == DImode && !TARGET_64BIT
       && (!TARGET_AVX512F
+	  || (GET_MODE_SIZE (mode) == 64 && !TARGET_EVEX512)
 	  || (GET_MODE_SIZE (mode) < 64 && !TARGET_AVX512VL)))
     return nullptr;
 
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index a1a7a92da9f..e2a90d7d9e2 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -2845,7 +2845,8 @@  ix86_option_override_internal (bool main_args_p,
 	  opts->x_ix86_move_max = opts->x_prefer_vector_width_type;
 	  if (opts_set->x_ix86_move_max == PVW_NONE)
 	    {
-	      if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
+	      if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
+		  && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
 		opts->x_ix86_move_max = PVW_AVX512;
 	      else
 		opts->x_ix86_move_max = PVW_AVX128;
@@ -2866,7 +2867,8 @@  ix86_option_override_internal (bool main_args_p,
 	  opts->x_ix86_store_max = opts->x_prefer_vector_width_type;
 	  if (opts_set->x_ix86_store_max == PVW_NONE)
 	    {
-	      if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
+	      if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
+		  && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
 		opts->x_ix86_store_max = PVW_AVX512;
 	      else
 		opts->x_ix86_store_max = PVW_AVX128;
@@ -3145,13 +3147,13 @@  ix86_simd_clone_adjust (struct cgraph_node *node)
     case 'e':
       if (TARGET_PREFER_AVX256)
 	{
-	  if (!TARGET_AVX512F)
-	    str = "avx512f,prefer-vector-width=512";
+	  if (!TARGET_AVX512F || !TARGET_EVEX512)
+	    str = "avx512f,evex512,prefer-vector-width=512";
 	  else
 	    str = "prefer-vector-width=512";
 	}
-      else if (!TARGET_AVX512F)
-	str = "avx512f";
+      else if (!TARGET_AVX512F || !TARGET_EVEX512)
+	str = "avx512f,evex512";
       break;
     default:
       gcc_unreachable ();
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 477e6cecc38..0df3bf10547 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -1924,7 +1924,8 @@  type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
 	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
 		&& GET_MODE_INNER (mode) == innermode)
 	      {
-		if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
+		if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512)
+		    && !TARGET_IAMCU)
 		  {
 		    static bool warnedavx512f;
 		    static bool warnedavx512f_ret;
@@ -4347,7 +4348,7 @@  ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 
 	  /* AVX512F values are returned in ZMM0 if available.  */
 	  if (size == 64)
-	    return !TARGET_AVX512F;
+	    return !TARGET_AVX512F || !TARGET_EVEX512;
 	}
 
       if (mode == XFmode)
@@ -20286,7 +20287,7 @@  ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 	  - any of 512-bit wide vector mode
 	  - any scalar mode.  */
       if (TARGET_AVX512F
-	  && (VALID_AVX512F_REG_OR_XI_MODE (mode)
+	  && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512)
 	      || VALID_AVX512F_SCALAR_MODE (mode)))
 	return true;
 
@@ -20538,7 +20539,7 @@  ix86_set_reg_reg_cost (machine_mode mode)
 
     case MODE_VECTOR_INT:
     case MODE_VECTOR_FLOAT:
-      if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
+      if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
 	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
 	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
 	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
@@ -21267,7 +21268,8 @@  ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
 	{
 	  /* (ior (not ...) ...) can be a single insn in AVX512.  */
 	  if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
-	      && (GET_MODE_SIZE (mode) == 64
+	      && ((TARGET_EVEX512
+		   && GET_MODE_SIZE (mode) == 64)
 		  || (TARGET_AVX512VL
 		      && (GET_MODE_SIZE (mode) == 32
 			  || GET_MODE_SIZE (mode) == 16))))
@@ -21315,7 +21317,8 @@  ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
 
 	      /* (and (not ...) (not ...)) can be a single insn in AVX512.  */
 	      if (GET_CODE (right) == NOT && TARGET_AVX512F
-		  && (GET_MODE_SIZE (mode) == 64
+		  && ((TARGET_EVEX512
+		       && GET_MODE_SIZE (mode) == 64)
 		      || (TARGET_AVX512VL
 			  && (GET_MODE_SIZE (mode) == 32
 			      || GET_MODE_SIZE (mode) == 16))))
@@ -21385,7 +21388,8 @@  ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
 	{
 	  /* (not (xor ...)) can be a single insn in AVX512.  */
 	  if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
-	      && (GET_MODE_SIZE (mode) == 64
+	      && ((TARGET_EVEX512
+		   && GET_MODE_SIZE (mode) == 64)
 		  || (TARGET_AVX512VL
 		      && (GET_MODE_SIZE (mode) == 32
 			  || GET_MODE_SIZE (mode) == 16))))
@@ -23000,7 +23004,7 @@  ix86_vector_mode_supported_p (machine_mode mode)
     return true;
   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
     return true;
-  if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
+  if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
     return true;
   if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
       && VALID_MMX_REG_MODE (mode))
@@ -23690,7 +23694,7 @@  ix86_preferred_simd_mode (scalar_mode mode)
   switch (mode)
     {
     case E_QImode:
-      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
+      if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
 	return V64QImode;
       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
 	return V32QImode;
@@ -23698,7 +23702,7 @@  ix86_preferred_simd_mode (scalar_mode mode)
 	return V16QImode;
 
     case E_HImode:
-      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
+      if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
 	return V32HImode;
       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
 	return V16HImode;
@@ -23706,7 +23710,7 @@  ix86_preferred_simd_mode (scalar_mode mode)
 	return V8HImode;
 
     case E_SImode:
-      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
+      if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
 	return V16SImode;
       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
 	return V8SImode;
@@ -23714,7 +23718,7 @@  ix86_preferred_simd_mode (scalar_mode mode)
 	return V4SImode;
 
     case E_DImode:
-      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
+      if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
 	return V8DImode;
       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
 	return V4DImode;
@@ -23728,15 +23732,16 @@  ix86_preferred_simd_mode (scalar_mode mode)
 	    {
 	      if (TARGET_PREFER_AVX128)
 		return V8HFmode;
-	      else if (TARGET_PREFER_AVX256)
+	      else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512)
 		return V16HFmode;
 	    }
-	  return V32HFmode;
+	  if (TARGET_EVEX512)
+	    return V32HFmode;
 	}
       return word_mode;
 
     case E_SFmode:
-      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
+      if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
 	return V16SFmode;
       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
 	return V8SFmode;
@@ -23744,7 +23749,7 @@  ix86_preferred_simd_mode (scalar_mode mode)
 	return V4SFmode;
 
     case E_DFmode:
-      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
+      if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
 	return V8DFmode;
       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
 	return V4DFmode;
@@ -23764,13 +23769,13 @@  ix86_preferred_simd_mode (scalar_mode mode)
 static unsigned int
 ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
 {
-  if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
+  if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
     {
       modes->safe_push (V64QImode);
       modes->safe_push (V32QImode);
       modes->safe_push (V16QImode);
     }
-  else if (TARGET_AVX512F && all)
+  else if (TARGET_AVX512F && TARGET_EVEX512 && all)
     {
       modes->safe_push (V32QImode);
       modes->safe_push (V16QImode);
@@ -23808,7 +23813,7 @@  ix86_get_mask_mode (machine_mode data_mode)
   unsigned elem_size = vector_size / nunits;
 
   /* Scalar mask case.  */
-  if ((TARGET_AVX512F && vector_size == 64)
+  if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
       || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
     {
       if (elem_size == 4
@@ -24306,7 +24311,7 @@  ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
     {
       /* If the function isn't exported, we can pick up just one ISA
 	 for the clones.  */
-      if (TARGET_AVX512F)
+      if (TARGET_AVX512F && TARGET_EVEX512)
 	clonei->vecsize_mangle = 'e';
       else if (TARGET_AVX2)
 	clonei->vecsize_mangle = 'd';
@@ -24398,17 +24403,17 @@  ix86_simd_clone_usable (struct cgraph_node *node)
 	return -1;
       if (!TARGET_AVX)
 	return 0;
-      return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
+      return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1;
     case 'c':
       if (!TARGET_AVX)
 	return -1;
-      return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
+      return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0;
     case 'd':
       if (!TARGET_AVX2)
 	return -1;
-      return TARGET_AVX512F ? 1 : 0;
+      return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0;
     case 'e':
-      if (!TARGET_AVX512F)
+      if (!TARGET_AVX512F || !TARGET_EVEX512)
 	return -1;
       return 0;
     default:
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 3e8488f2ae8..aac972f5caf 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -770,7 +770,8 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
    TARGET_ABSOLUTE_BIGGEST_ALIGNMENT.  */
 
 #define BIGGEST_ALIGNMENT \
-  (TARGET_IAMCU ? 32 : (TARGET_AVX512F ? 512 : (TARGET_AVX ? 256 : 128)))
+  (TARGET_IAMCU ? 32 : ((TARGET_AVX512F && TARGET_EVEX512) \
+			? 512 : (TARGET_AVX ? 256 : 128)))
 
 /* Maximum stack alignment.  */
 #define MAX_STACK_ALIGNMENT MAX_OFILE_ALIGNMENT
@@ -1807,7 +1808,7 @@  typedef struct ix86_args {
    MOVE_MAX_PIECES defaults to MOVE_MAX.  */
 
 #define MOVE_MAX \
-  ((TARGET_AVX512F \
+  ((TARGET_AVX512F && TARGET_EVEX512\
     && (ix86_move_max == PVW_AVX512 \
 	|| ix86_store_max == PVW_AVX512)) \
    ? 64 \
@@ -1826,7 +1827,7 @@  typedef struct ix86_args {
    store_by_pieces of 16/32/64 bytes.  */
 #define STORE_MAX_PIECES \
   (TARGET_INTER_UNIT_MOVES_TO_VEC \
-   ? ((TARGET_AVX512F && ix86_store_max == PVW_AVX512) \
+   ? ((TARGET_AVX512F && TARGET_EVEX512 && ix86_store_max == PVW_AVX512) \
       ? 64 \
       : ((TARGET_AVX \
 	  && ix86_store_max >= PVW_AVX256) \