[1/4,1/3] Change internal intrin call for AVX512 intrins

Message ID 20231031063703.2643896-2-haochen.jiang@intel.com
State Unresolved
Series Fix no-evex512 function attribute

Checks

Context                Check    Description
snail/gcc-patch-check  warning  Git am fail log

Commit Message

Jiang, Haochen Oct. 31, 2023, 6:37 a.m. UTC
  The newly added _mm{,256}_avx512* intrins duplicate the _mm{,256}_* forms
from AVX2 or earlier ISAs. They are needed to prevent a target option
mismatch when AVX512 intrins implemented on top of them are called under
the no-evex512 function attribute. All AVX512 intrins that used to call
those AVX2-or-earlier intrins now call the newly added AVX512 versions
instead.
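
For example, a minimal sketch of the failure mode this fixes (the intrin
comes from this patch; the compiler flags and the exact diagnostic wording
are approximate):

  /* t.c, compiled with something like -mavx512bitalg -mavx512vl.  */
  #include <immintrin.h>

  __attribute__ ((target ("no-evex512")))
  __m128i
  foo (__mmask8 m, __m128i a)
  {
    /* The always_inline body of _mm_maskz_popcnt_epi16 used to call
       _mm_setzero_si128, whose effective target options can include
       evex512, so inlining it into a no-evex512 caller failed with
       "inlining failed ... target specific option mismatch".  Calling
       the duplicated _mm_avx512_setzero_si128 instead keeps the target
       options compatible.  */
    return _mm_maskz_popcnt_epi16 (m, a);
  }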

gcc/ChangeLog:

	* config/i386/avx512bitalgvlintrin.h: Change intrin call.
	* config/i386/avx512dqintrin.h: Ditto.
	* config/i386/avx512fintrin.h
	(_mm_avx512_setzero_ps): New.
	(_mm_avx512_setzero_pd): Ditto.
	(__attribute__): Change intrin call.
	* config/i386/avx512fp16intrin.h: Ditto.
	* config/i386/avx512fp16vlintrin.h: Ditto.
	* config/i386/avx512vbmi2vlintrin.h: Ditto.
	* config/i386/avx512vbmivlintrin.h: Ditto.
	* config/i386/avx512vlbwintrin.h: Ditto.
	* config/i386/avx512vldqintrin.h: Ditto.
	* config/i386/avx512vlintrin.h
	(_mm_avx512_setzero_si128): New.
	(_mm256_avx512_setzero_pd): Ditto.
	(_mm256_avx512_setzero_ps): Ditto.
	(_mm256_avx512_setzero_si256): Ditto.
	(__attribute__): Change intrin call.
	* config/i386/avx512vpopcntdqvlintrin.h: Ditto.
	* config/i386/gfniintrin.h: Ditto.
---
 gcc/config/i386/avx512bitalgvlintrin.h    |    8 +-
 gcc/config/i386/avx512dqintrin.h          |   60 +-
 gcc/config/i386/avx512fintrin.h           |  209 ++--
 gcc/config/i386/avx512fp16intrin.h        |   24 +-
 gcc/config/i386/avx512fp16vlintrin.h      |  118 +--
 gcc/config/i386/avx512vbmi2vlintrin.h     |   72 +-
 gcc/config/i386/avx512vbmivlintrin.h      |    8 +-
 gcc/config/i386/avx512vlbwintrin.h        |  316 +++---
 gcc/config/i386/avx512vldqintrin.h        |  238 ++---
 gcc/config/i386/avx512vlintrin.h          | 1095 +++++++++++----------
 gcc/config/i386/avx512vpopcntdqvlintrin.h |    8 +-
 gcc/config/i386/gfniintrin.h              |   20 +-
 12 files changed, 1109 insertions(+), 1067 deletions(-)
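
For reference, a sketch of why the duplicates are declared inside the
AVX512 headers rather than reusing the SSE/AVX ones (the guard below is
approximate; see the avx512fintrin.h hunk in this patch for the actual
definitions):

  /* avx512fintrin.h, approximate surrounding context.  */
  #if !defined (__AVX512F__) || defined (__EVEX512__)
  #pragma GCC push_options
  #pragma GCC target ("avx512f,no-evex512")
  #define __DISABLE_AVX512F__
  #endif

  /* _mm_avx512_setzero_ps and _mm_avx512_setzero_pd are defined in
     this region, so they always carry no-evex512 and can be inlined
     into AVX512 intrins compiled under a no-evex512 target, unlike
     _mm_setzero_ps, which may pick up command-line options that
     include evex512.  */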
  

Patch

diff --git a/gcc/config/i386/avx512bitalgvlintrin.h b/gcc/config/i386/avx512bitalgvlintrin.h
index 36d697dea8a..39301625601 100644
--- a/gcc/config/i386/avx512bitalgvlintrin.h
+++ b/gcc/config/i386/avx512bitalgvlintrin.h
@@ -49,7 +49,7 @@  _mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
 						(__v32qi)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						(__mmask32) __U);
 }
 
@@ -132,7 +132,7 @@  _mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
 						(__v16hi)
-						_mm256_setzero_si256 (),
+						_mm256_avx512_setzero_si256 (),
 						(__mmask16) __U);
 }
 
@@ -151,7 +151,7 @@  _mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
 							 (__v16qi)
-							 _mm_setzero_si128 (),
+							 _mm_avx512_setzero_si128 (),
 							 (__mmask16) __U);
 }
 extern __inline __m128i
@@ -169,7 +169,7 @@  _mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
 							(__v8hi)
-							_mm_setzero_si128 (),
+							_mm_avx512_setzero_si128 (),
 							(__mmask8) __U);
 }
 #ifdef __DISABLE_AVX512BITALGVL__
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index b6a1d499e25..fb0aea70280 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -205,7 +205,7 @@  _mm_reduce_sd (__m128d __A, __m128d __B, int __C)
 {
   return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
 						 (__v2df) __B, __C,
-						 (__v2df) _mm_setzero_pd (),
+						 (__v2df) _mm_avx512_setzero_pd (),
 						 (__mmask8) -1);
 }
 
@@ -216,7 +216,7 @@  _mm_reduce_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
   return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
 						       (__v2df) __B, __C,
 						       (__v2df)
-						       _mm_setzero_pd (),
+						       _mm_avx512_setzero_pd (),
 						       (__mmask8) -1, __R);
 }
 
@@ -248,7 +248,7 @@  _mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
 {
   return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
 						 (__v2df) __B, __C,
-						 (__v2df) _mm_setzero_pd (),
+						 (__v2df) _mm_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -260,7 +260,7 @@  _mm_maskz_reduce_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
   return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
 						       (__v2df) __B, __C,
 						       (__v2df)
-						       _mm_setzero_pd (),
+						       _mm_avx512_setzero_pd (),
 						       __U, __R);
 }
 
@@ -270,7 +270,7 @@  _mm_reduce_ss (__m128 __A, __m128 __B, int __C)
 {
   return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
 						(__v4sf) __B, __C,
-						(__v4sf) _mm_setzero_ps (),
+						(__v4sf) _mm_avx512_setzero_ps (),
 						(__mmask8) -1);
 }
 
@@ -281,7 +281,7 @@  _mm_reduce_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
   return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
 						      (__v4sf) __B, __C,
 						      (__v4sf)
-						      _mm_setzero_ps (),
+						      _mm_avx512_setzero_ps (),
 						      (__mmask8) -1, __R);
 }
 
@@ -313,7 +313,7 @@  _mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
 {
   return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
 						(__v4sf) __B, __C,
-						(__v4sf) _mm_setzero_ps (),
+						(__v4sf) _mm_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -325,7 +325,7 @@  _mm_maskz_reduce_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
   return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
 						      (__v4sf) __B, __C,
 						      (__v4sf)
-						      _mm_setzero_ps (),
+						      _mm_avx512_setzero_ps (),
 						      __U, __R);
 }
 
@@ -336,7 +336,7 @@  _mm_range_sd (__m128d __A, __m128d __B, int __C)
   return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
 						   (__v2df) __B, __C,
 						   (__v2df)
-						   _mm_setzero_pd (),
+						   _mm_avx512_setzero_pd (),
 						   (__mmask8) -1,
 						   _MM_FROUND_CUR_DIRECTION);
 }
@@ -359,7 +359,7 @@  _mm_maskz_range_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
   return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
 						   (__v2df) __B, __C,
 						   (__v2df)
-						   _mm_setzero_pd (),
+						   _mm_avx512_setzero_pd (),
 						   (__mmask8) __U,
 						   _MM_FROUND_CUR_DIRECTION);
 }
@@ -371,7 +371,7 @@  _mm_range_ss (__m128 __A, __m128 __B, int __C)
   return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
 						  (__v4sf) __B, __C,
 						  (__v4sf)
-						  _mm_setzero_ps (),
+						  _mm_avx512_setzero_ps (),
 						  (__mmask8) -1,
 						  _MM_FROUND_CUR_DIRECTION);
 }
@@ -394,7 +394,7 @@  _mm_maskz_range_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
   return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
 						  (__v4sf) __B, __C,
 						  (__v4sf)
-						  _mm_setzero_ps (),
+						  _mm_avx512_setzero_ps (),
 						  (__mmask8) __U,
 						  _MM_FROUND_CUR_DIRECTION);
 }
@@ -406,7 +406,7 @@  _mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
   return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
 						   (__v2df) __B, __C,
 						   (__v2df)
-						   _mm_setzero_pd (),
+						   _mm_avx512_setzero_pd (),
 						   (__mmask8) -1, __R);
 }
 
@@ -429,7 +429,7 @@  _mm_maskz_range_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C,
   return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
 						   (__v2df) __B, __C,
 						   (__v2df)
-						   _mm_setzero_pd (),
+						   _mm_avx512_setzero_pd (),
 						   (__mmask8) __U, __R);
 }
 
@@ -440,7 +440,7 @@  _mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
   return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
 						  (__v4sf) __B, __C,
 						  (__v4sf)
-						  _mm_setzero_ps (),
+						  _mm_avx512_setzero_ps (),
 						  (__mmask8) -1, __R);
 }
 
@@ -463,7 +463,7 @@  _mm_maskz_range_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C,
   return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
 						  (__v4sf) __B, __C,
 						  (__v4sf)
-						  _mm_setzero_ps (),
+						  _mm_avx512_setzero_ps (),
 						  (__mmask8) __U, __R);
 }
 
@@ -506,7 +506,7 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
 
 #define _mm_range_sd(A, B, C)						 \
   ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
-    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), 	 \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_avx512_setzero_pd (), 	 \
     (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_mask_range_sd(W, U, A, B, C)				 \
@@ -516,12 +516,12 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
 
 #define _mm_maskz_range_sd(U, A, B, C)					 \
   ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
-    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), 	 \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_avx512_setzero_pd (), 	 \
     (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_range_ss(A, B, C)						\
   ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A),	\
-    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (),		\
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),		\
     (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_mask_range_ss(W, U, A, B, C)				\
@@ -531,12 +531,12 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
 
 #define _mm_maskz_range_ss(U, A, B, C)					\
   ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A),	\
-    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (),		\
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),		\
     (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_range_round_sd(A, B, C, R)					 \
   ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
-    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (),		 \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_avx512_setzero_pd (),		 \
     (__mmask8) -1, (R)))
 
 #define _mm_mask_range_round_sd(W, U, A, B, C, R)			 \
@@ -546,12 +546,12 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
 
 #define _mm_maskz_range_round_sd(U, A, B, C, R)				 \
   ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \
-    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (),		 \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_avx512_setzero_pd (),		 \
     (__mmask8)(U), (R)))
 
 #define _mm_range_round_ss(A, B, C, R)					\
   ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A),	\
-    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (),		\
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),		\
     (__mmask8) -1, (R)))
 
 #define _mm_mask_range_round_ss(W, U, A, B, C, R)			\
@@ -561,7 +561,7 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
 
 #define _mm_maskz_range_round_ss(U, A, B, C, R)				\
   ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A),	\
-    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (),		\
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),		\
     (__mmask8)(U), (R)))
 
 #define _mm_fpclass_ss_mask(X, C)					\
@@ -581,7 +581,7 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
 					     (int) (C), (__mmask8) (U)))
 #define _mm_reduce_sd(A, B, C)						\
   ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A),	\
-    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (),		\
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_avx512_setzero_pd (),		\
     (__mmask8)-1))
 
 #define _mm_mask_reduce_sd(W, U, A, B, C)				\
@@ -590,7 +590,7 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
 
 #define _mm_maskz_reduce_sd(U, A, B, C)					\
   ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A),	\
-    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (),		\
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_avx512_setzero_pd (),		\
     (__mmask8)(U)))
 
 #define _mm_reduce_round_sd(A, B, C, R)				       \
@@ -604,12 +604,12 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
 
 #define _mm_maskz_reduce_round_sd(U, A, B, C, R)		       \
   ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
-    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (),	       \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_avx512_setzero_pd (),	       \
     (__mmask8)(U), (int)(R)))
 
 #define _mm_reduce_ss(A, B, C)						\
   ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A),		\
-    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (),		\
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),		\
     (__mmask8)-1))
 
 #define _mm_mask_reduce_ss(W, U, A, B, C)				\
@@ -618,7 +618,7 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
 
 #define _mm_maskz_reduce_ss(U, A, B, C)					\
   ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A),		\
-    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (),		\
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),		\
     (__mmask8)(U)))
 
 #define _mm_reduce_round_ss(A, B, C, R)				       \
@@ -632,7 +632,7 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
 
 #define _mm_maskz_reduce_round_ss(U, A, B, C, R)		       \
   ((__m128) __builtin_ia32_reducesd_mask_round ((__v4sf)(__m128)(A),   \
-    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (),	       \
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),	       \
     (__mmask8)(U), (int)(R)))
 
 #endif
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 85bf72d9fae..530be29eefa 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -54,6 +54,23 @@  typedef enum
   _MM_MANT_SIGN_nan		/* DEST = NaN if sign(SRC) = 1 */
 } _MM_MANTISSA_SIGN_ENUM;
 
+/* These _mm{,256}_avx512* intrins are duplicated from their _mm{,256}_* forms
+   from AVX2 or before.  We need to add them to prevent target option mismatch
+   when calling AVX512 intrins implemented with these intrins under no-evex512
+   function attribute.  All AVX512 intrins calling those AVX2 intrins or
+   before will change their calls to these AVX512 version.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avx512_setzero_ps (void)
+{
+  return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avx512_setzero_pd (void)
+{
+  return __extension__ (__m128d){ 0.0, 0.0 };
+}
+
 #ifdef __OPTIMIZE__
 extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -83,7 +100,7 @@  _mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
   return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U, __R);
 }
 
@@ -115,7 +132,7 @@  _mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
   return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
 						 (__v4sf) __B,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U, __R);
 }
 
@@ -147,7 +164,7 @@  _mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
   return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U, __R);
 }
 
@@ -179,7 +196,7 @@  _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
   return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
 						 (__v4sf) __B,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U, __R);
 }
 
@@ -191,7 +208,7 @@  _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
     (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_add_round_sd(U, A, B, C)   \
-    (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+    (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)
 
 #define _mm_add_round_ss(A, B, C)            \
     (__m128)__builtin_ia32_addss_round(A, B, C)
@@ -200,7 +217,7 @@  _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
     (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_add_round_ss(U, A, B, C)   \
-    (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+    (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
 
 #define _mm_sub_round_sd(A, B, C)            \
     (__m128d)__builtin_ia32_subsd_round(A, B, C)
@@ -209,7 +226,7 @@  _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
     (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_sub_round_sd(U, A, B, C)   \
-    (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+    (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)
 
 #define _mm_sub_round_ss(A, B, C)            \
     (__m128)__builtin_ia32_subss_round(A, B, C)
@@ -218,7 +235,7 @@  _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
     (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_sub_round_ss(U, A, B, C)   \
-    (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+    (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
 
 #endif
 
@@ -246,7 +263,7 @@  _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
 						(__v2df) __A,
-						(__v2df) _mm_setzero_ps (),
+						(__v2df) _mm_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -274,7 +291,7 @@  _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
 						(__v4sf) __A,
-						(__v4sf) _mm_setzero_ps (),
+						(__v4sf) _mm_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -302,7 +319,7 @@  _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
 						 (__v2df) __A,
-						 (__v2df) _mm_setzero_pd (),
+						 (__v2df) _mm_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -330,7 +347,7 @@  _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
 						(__v4sf) __A,
-						(__v4sf) _mm_setzero_ps (),
+						(__v4sf) _mm_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -342,7 +359,7 @@  _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
   return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
 						     (__v2df) __A,
 						     (__v2df)
-						     _mm_setzero_pd (),
+						     _mm_avx512_setzero_pd (),
 						     (__mmask8) -1, __R);
 }
 
@@ -364,7 +381,7 @@  _mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
   return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
 						     (__v2df) __A,
 						     (__v2df)
-						     _mm_setzero_pd (),
+						     _mm_avx512_setzero_pd (),
 						     (__mmask8) __U, __R);
 }
 
@@ -375,7 +392,7 @@  _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
   return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
 						    (__v4sf) __A,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) -1, __R);
 }
 
@@ -397,7 +414,7 @@  _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
   return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
 						    (__v4sf) __A,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) __U, __R);
 }
 
@@ -429,7 +446,7 @@  _mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
   return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U, __R);
 }
 
@@ -461,7 +478,7 @@  _mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
   return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
 						 (__v4sf) __B,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U, __R);
 }
 
@@ -493,7 +510,7 @@  _mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
   return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U, __R);
 }
 
@@ -525,7 +542,7 @@  _mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
   return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
 						 (__v4sf) __B,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U, __R);
 }
 
@@ -536,7 +553,7 @@  _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
   return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
 						       (__v2df) __B,
 						       (__v2df)
-						       _mm_setzero_pd (),
+						       _mm_avx512_setzero_pd (),
 						       (__mmask8) -1, __R);
 }
 
@@ -559,7 +576,7 @@  _mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
   return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
 						       (__v2df) __B,
 						       (__v2df)
-						       _mm_setzero_pd (),
+						       _mm_avx512_setzero_pd (),
 						       (__mmask8) __U, __R);
 }
 
@@ -570,7 +587,7 @@  _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
   return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
 						      (__v4sf) __B,
 						      (__v4sf)
-						      _mm_setzero_ps (),
+						      _mm_avx512_setzero_ps (),
 						      (__mmask8) -1, __R);
 }
 
@@ -592,31 +609,31 @@  _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
   return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
 						      (__v4sf) __B,
 						      (__v4sf)
-						      _mm_setzero_ps (),
+						      _mm_avx512_setzero_ps (),
 						      (__mmask8) __U, __R);
 }
 #else
 #define _mm_sqrt_round_sd(A, B, C)	      \
     (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
-	(__v2df) _mm_setzero_pd (), -1, C)
+	(__v2df) _mm_avx512_setzero_pd (), -1, C)
 
 #define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
     (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C)
 
 #define _mm_maskz_sqrt_round_sd(U, A, B, C)   \
     (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
-	(__v2df) _mm_setzero_pd (), U, C)
+	(__v2df) _mm_avx512_setzero_pd (), U, C)
 
 #define _mm_sqrt_round_ss(A, B, C)	      \
     (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
-	(__v4sf) _mm_setzero_ps (), -1, C)
+	(__v4sf) _mm_avx512_setzero_ps (), -1, C)
 
 #define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
     (__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C)
 
 #define _mm_maskz_sqrt_round_ss(U, A, B, C)   \
     (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
-	(__v4sf) _mm_setzero_ps (), U, C)
+	(__v4sf) _mm_avx512_setzero_ps (), U, C)
 
 #define _mm_mul_round_sd(A, B, C)            \
     (__m128d)__builtin_ia32_mulsd_round(A, B, C)
@@ -625,7 +642,7 @@  _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
     (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_mul_round_sd(U, A, B, C)   \
-    (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+    (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)
 
 #define _mm_mul_round_ss(A, B, C)            \
     (__m128)__builtin_ia32_mulss_round(A, B, C)
@@ -634,7 +651,7 @@  _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
     (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_mul_round_ss(U, A, B, C)   \
-    (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+    (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
 
 #define _mm_div_round_sd(A, B, C)            \
     (__m128d)__builtin_ia32_divsd_round(A, B, C)
@@ -643,7 +660,7 @@  _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
     (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_div_round_sd(U, A, B, C)   \
-    (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+    (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)
 
 #define _mm_div_round_ss(A, B, C)            \
     (__m128)__builtin_ia32_divss_round(A, B, C)
@@ -652,7 +669,7 @@  _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
     (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_div_round_ss(U, A, B, C)   \
-    (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+    (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
 
 #define _mm_scalef_round_sd(A, B, C)					\
   ((__m128d)								\
@@ -677,13 +694,13 @@  _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
 #define _mm_maskz_scalef_round_sd(U, A, B, C)				\
   ((__m128d)								\
    __builtin_ia32_scalefsd_mask_round ((A), (B),			\
-				       (__v2df) _mm_setzero_pd (),	\
+				       (__v2df) _mm_avx512_setzero_pd (),	\
 				       (U), (C)))
 
 #define _mm_maskz_scalef_round_ss(U, A, B, C)				\
   ((__m128)								\
    __builtin_ia32_scalefss_mask_round ((A), (B),			\
-				       (__v4sf) _mm_setzero_ps (),	\
+				       (__v4sf) _mm_avx512_setzero_ps (),	\
 				       (U), (C)))
 #endif
 
@@ -831,7 +848,7 @@  extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_load_ss (__mmask8 __U, const float *__P)
 {
-  return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_setzero_ps (),
+  return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_avx512_setzero_ps (),
 					      __U);
 }
 
@@ -846,7 +863,7 @@  extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_load_sd (__mmask8 __U, const double *__P)
 {
-  return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_setzero_pd (),
+  return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_avx512_setzero_pd (),
 					       __U);
 }
 
@@ -863,7 +880,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
-					      (__v4sf) _mm_setzero_ps (), __U);
+					      (__v4sf) _mm_avx512_setzero_ps (), __U);
 }
 
 extern __inline __m128d
@@ -879,7 +896,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
-					       (__v2df) _mm_setzero_pd (),
+					       (__v2df) _mm_avx512_setzero_pd (),
 					       __U);
 }
 
@@ -1259,7 +1276,7 @@  _mm_maskz_cvt_roundsd_ss (__mmask8 __U, __m128 __A,
 {
   return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
 						      (__v2df) __B,
-						      _mm_setzero_ps (),
+						      _mm_avx512_setzero_ps (),
 						      __U,
 						      __R);
 }
@@ -1292,7 +1309,7 @@  _mm_maskz_cvt_roundss_sd (__mmask8 __U, __m128d __A,
 {
   return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
 						       (__v4sf) __B,
-						       _mm_setzero_pd (),
+						       _mm_avx512_setzero_pd (),
 						       __U,
 						       __R);
 }
@@ -1325,7 +1342,7 @@  _mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
   return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
 						 (__v4sf) __B,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U, __R);
 }
 
@@ -1357,7 +1374,7 @@  _mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
   return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U, __R);
 }
 
@@ -1396,7 +1413,7 @@  _mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
 							(__v2df) __B,
 						        (__D << 2) | __C,
                                                         (__v2df)
-                                                        _mm_setzero_pd(),
+                                                        _mm_avx512_setzero_pd(),
 						        __U, __R);
 }
 
@@ -1435,7 +1452,7 @@  _mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
 							(__v4sf) __B,
 						        (__D << 2) | __C,
                                                         (__v4sf)
-                                                        _mm_setzero_ps(),
+                                                        _mm_avx512_setzero_ps(),
 						        __U, __R);
 }
 
@@ -1448,7 +1465,7 @@  _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm,
     __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
 					  (__v4sf) __B, __imm,
 					  (__v4sf)
-					  _mm_setzero_ps (),
+					  _mm_avx512_setzero_ps (),
 					  (__mmask8) -1,
 					  __R);
 }
@@ -1475,7 +1492,7 @@  _mm_maskz_roundscale_round_ss (__mmask8 __A, __m128 __B, __m128 __C,
     __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
 					  (__v4sf) __C, __imm,
 					  (__v4sf)
-					  _mm_setzero_ps (),
+					  _mm_avx512_setzero_ps (),
 					  (__mmask8) __A,
 					  __R);
 }
@@ -1489,7 +1506,7 @@  _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
     __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
 					  (__v2df) __B, __imm,
 					  (__v2df)
-					  _mm_setzero_pd (),
+					  _mm_avx512_setzero_pd (),
 					  (__mmask8) -1,
 					  __R);
 }
@@ -1516,7 +1533,7 @@  _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
     __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
 					  (__v2df) __C, __imm,
 					  (__v2df)
-					  _mm_setzero_pd (),
+					  _mm_avx512_setzero_pd (),
 					  (__mmask8) __A,
 					  __R);
 }
@@ -1547,7 +1564,7 @@  _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
     (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), (W), (U), (C))
 
 #define _mm_maskz_cvt_roundsd_ss(U, A, B, C)	\
-    (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), _mm_setzero_ps (), \
+    (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), _mm_avx512_setzero_ps (), \
 						(U), (C))
 
 #define _mm_cvt_roundss_sd(A, B, C)		 \
@@ -1557,7 +1574,7 @@  _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
     (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), (W), (U), (C))
 
 #define _mm_maskz_cvt_roundss_sd(U, A, B, C)	\
-    (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), _mm_setzero_pd (), \
+    (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), _mm_avx512_setzero_pd (), \
 						 (U), (C))
 
 #define _mm_getmant_round_sd(X, Y, C, D, R)                                                  \
@@ -1578,7 +1595,7 @@  _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
   ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X),                  \
                                                  (__v2df)(__m128d)(Y),                  \
                                              (int)(((D)<<2) | (C)),              \
-                                             (__v2df)(__m128d)_mm_setzero_pd(),  \
+                                             (__v2df)(__m128d)_mm_avx512_setzero_pd(),  \
                                              (__mmask8)(U),\
 					     (R)))
 
@@ -1600,7 +1617,7 @@  _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
   ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X),                  \
                                                  (__v4sf)(__m128)(Y),                  \
                                              (int)(((D)<<2) | (C)),              \
-                                             (__v4sf)(__m128)_mm_setzero_ps(),  \
+                                             (__v4sf)(__m128)_mm_avx512_setzero_ps(),  \
                                              (__mmask8)(U),\
 					     (R)))
 
@@ -1611,7 +1628,7 @@  _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
     (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_getexp_round_ss(U, A, B, C)   \
-    (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+    (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
 
 #define _mm_getexp_round_sd(A, B, R)						       \
   ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
@@ -1620,14 +1637,14 @@  _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
     (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_getexp_round_sd(U, A, B, C)   \
-    (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+    (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)
 
 #define _mm_roundscale_round_ss(A, B, I, R)				\
   ((__m128)								\
    __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A),		\
 					 (__v4sf) (__m128) (B),		\
 					 (int) (I),			\
-					 (__v4sf) _mm_setzero_ps (),	\
+					 (__v4sf) _mm_avx512_setzero_ps (),	\
 					 (__mmask8) (-1),		\
 					 (int) (R)))
 #define _mm_mask_roundscale_round_ss(A, U, B, C, I, R)		\
@@ -1643,7 +1660,7 @@  _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
    __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A),		\
 					 (__v4sf) (__m128) (B),		\
 					 (int) (I),			\
-					 (__v4sf) _mm_setzero_ps (),	\
+					 (__v4sf) _mm_avx512_setzero_ps (),	\
 					 (__mmask8) (U),		\
 					 (int) (R)))
 #define _mm_roundscale_round_sd(A, B, I, R)				\
@@ -1651,7 +1668,7 @@  _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
    __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A),	\
 					 (__v2df) (__m128d) (B),	\
 					 (int) (I),			\
-					 (__v2df) _mm_setzero_pd (),	\
+					 (__v2df) _mm_avx512_setzero_pd (),	\
 					 (__mmask8) (-1),		\
 					 (int) (R)))
 #define _mm_mask_roundscale_round_sd(A, U, B, C, I, R)			\
@@ -1667,7 +1684,7 @@  _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
    __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A),	\
 					 (__v2df) (__m128d) (B),	\
 					 (int) (I),			\
-					 (__v2df) _mm_setzero_pd (),	\
+					 (__v2df) _mm_avx512_setzero_pd (),	\
 					 (__mmask8) (U),		\
 					 (int) (R)))
 
@@ -1900,7 +1917,7 @@  _mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
   return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U, __R);
 }
 
@@ -1932,7 +1949,7 @@  _mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
   return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
 						 (__v4sf) __B,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U, __R);
 }
 
@@ -1964,7 +1981,7 @@  _mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
   return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U, __R);
 }
 
@@ -1996,7 +2013,7 @@  _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
   return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
 						 (__v4sf) __B,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U, __R);
 }
 
@@ -2008,7 +2025,7 @@  _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
     (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_max_round_sd(U, A, B, C)   \
-    (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+    (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)
 
 #define _mm_max_round_ss(A, B, C)            \
     (__m128)__builtin_ia32_maxss_round(A, B, C)
@@ -2017,7 +2034,7 @@  _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
     (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_max_round_ss(U, A, B, C)   \
-    (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+    (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
 
 #define _mm_min_round_sd(A, B, C)            \
     (__m128d)__builtin_ia32_minsd_round(A, B, C)
@@ -2026,7 +2043,7 @@  _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
     (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_min_round_sd(U, A, B, C)   \
-    (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+    (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)
 
 #define _mm_min_round_ss(A, B, C)            \
     (__m128)__builtin_ia32_minss_round(A, B, C)
@@ -2035,7 +2052,7 @@  _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
     (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C)
 
 #define _mm_maskz_min_round_ss(U, A, B, C)   \
-    (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+    (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
 
 #endif
 
@@ -2786,7 +2803,7 @@  _mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
 						(__v2df) __B,
 						(__v2df)
-						_mm_setzero_pd (),
+						_mm_avx512_setzero_pd (),
 						(__mmask8) __U,
 						_MM_FROUND_CUR_DIRECTION);
 }
@@ -2809,7 +2826,7 @@  _mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
 						(__v4sf) __B,
 						(__v4sf)
-						_mm_setzero_ps (),
+						_mm_avx512_setzero_ps (),
 						(__mmask8) __U,
 						_MM_FROUND_CUR_DIRECTION);
 }
@@ -2832,7 +2849,7 @@  _mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
 						(__v2df) __B,
 						(__v2df)
-						_mm_setzero_pd (),
+						_mm_avx512_setzero_pd (),
 						(__mmask8) __U,
 						_MM_FROUND_CUR_DIRECTION);
 }
@@ -2855,7 +2872,7 @@  _mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
 						(__v4sf) __B,
 						(__v4sf)
-						_mm_setzero_ps (),
+						_mm_avx512_setzero_ps (),
 						(__mmask8) __U,
 						_MM_FROUND_CUR_DIRECTION);
 }
@@ -2879,7 +2896,7 @@  _mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U,
 						  _MM_FROUND_CUR_DIRECTION);
 }
@@ -2903,7 +2920,7 @@  _mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
 						 (__v4sf) __B,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U,
 						  _MM_FROUND_CUR_DIRECTION);
 }
@@ -2927,7 +2944,7 @@  _mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U,
 						  _MM_FROUND_CUR_DIRECTION);
 }
@@ -2951,7 +2968,7 @@  _mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
 						 (__v4sf) __B,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U,
 						  _MM_FROUND_CUR_DIRECTION);
 }
@@ -2974,7 +2991,7 @@  _mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U,
 						 _MM_FROUND_CUR_DIRECTION);
 }
@@ -2997,7 +3014,7 @@  _mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
 						(__v4sf) __B,
 						(__v4sf)
-						_mm_setzero_ps (),
+						_mm_avx512_setzero_ps (),
 						(__mmask8) __U,
 						_MM_FROUND_CUR_DIRECTION);
 }
@@ -3020,7 +3037,7 @@  _mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U,
 						 _MM_FROUND_CUR_DIRECTION);
 }
@@ -3043,7 +3060,7 @@  _mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
 						(__v4sf) __B,
 						(__v4sf)
-						_mm_setzero_ps (),
+						_mm_avx512_setzero_ps (),
 						(__mmask8) __U,
 						_MM_FROUND_CUR_DIRECTION);
 }
@@ -3055,7 +3072,7 @@  _mm_scalef_sd (__m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
 						    (__v2df) __B,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) -1,
 						    _MM_FROUND_CUR_DIRECTION);
 }
@@ -3067,7 +3084,7 @@  _mm_scalef_ss (__m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
 						   (__v4sf) __B,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) -1,
 						   _MM_FROUND_CUR_DIRECTION);
 }
@@ -3391,7 +3408,7 @@  _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
 						(__v4sf) __B,
 						(__v4sf)
-						_mm_setzero_ps (),
+						_mm_avx512_setzero_ps (),
 						(__mmask8) __U,
 						_MM_FROUND_CUR_DIRECTION);
 }
@@ -3423,7 +3440,7 @@  _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
 						(__v2df) __B,
 						(__v2df)
-						_mm_setzero_pd (),
+						_mm_avx512_setzero_pd (),
 						(__mmask8) __U,
 						_MM_FROUND_CUR_DIRECTION);
 }
@@ -3461,7 +3478,7 @@  _mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
                                                         (__v2df) __B,
 						        (__D << 2) | __C,
                                                         (__v2df)
-							_mm_setzero_pd(),
+							_mm_avx512_setzero_pd(),
 						        __U,
 						     _MM_FROUND_CUR_DIRECTION);
 }
@@ -3499,7 +3516,7 @@  _mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
                                                         (__v4sf) __B,
 						        (__D << 2) | __C,
                                                         (__v4sf)
-							_mm_setzero_ps(),
+							_mm_avx512_setzero_ps(),
 						        __U,
 						     _MM_FROUND_CUR_DIRECTION);
 }
@@ -3512,7 +3529,7 @@  _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
     __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
 					  (__v4sf) __B, __imm,
 					  (__v4sf)
-					  _mm_setzero_ps (),
+					  _mm_avx512_setzero_ps (),
 					  (__mmask8) -1,
 					  _MM_FROUND_CUR_DIRECTION);
 }
@@ -3540,7 +3557,7 @@  _mm_maskz_roundscale_ss (__mmask8 __A, __m128 __B, __m128 __C,
     __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
 					  (__v4sf) __C, __imm,
 					  (__v4sf)
-					  _mm_setzero_ps (),
+					  _mm_avx512_setzero_ps (),
 					  (__mmask8) __A,
 					  _MM_FROUND_CUR_DIRECTION);
 }
@@ -3553,7 +3570,7 @@  _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
     __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
 					  (__v2df) __B, __imm,
 					  (__v2df)
-					  _mm_setzero_pd (),
+					  _mm_avx512_setzero_pd (),
 					  (__mmask8) -1,
 					  _MM_FROUND_CUR_DIRECTION);
 }
@@ -3580,7 +3597,7 @@  _mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C,
     __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
 					  (__v2df) __C, __imm,
 					  (__v2df)
-					  _mm_setzero_pd (),
+					  _mm_avx512_setzero_pd (),
 					  (__mmask8) __A,
 					  _MM_FROUND_CUR_DIRECTION);
 }
@@ -3644,7 +3661,7 @@  _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
   ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X),                 \
                                            (__v2df)(__m128d)(Y),                     \
                                               (int)(((D)<<2) | (C)),                \
-                                           (__v2df)_mm_setzero_pd(),             \
+                                           (__v2df)_mm_avx512_setzero_pd(),             \
                                               (__mmask8)(U),\
 					      _MM_FROUND_CUR_DIRECTION))
 
@@ -3666,7 +3683,7 @@  _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
   ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X),                 \
                                            (__v4sf)(__m128)(Y),                     \
                                               (int)(((D)<<2) | (C)),                \
-                                           (__v4sf)_mm_setzero_ps(),             \
+                                           (__v4sf)_mm_avx512_setzero_ps(),             \
                                               (__mmask8)(U),\
 					      _MM_FROUND_CUR_DIRECTION))
 
@@ -3679,7 +3696,7 @@  _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
                                              _MM_FROUND_CUR_DIRECTION)
 
 #define _mm_maskz_getexp_ss(U, A, B)   \
-    (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U,\
+    (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U,\
 					      _MM_FROUND_CUR_DIRECTION)
 
 #define _mm_getexp_sd(A, B)						       \
@@ -3691,7 +3708,7 @@  _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
                                              _MM_FROUND_CUR_DIRECTION)
 
 #define _mm_maskz_getexp_sd(U, A, B)   \
-    (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U,\
+    (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U,\
 					      _MM_FROUND_CUR_DIRECTION)
 
 #define _mm_roundscale_ss(A, B, I)					\
@@ -3699,7 +3716,7 @@  _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
    __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A),		\
 					 (__v4sf) (__m128) (B),		\
 					 (int) (I),			\
-					 (__v4sf) _mm_setzero_ps (),	\
+					 (__v4sf) _mm_avx512_setzero_ps (),	\
 					 (__mmask8) (-1),		\
 					 _MM_FROUND_CUR_DIRECTION))
 #define _mm_mask_roundscale_ss(A, U, B, C, I)				\
@@ -3715,7 +3732,7 @@  _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
    __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A),		\
 					 (__v4sf) (__m128) (B),		\
 					 (int) (I),			\
-					 (__v4sf) _mm_setzero_ps (),	\
+					 (__v4sf) _mm_avx512_setzero_ps (),	\
 					 (__mmask8) (U),		\
 					 _MM_FROUND_CUR_DIRECTION))
 #define _mm_roundscale_sd(A, B, I)					\
@@ -3723,7 +3740,7 @@  _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
    __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A),	\
 					 (__v2df) (__m128d) (B),	\
 					 (int) (I),			\
-					 (__v2df) _mm_setzero_pd (),	\
+					 (__v2df) _mm_avx512_setzero_pd (),	\
 					 (__mmask8) (-1),		\
 					 _MM_FROUND_CUR_DIRECTION))
 #define _mm_mask_roundscale_sd(A, U, B, C, I)				\
@@ -3739,7 +3756,7 @@  _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
    __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A),	\
 					 (__v2df) (__m128d) (B),	\
 					 (int) (I),			\
-					 (__v2df) _mm_setzero_pd (),	\
+					 (__v2df) _mm_avx512_setzero_pd (),	\
 					 (__mmask8) (U),		\
 					 _MM_FROUND_CUR_DIRECTION))
 
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 92c0c24e9bd..0ed83770d6b 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -1747,7 +1747,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsh_ss (__m128 __A, __m128h __B)
 {
   return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
-					      _mm_setzero_ps (),
+					      _mm_avx512_setzero_ps (),
 					      (__mmask8) -1,
 					      _MM_FROUND_CUR_DIRECTION);
 }
@@ -1767,7 +1767,7 @@  _mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
 			  __m128h __C)
 {
   return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
-					      _mm_setzero_ps (),
+					      _mm_avx512_setzero_ps (),
 					      __A, _MM_FROUND_CUR_DIRECTION);
 }
 
@@ -1776,7 +1776,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsh_sd (__m128d __A, __m128h __B)
 {
   return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
-					      _mm_setzero_pd (),
+					      _mm_avx512_setzero_pd (),
 					      (__mmask8) -1,
 					      _MM_FROUND_CUR_DIRECTION);
 }
@@ -1795,7 +1795,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
 {
   return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
-					      _mm_setzero_pd (),
+					      _mm_avx512_setzero_pd (),
 					      __A, _MM_FROUND_CUR_DIRECTION);
 }
 
@@ -1805,7 +1805,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
 {
   return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
-					      _mm_setzero_ps (),
+					      _mm_avx512_setzero_ps (),
 					      (__mmask8) -1, __R);
 }
 
@@ -1823,7 +1823,7 @@  _mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
 			  __m128h __C, const int __R)
 {
   return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
-					      _mm_setzero_ps (),
+					      _mm_avx512_setzero_ps (),
 					      __A, __R);
 }
 
@@ -1832,7 +1832,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
 {
   return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
-					      _mm_setzero_pd (),
+					      _mm_avx512_setzero_pd (),
 					      (__mmask8) -1, __R);
 }
 
@@ -1849,14 +1849,14 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
 {
   return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
-					      _mm_setzero_pd (),
+					      _mm_avx512_setzero_pd (),
 					      __A, __R);
 }
 
 #else
 #define _mm_cvt_roundsh_ss(A, B, R)				\
   (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A),		\
-					_mm_setzero_ps (),	\
+					_mm_avx512_setzero_ps (),	\
 					(__mmask8) -1, (R)))
 
 #define _mm_mask_cvt_roundsh_ss(A, B, C, D, R)				\
@@ -1864,12 +1864,12 @@  _mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
 
 #define _mm_maskz_cvt_roundsh_ss(A, B, C, R)			\
   (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B),		\
-					_mm_setzero_ps (),	\
+					_mm_avx512_setzero_ps (),	\
 					(A), (R)))
 
 #define _mm_cvt_roundsh_sd(A, B, R)				\
   (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A),		\
-					_mm_setzero_pd (),	\
+					_mm_avx512_setzero_pd (),	\
 					(__mmask8) -1, (R)))
 
 #define _mm_mask_cvt_roundsh_sd(A, B, C, D, R)				\
@@ -1877,7 +1877,7 @@  _mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
 
 #define _mm_maskz_cvt_roundsh_sd(A, B, C, R)			\
   (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B),		\
-					_mm_setzero_pd (),	\
+					_mm_avx512_setzero_pd (),	\
 					(A), (R)))
 
 #endif /* __OPTIMIZE__ */
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
index 308b0b26850..1d772aefd95 100644
--- a/gcc/config/i386/avx512fp16vlintrin.h
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -147,7 +147,7 @@  extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_zextph128_ph256 (__m128h __A)
 {
-  return (__m256h) _mm256_insertf128_ps (_mm256_setzero_ps (),
+  return (__m256h) _mm256_insertf128_ps (_mm256_avx512_setzero_ps (),
 					 (__m128) __A, 0);
 }
 
@@ -175,7 +175,7 @@  _mm256_maskz_conj_pch (__mmask8 __U, __m256h __A)
   return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf)
 						   _mm256_conj_pch (__A),
 						  (__v8sf)
-						   _mm256_setzero_ps (),
+						   _mm256_avx512_setzero_ps (),
 						  (__mmask8) __U);
 }
 
@@ -200,7 +200,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_conj_pch (__mmask8 __U, __m128h __A)
 {
   return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A),
-						  (__v4sf) _mm_setzero_ps (),
+						  (__v4sf) _mm_avx512_setzero_ps (),
 						  (__mmask8) __U);
 }
 
@@ -1124,7 +1124,7 @@  _mm_cvtph_epi32 (__m128h __A)
   return (__m128i)
     __builtin_ia32_vcvtph2dq128_mask (__A,
 				      (__v4si)
-				      _mm_setzero_si128 (),
+				      _mm_avx512_setzero_si128 (),
 				      (__mmask8) -1);
 }
 
@@ -1142,7 +1142,7 @@  _mm_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B)
 {
   return (__m128i)
     __builtin_ia32_vcvtph2dq128_mask (__B,
-				      (__v4si) _mm_setzero_si128 (),
+				      (__v4si) _mm_avx512_setzero_si128 (),
 				      __A);
 }
 
@@ -1153,7 +1153,7 @@  _mm256_cvtph_epi32 (__m128h __A)
   return (__m256i)
     __builtin_ia32_vcvtph2dq256_mask (__A,
 				      (__v8si)
-				      _mm256_setzero_si256 (),
+				      _mm256_avx512_setzero_si256 (),
 				      (__mmask8) -1);
 }
 
@@ -1172,7 +1172,7 @@  _mm256_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B)
   return (__m256i)
     __builtin_ia32_vcvtph2dq256_mask (__B,
 				      (__v8si)
-				      _mm256_setzero_si256 (),
+				      _mm256_avx512_setzero_si256 (),
 				      __A);
 }
 
@@ -1184,7 +1184,7 @@  _mm_cvtph_epu32 (__m128h __A)
   return (__m128i)
     __builtin_ia32_vcvtph2udq128_mask (__A,
 				       (__v4si)
-				       _mm_setzero_si128 (),
+				       _mm_avx512_setzero_si128 (),
 				       (__mmask8) -1);
 }
 
@@ -1203,7 +1203,7 @@  _mm_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
   return (__m128i)
     __builtin_ia32_vcvtph2udq128_mask (__B,
 				       (__v4si)
-				       _mm_setzero_si128 (),
+				       _mm_avx512_setzero_si128 (),
 				       __A);
 }
 
@@ -1214,7 +1214,7 @@  _mm256_cvtph_epu32 (__m128h __A)
   return (__m256i)
     __builtin_ia32_vcvtph2udq256_mask (__A,
 				       (__v8si)
-				       _mm256_setzero_si256 (),
+				       _mm256_avx512_setzero_si256 (),
 				       (__mmask8) -1);
 }
 
@@ -1232,7 +1232,7 @@  _mm256_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
 {
   return (__m256i)
     __builtin_ia32_vcvtph2udq256_mask (__B,
-				       (__v8si) _mm256_setzero_si256 (),
+				       (__v8si) _mm256_avx512_setzero_si256 (),
 				       __A);
 }
 
@@ -1243,7 +1243,7 @@  _mm_cvttph_epi32 (__m128h __A)
 {
   return (__m128i)
     __builtin_ia32_vcvttph2dq128_mask (__A,
-				       (__v4si) _mm_setzero_si128 (),
+				       (__v4si) _mm_avx512_setzero_si128 (),
 				       (__mmask8) -1);
 }
 
@@ -1262,7 +1262,7 @@  _mm_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B)
 {
   return (__m128i)
     __builtin_ia32_vcvttph2dq128_mask (__B,
-				       (__v4si) _mm_setzero_si128 (),
+				       (__v4si) _mm_avx512_setzero_si128 (),
 				       __A);
 }
 
@@ -1273,7 +1273,7 @@  _mm256_cvttph_epi32 (__m128h __A)
   return (__m256i)
     __builtin_ia32_vcvttph2dq256_mask (__A,
 				       (__v8si)
-				       _mm256_setzero_si256 (),
+				       _mm256_avx512_setzero_si256 (),
 				       (__mmask8) -1);
 }
 
@@ -1294,7 +1294,7 @@  _mm256_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B)
   return (__m256i)
     __builtin_ia32_vcvttph2dq256_mask (__B,
 				       (__v8si)
-				       _mm256_setzero_si256 (),
+				       _mm256_avx512_setzero_si256 (),
 				       __A);
 }
 
@@ -1306,7 +1306,7 @@  _mm_cvttph_epu32 (__m128h __A)
   return (__m128i)
     __builtin_ia32_vcvttph2udq128_mask (__A,
 					(__v4si)
-					_mm_setzero_si128 (),
+					_mm_avx512_setzero_si128 (),
 					(__mmask8) -1);
 }
 
@@ -1327,7 +1327,7 @@  _mm_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B)
   return (__m128i)
     __builtin_ia32_vcvttph2udq128_mask (__B,
 					(__v4si)
-					_mm_setzero_si128 (),
+					_mm_avx512_setzero_si128 (),
 					__A);
 }
 
@@ -1338,7 +1338,7 @@  _mm256_cvttph_epu32 (__m128h __A)
   return (__m256i)
     __builtin_ia32_vcvttph2udq256_mask (__A,
 					(__v8si)
-					_mm256_setzero_si256 (), (__mmask8) -1);
+					_mm256_avx512_setzero_si256 (), (__mmask8) -1);
 }
 
 extern __inline __m256i
@@ -1358,7 +1358,7 @@  _mm256_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B)
   return (__m256i)
     __builtin_ia32_vcvttph2udq256_mask (__B,
 					(__v8si)
-					_mm256_setzero_si256 (),
+					_mm256_avx512_setzero_si256 (),
 					__A);
 }
 
@@ -1473,7 +1473,7 @@  _mm_cvtph_epi64 (__m128h __A)
 {
   return
     __builtin_ia32_vcvtph2qq128_mask (__A,
-				      _mm_setzero_si128 (),
+				      _mm_avx512_setzero_si128 (),
 				      (__mmask8) -1);
 }
 
@@ -1489,7 +1489,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
 {
   return __builtin_ia32_vcvtph2qq128_mask (__B,
-					   _mm_setzero_si128 (),
+					   _mm_avx512_setzero_si128 (),
 					   __A);
 }
 
@@ -1498,7 +1498,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtph_epi64 (__m128h __A)
 {
   return __builtin_ia32_vcvtph2qq256_mask (__A,
-					   _mm256_setzero_si256 (),
+					   _mm256_avx512_setzero_si256 (),
 					   (__mmask8) -1);
 }
 
@@ -1514,7 +1514,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
 {
   return __builtin_ia32_vcvtph2qq256_mask (__B,
-					   _mm256_setzero_si256 (),
+					   _mm256_avx512_setzero_si256 (),
 					   __A);
 }
 
@@ -1524,7 +1524,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtph_epu64 (__m128h __A)
 {
   return __builtin_ia32_vcvtph2uqq128_mask (__A,
-					    _mm_setzero_si128 (),
+					    _mm_avx512_setzero_si128 (),
 					    (__mmask8) -1);
 }
 
@@ -1540,7 +1540,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
 {
   return __builtin_ia32_vcvtph2uqq128_mask (__B,
-					    _mm_setzero_si128 (),
+					    _mm_avx512_setzero_si128 (),
 					    __A);
 }
 
@@ -1549,7 +1549,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtph_epu64 (__m128h __A)
 {
   return __builtin_ia32_vcvtph2uqq256_mask (__A,
-					    _mm256_setzero_si256 (),
+					    _mm256_avx512_setzero_si256 (),
 					    (__mmask8) -1);
 }
 
@@ -1565,7 +1565,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
 {
   return __builtin_ia32_vcvtph2uqq256_mask (__B,
-					    _mm256_setzero_si256 (),
+					    _mm256_avx512_setzero_si256 (),
 					    __A);
 }
 
@@ -1575,7 +1575,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvttph_epi64 (__m128h __A)
 {
   return __builtin_ia32_vcvttph2qq128_mask (__A,
-					    _mm_setzero_si128 (),
+					    _mm_avx512_setzero_si128 (),
 					    (__mmask8) -1);
 }
 
@@ -1593,7 +1593,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
 {
   return __builtin_ia32_vcvttph2qq128_mask (__B,
-					    _mm_setzero_si128 (),
+					    _mm_avx512_setzero_si128 (),
 					    __A);
 }
 
@@ -1602,7 +1602,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvttph_epi64 (__m128h __A)
 {
   return __builtin_ia32_vcvttph2qq256_mask (__A,
-					    _mm256_setzero_si256 (),
+					    _mm256_avx512_setzero_si256 (),
 					    (__mmask8) -1);
 }
 
@@ -1620,7 +1620,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
 {
   return __builtin_ia32_vcvttph2qq256_mask (__B,
-					    _mm256_setzero_si256 (),
+					    _mm256_avx512_setzero_si256 (),
 					    __A);
 }
 
@@ -1630,7 +1630,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvttph_epu64 (__m128h __A)
 {
   return __builtin_ia32_vcvttph2uqq128_mask (__A,
-					     _mm_setzero_si128 (),
+					     _mm_avx512_setzero_si128 (),
 					     (__mmask8) -1);
 }
 
@@ -1648,7 +1648,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
 {
   return __builtin_ia32_vcvttph2uqq128_mask (__B,
-					     _mm_setzero_si128 (),
+					     _mm_avx512_setzero_si128 (),
 					     __A);
 }
 
@@ -1657,7 +1657,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvttph_epu64 (__m128h __A)
 {
   return __builtin_ia32_vcvttph2uqq256_mask (__A,
-					     _mm256_setzero_si256 (),
+					     _mm256_avx512_setzero_si256 (),
 					     (__mmask8) -1);
 }
 
@@ -1675,7 +1675,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
 {
   return __builtin_ia32_vcvttph2uqq256_mask (__B,
-					     _mm256_setzero_si256 (),
+					     _mm256_avx512_setzero_si256 (),
 					     __A);
 }
 
@@ -1789,7 +1789,7 @@  _mm_cvtph_epi16 (__m128h __A)
   return (__m128i)
     __builtin_ia32_vcvtph2w128_mask (__A,
 				     (__v8hi)
-				     _mm_setzero_si128 (),
+				     _mm_avx512_setzero_si128 (),
 				     (__mmask8) -1);
 }
 
@@ -1808,7 +1808,7 @@  _mm_maskz_cvtph_epi16 (__mmask8 __A, __m128h __B)
   return (__m128i)
     __builtin_ia32_vcvtph2w128_mask (__B,
 				     (__v8hi)
-				     _mm_setzero_si128 (),
+				     _mm_avx512_setzero_si128 (),
 				     __A);
 }
 
@@ -1819,7 +1819,7 @@  _mm256_cvtph_epi16 (__m256h __A)
   return (__m256i)
     __builtin_ia32_vcvtph2w256_mask (__A,
 				     (__v16hi)
-				     _mm256_setzero_si256 (),
+				     _mm256_avx512_setzero_si256 (),
 				     (__mmask16) -1);
 }
 
@@ -1838,7 +1838,7 @@  _mm256_maskz_cvtph_epi16 (__mmask16 __A, __m256h __B)
   return (__m256i)
     __builtin_ia32_vcvtph2w256_mask (__B,
 				     (__v16hi)
-				     _mm256_setzero_si256 (),
+				     _mm256_avx512_setzero_si256 (),
 				     __A);
 }
 
@@ -1850,7 +1850,7 @@  _mm_cvtph_epu16 (__m128h __A)
   return (__m128i)
     __builtin_ia32_vcvtph2uw128_mask (__A,
 				      (__v8hi)
-				      _mm_setzero_si128 (),
+				      _mm_avx512_setzero_si128 (),
 				      (__mmask8) -1);
 }
 
@@ -1869,7 +1869,7 @@  _mm_maskz_cvtph_epu16 (__mmask8 __A, __m128h __B)
   return (__m128i)
     __builtin_ia32_vcvtph2uw128_mask (__B,
 				      (__v8hi)
-				      _mm_setzero_si128 (),
+				      _mm_avx512_setzero_si128 (),
 				      __A);
 }
 
@@ -1880,7 +1880,7 @@  _mm256_cvtph_epu16 (__m256h __A)
   return (__m256i)
     __builtin_ia32_vcvtph2uw256_mask (__A,
 				      (__v16hi)
-				      _mm256_setzero_si256 (),
+				      _mm256_avx512_setzero_si256 (),
 				      (__mmask16) -1);
 }
 
@@ -1899,7 +1899,7 @@  _mm256_maskz_cvtph_epu16 (__mmask16 __A, __m256h __B)
   return (__m256i)
     __builtin_ia32_vcvtph2uw256_mask (__B,
 				      (__v16hi)
-				      _mm256_setzero_si256 (),
+				      _mm256_avx512_setzero_si256 (),
 				      __A);
 }
 
@@ -1911,7 +1911,7 @@  _mm_cvttph_epi16 (__m128h __A)
   return (__m128i)
     __builtin_ia32_vcvttph2w128_mask (__A,
 				      (__v8hi)
-				      _mm_setzero_si128 (),
+				      _mm_avx512_setzero_si128 (),
 				      (__mmask8) -1);
 }
 
@@ -1932,7 +1932,7 @@  _mm_maskz_cvttph_epi16 (__mmask8 __A, __m128h __B)
   return (__m128i)
     __builtin_ia32_vcvttph2w128_mask (__B,
 				      (__v8hi)
-				      _mm_setzero_si128 (),
+				      _mm_avx512_setzero_si128 (),
 				      __A);
 }
 
@@ -1943,7 +1943,7 @@  _mm256_cvttph_epi16 (__m256h __A)
   return (__m256i)
     __builtin_ia32_vcvttph2w256_mask (__A,
 				      (__v16hi)
-				      _mm256_setzero_si256 (),
+				      _mm256_avx512_setzero_si256 (),
 				      (__mmask16) -1);
 }
 
@@ -1964,7 +1964,7 @@  _mm256_maskz_cvttph_epi16 (__mmask16 __A, __m256h __B)
   return (__m256i)
     __builtin_ia32_vcvttph2w256_mask (__B,
 				      (__v16hi)
-				      _mm256_setzero_si256 (),
+				      _mm256_avx512_setzero_si256 (),
 				      __A);
 }
 
@@ -1976,7 +1976,7 @@  _mm_cvttph_epu16 (__m128h __A)
   return (__m128i)
     __builtin_ia32_vcvttph2uw128_mask (__A,
 				       (__v8hi)
-				       _mm_setzero_si128 (),
+				       _mm_avx512_setzero_si128 (),
 				       (__mmask8) -1);
 }
 
@@ -1997,7 +1997,7 @@  _mm_maskz_cvttph_epu16 (__mmask8 __A, __m128h __B)
   return (__m128i)
     __builtin_ia32_vcvttph2uw128_mask (__B,
 				       (__v8hi)
-				       _mm_setzero_si128 (),
+				       _mm_avx512_setzero_si128 (),
 				       __A);
 }
 
@@ -2008,7 +2008,7 @@  _mm256_cvttph_epu16 (__m256h __A)
   return (__m256i)
     __builtin_ia32_vcvttph2uw256_mask (__A,
 				       (__v16hi)
-				       _mm256_setzero_si256 (),
+				       _mm256_avx512_setzero_si256 (),
 				       (__mmask16) -1);
 }
 
@@ -2028,7 +2028,7 @@  _mm256_maskz_cvttph_epu16 (__mmask16 __A, __m256h __B)
 {
   return (__m256i)
     __builtin_ia32_vcvttph2uw256_mask (__B,
-				       (__v16hi) _mm256_setzero_si256 (),
+				       (__v16hi) _mm256_avx512_setzero_si256 (),
 				       __A);
 }
 
@@ -2144,7 +2144,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtph_pd (__m128h __A)
 {
   return __builtin_ia32_vcvtph2pd128_mask (__A,
-					   _mm_setzero_pd (),
+					   _mm_avx512_setzero_pd (),
 					   (__mmask8) -1);
 }
 
@@ -2159,7 +2159,7 @@  extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
 {
-  return __builtin_ia32_vcvtph2pd128_mask (__B, _mm_setzero_pd (), __A);
+  return __builtin_ia32_vcvtph2pd128_mask (__B, _mm_avx512_setzero_pd (), __A);
 }
 
 extern __inline __m256d
@@ -2167,7 +2167,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtph_pd (__m128h __A)
 {
   return __builtin_ia32_vcvtph2pd256_mask (__A,
-					   _mm256_setzero_pd (),
+					   _mm256_avx512_setzero_pd (),
 					   (__mmask8) -1);
 }
 
@@ -2183,7 +2183,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
 {
   return __builtin_ia32_vcvtph2pd256_mask (__B,
-					   _mm256_setzero_pd (),
+					   _mm256_avx512_setzero_pd (),
 					   __A);
 }
 
@@ -2193,7 +2193,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtxph_ps (__m128h __A)
 {
   return __builtin_ia32_vcvtph2psx128_mask (__A,
-					   _mm_setzero_ps (),
+					   _mm_avx512_setzero_ps (),
 					   (__mmask8) -1);
 }
 
@@ -2208,7 +2208,7 @@  extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_cvtxph_ps (__mmask8 __A, __m128h __B)
 {
-  return __builtin_ia32_vcvtph2psx128_mask (__B, _mm_setzero_ps (), __A);
+  return __builtin_ia32_vcvtph2psx128_mask (__B, _mm_avx512_setzero_ps (), __A);
 }
 
 extern __inline __m256
@@ -2216,7 +2216,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtxph_ps (__m128h __A)
 {
   return __builtin_ia32_vcvtph2psx256_mask (__A,
-					    _mm256_setzero_ps (),
+					    _mm256_avx512_setzero_ps (),
 					    (__mmask8) -1);
 }
 
@@ -2232,7 +2232,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_cvtxph_ps (__mmask8 __A, __m128h __B)
 {
   return __builtin_ia32_vcvtph2psx256_mask (__B,
-					    _mm256_setzero_ps (),
+					    _mm256_avx512_setzero_ps (),
 					    __A);
 }
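
All of the avx512fp16vlintrin.h hunks above perform the same substitution.
A minimal sketch of the failure mode they address, assuming the GCC 14
no-evex512 target attribute and using _mm_cvtph_epi32 from the hunks
above; the function name is illustrative:

#include <immintrin.h>

/* Before this series, _mm_cvtph_epi32 expanded to a call of the
   SSE2-gated _mm_setzero_si128, whose target options need not match
   this function's, so the always_inline expansion could be rejected
   with a target-specific option mismatch.  The _mm_avx512_* duplicate
   keeps the whole expansion inside matching target options.  */
__attribute__ ((target ("avx512fp16,avx512vl,no-evex512")))
__m128i
cvtph_under_no_evex512 (__m128h __x)
{
  return _mm_cvtph_epi32 (__x);
}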
 
diff --git a/gcc/config/i386/avx512vbmi2vlintrin.h b/gcc/config/i386/avx512vbmi2vlintrin.h
index 92cae8cf02b..4424adc774e 100644
--- a/gcc/config/i386/avx512vbmi2vlintrin.h
+++ b/gcc/config/i386/avx512vbmi2vlintrin.h
@@ -47,7 +47,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_compress_epi8 (__mmask16 __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __B,
-			(__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
+			(__v16qi) _mm_avx512_setzero_si128 (), (__mmask16) __A);
 }
 
 
@@ -72,7 +72,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_compress_epi16 (__mmask8 __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __B,
-				(__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
+				(__v8hi) _mm_avx512_setzero_si128 (), (__mmask8) __A);
 }
 
 extern __inline __m256i
@@ -88,7 +88,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_compress_epi16 (__mmask16 __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __B,
-			(__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
+			(__v16hi) _mm256_avx512_setzero_si256 (), (__mmask16) __A);
 }
 
 extern __inline void
@@ -121,7 +121,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_expand_epi8 (__mmask16 __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_expandqi128_maskz ((__v16qi) __B,
-			(__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
+			(__v16qi) _mm_avx512_setzero_si128 (), (__mmask16) __A);
 }
 
 extern __inline __m128i
@@ -137,7 +137,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_expandloadu_epi8 (__mmask16 __A, const void * __B)
 {
   return (__m128i) __builtin_ia32_expandloadqi128_maskz ((const __v16qi *) __B,
-			(__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
+			(__v16qi) _mm_avx512_setzero_si128 (), (__mmask16) __A);
 }
 
 extern __inline __m128i
@@ -154,7 +154,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_expand_epi16 (__mmask8 __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_expandhi128_maskz ((__v8hi) __B,
-				(__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
+				(__v8hi) _mm_avx512_setzero_si128 (), (__mmask8) __A);
 }
 
 extern __inline __m128i
@@ -170,7 +170,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_expandloadu_epi16 (__mmask8 __A, const void * __B)
 {
   return (__m128i) __builtin_ia32_expandloadhi128_maskz ((const __v8hi *) __B,
-				(__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
+				(__v8hi) _mm_avx512_setzero_si128 (), (__mmask8) __A);
 }
 extern __inline __m256i
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -186,7 +186,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_expand_epi16 (__mmask16 __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_expandhi256_maskz ((__v16hi) __B,
-			(__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
+			(__v16hi) _mm256_avx512_setzero_si256 (), (__mmask16) __A);
 }
 
 extern __inline __m256i
@@ -202,7 +202,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_expandloadu_epi16 (__mmask16 __A, const void * __B)
 {
   return (__m256i) __builtin_ia32_expandloadhi256_maskz ((const __v16hi *) __B,
-			(__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
+			(__v16hi) _mm256_avx512_setzero_si256 (), (__mmask16) __A);
 }
 
 #ifdef __OPTIMIZE__
@@ -228,7 +228,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_shrdi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
 {
   return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__B,
-	(__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
+	(__v16hi) __C, __D, (__v16hi) _mm256_avx512_setzero_si256 (), (__mmask16)__A);
 }
 
 extern __inline __m256i
@@ -245,7 +245,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_shrdi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
 {
   return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__B, (__v8si) __C,
-			__D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
+			__D, (__v8si) _mm256_avx512_setzero_si256 (), (__mmask8)__A);
 }
 
 extern __inline __m256i
@@ -269,7 +269,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_shrdi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
 {
   return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__B, (__v4di) __C,
-			__D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
+			__D, (__v4di) _mm256_avx512_setzero_si256 (), (__mmask8)__A);
 }
 
 extern __inline __m256i
@@ -293,7 +293,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_shrdi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
 {
   return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
-			__D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
+			__D, (__v8hi) _mm_avx512_setzero_si128 (), (__mmask8)__A);
 }
 
 extern __inline __m128i
@@ -317,7 +317,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_shrdi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
 {
   return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__B, (__v4si) __C,
-			__D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
+			__D, (__v4si) _mm_avx512_setzero_si128 (), (__mmask8)__A);
 }
 
 extern __inline __m128i
@@ -341,7 +341,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_shrdi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
 {
   return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__B, (__v2di) __C,
-			__D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
+			__D, (__v2di) _mm_avx512_setzero_si128 (), (__mmask8)__A);
 }
 
 extern __inline __m128i
@@ -373,7 +373,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_shldi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
 {
   return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__B,
-	(__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
+	(__v16hi) __C, __D, (__v16hi) _mm256_avx512_setzero_si256 (), (__mmask16)__A);
 }
 
 extern __inline __m256i
@@ -390,7 +390,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_shldi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
 {
   return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__B, (__v8si) __C,
-			__D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
+			__D, (__v8si) _mm256_avx512_setzero_si256 (), (__mmask8)__A);
 }
 
 extern __inline __m256i
@@ -414,7 +414,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_shldi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
 {
   return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__B, (__v4di) __C,
-			__D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
+			__D, (__v4di) _mm256_avx512_setzero_si256 (), (__mmask8)__A);
 }
 
 extern __inline __m256i
@@ -438,7 +438,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_shldi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
 {
   return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
-			__D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
+			__D, (__v8hi) _mm_avx512_setzero_si128 (), (__mmask8)__A);
 }
 
 extern __inline __m128i
@@ -462,7 +462,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_shldi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
 {
   return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__B, (__v4si) __C,
-			__D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
+			__D, (__v4si) _mm_avx512_setzero_si128 (), (__mmask8)__A);
 }
 
 extern __inline __m128i
@@ -486,7 +486,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_shldi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
 {
   return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__B, (__v2di) __C,
-			__D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
+			__D, (__v2di) _mm_avx512_setzero_si128 (), (__mmask8)__A);
 }
 
 extern __inline __m128i
@@ -509,7 +509,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   ((__m256i) \
    __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(B),		\
 				     (__v16hi)(__m256i)(C),(int)(D),	\
-				     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
+				     (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (), \
 				     (__mmask16)(A)))
 #define _mm256_shrdi_epi32(A, B, C) \
   ((__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)(__m256i)(A), \
@@ -524,7 +524,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   ((__m256i) \
    __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(B),		\
 				    (__v8si)(__m256i)(C),(int)(D),	\
-				    (__v8si)(__m256i)_mm256_setzero_si256 (), \
+				    (__v8si)(__m256i)_mm256_avx512_setzero_si256 (), \
 				    (__mmask8)(A)))
 #define _mm256_shrdi_epi64(A, B, C) \
   ((__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)(__m256i)(A), \
@@ -538,7 +538,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   ((__m256i) \
    __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(B),		\
 				    (__v4di)(__m256i)(C),(int)(D),	\
-				    (__v4di)(__m256i)_mm256_setzero_si256 (), \
+				    (__v4di)(__m256i)_mm256_avx512_setzero_si256 (), \
 				    (__mmask8)(A)))
 #define _mm_shrdi_epi16(A, B, C) \
   ((__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)(__m128i)(A), \
@@ -552,7 +552,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   ((__m128i) \
    __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(B),		\
 				    (__v8hi)(__m128i)(C),(int)(D),	\
-				    (__v8hi)(__m128i)_mm_setzero_si128 (), \
+				    (__v8hi)(__m128i)_mm_avx512_setzero_si128 (), \
 				    (__mmask8)(A)))
 #define _mm_shrdi_epi32(A, B, C) \
   ((__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)(__m128i)(A), \
@@ -566,7 +566,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   ((__m128i) \
    __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B),		\
 				    (__v4si)(__m128i)(C),(int)(D),	\
-				    (__v4si)(__m128i)_mm_setzero_si128 (), \
+				    (__v4si)(__m128i)_mm_avx512_setzero_si128 (), \
 				    (__mmask8)(A)))
 #define _mm_shrdi_epi64(A, B, C) \
   ((__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)(__m128i)(A), \
@@ -580,7 +580,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   ((__m128i) \
    __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(B),		\
 				    (__v2di)(__m128i)(C),(int)(D),	\
-				    (__v2di)(__m128i)_mm_setzero_si128 (), \
+				    (__v2di)(__m128i)_mm_avx512_setzero_si128 (), \
 				    (__mmask8)(A)))
 #define _mm256_shldi_epi16(A, B, C) \
   ((__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)(__m256i)(A), \
@@ -595,7 +595,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   ((__m256i) \
    __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(B),		\
 				     (__v16hi)(__m256i)(C),(int)(D),	\
-				     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
+				     (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (), \
 				     (__mmask16)(A)))
 #define _mm256_shldi_epi32(A, B, C) \
   ((__m256i) __builtin_ia32_vpshld_v8si ((__v8si)(__m256i)(A), \
@@ -609,7 +609,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   ((__m256i) \
    __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(B),		\
 				    (__v8si)(__m256i)(C),(int)(D),	\
-				    (__v8si)(__m256i)_mm256_setzero_si256 (), \
+				    (__v8si)(__m256i)_mm256_avx512_setzero_si256 (), \
 				    (__mmask8)(A)))
 #define _mm256_shldi_epi64(A, B, C) \
   ((__m256i) __builtin_ia32_vpshld_v4di ((__v4di)(__m256i)(A), \
@@ -623,7 +623,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   ((__m256i) \
    __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(B),		\
 				    (__v4di)(__m256i)(C),(int)(D),	\
-				    (__v4di)(__m256i)_mm256_setzero_si256 (), \
+				    (__v4di)(__m256i)_mm256_avx512_setzero_si256 (), \
 				    (__mmask8)(A)))
 #define _mm_shldi_epi16(A, B, C) \
   ((__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)(__m128i)(A), \
@@ -637,7 +637,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   ((__m128i) \
    __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(B),		\
 				    (__v8hi)(__m128i)(C),(int)(D),	\
-				    (__v8hi)(__m128i)_mm_setzero_si128 (), \
+				    (__v8hi)(__m128i)_mm_avx512_setzero_si128 (), \
 				    (__mmask8)(A)))
 #define _mm_shldi_epi32(A, B, C) \
   ((__m128i) __builtin_ia32_vpshld_v4si ((__v4si)(__m128i)(A), \
@@ -651,7 +651,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   ((__m128i) \
    __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B),		\
 				    (__v4si)(__m128i)(C),(int)(D),	\
-				    (__v4si)(__m128i)_mm_setzero_si128 (), \
+				    (__v4si)(__m128i)_mm_avx512_setzero_si128 (), \
 				    (__mmask8)(A)))
 #define _mm_shldi_epi64(A, B, C) \
   ((__m128i) __builtin_ia32_vpshld_v2di ((__v2di)(__m128i)(A), \
@@ -665,7 +665,7 @@  _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   ((__m128i) \
    __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(B),		\
 				    (__v2di)(__m128i)(C),(int)(D),	\
-				    (__v2di)(__m128i)_mm_setzero_si128 (), \
+				    (__v2di)(__m128i)_mm_avx512_setzero_si128 (), \
 				    (__mmask8)(A)))
 #endif
 
@@ -970,7 +970,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_compress_epi8 (__mmask32 __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __B,
-			(__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
+			(__v32qi) _mm256_avx512_setzero_si256 (), (__mmask32) __A);
 }
 
 extern __inline void
@@ -995,7 +995,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_expand_epi8 (__mmask32 __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_expandqi256_maskz ((__v32qi) __B,
-			(__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
+			(__v32qi) _mm256_avx512_setzero_si256 (), (__mmask32) __A);
 }
 
 extern __inline __m256i
@@ -1011,7 +1011,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_expandloadu_epi8 (__mmask32 __A, const void * __B)
 {
   return (__m256i) __builtin_ia32_expandloadqi256_maskz ((const __v32qi *) __B,
-			(__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
+			(__v32qi) _mm256_avx512_setzero_si256 (), (__mmask32) __A);
 }
 
 #ifdef __DISABLE_AVX512VBMI2VL__
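
Note the #ifdef __OPTIMIZE__ block above also switches the macro forms of
the shldi/shrdi intrinsics: without optimization these are plain macros,
so the setzero call they contain is substituted textually into the user's
function and must be valid under the caller's own target options.  A
rough sketch of what the -O0 expansion amounts to; the function name is
illustrative:

#include <immintrin.h>

__attribute__ ((target ("avx512vbmi2,avx512vl,no-evex512")))
__m128i
shrdi_macro_expansion (__mmask8 __m, __m128i __a, __m128i __b)
{
  /* _mm_maskz_shrdi_epi16 (__m, __a, __b, 3) expands to roughly:  */
  return (__m128i) __builtin_ia32_vpshrd_v8hi_mask
    ((__v8hi) __a, (__v8hi) __b, 3,
     (__v8hi) _mm_avx512_setzero_si128 (), (__mmask8) __m);
}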
diff --git a/gcc/config/i386/avx512vbmivlintrin.h b/gcc/config/i386/avx512vbmivlintrin.h
index 035408f7bba..270e9406db5 100644
--- a/gcc/config/i386/avx512vbmivlintrin.h
+++ b/gcc/config/i386/avx512vbmivlintrin.h
@@ -51,7 +51,7 @@  _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
 							  (__v32qi) __Y,
 							  (__v32qi)
-							  _mm256_setzero_si256 (),
+							  _mm256_avx512_setzero_si256 (),
 							  (__mmask32) __M);
 }
 
@@ -83,7 +83,7 @@  _mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
 							  (__v16qi) __Y,
 							  (__v16qi)
-							  _mm_setzero_si128 (),
+							  _mm_avx512_setzero_si128 (),
 							  (__mmask16) __M);
 }
 
@@ -117,7 +117,7 @@  _mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
   return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
 						     (__v32qi) __A,
 						     (__v32qi)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask32) __M);
 }
 
@@ -150,7 +150,7 @@  _mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
 						     (__v16qi) __A,
 						     (__v16qi)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask16) __M);
 }
 
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
index bc58fa4c5c1..7654bfaa87e 100644
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -59,7 +59,7 @@  _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
 						    (__v32qi)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask32) __U);
 }
 
@@ -78,7 +78,7 @@  _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
 						    (__v16qi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask16) __U);
 }
 
@@ -136,7 +136,7 @@  _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P,
 						     (__v16hi)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask16) __U);
 }
 
@@ -162,7 +162,7 @@  _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
 						     (__v8hi)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -182,7 +182,7 @@  _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
 						    (__v16hi)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask16) __U);
 }
 
@@ -201,7 +201,7 @@  _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
 						    (__v8hi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -227,7 +227,7 @@  _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P,
 						     (__v32qi)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask32) __U);
 }
 
@@ -253,7 +253,7 @@  _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
 						     (__v16qi)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask16) __U);
 }
 
@@ -324,7 +324,7 @@  _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -359,7 +359,7 @@  _mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
 						   (__v16qi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -394,7 +394,7 @@  _mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
 						   (__v16qi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -430,7 +430,7 @@  _mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
 						    (__v16qi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -466,7 +466,7 @@  _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
 						    (__v16qi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -485,7 +485,7 @@  _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
 						       (__v32qi)
-						       _mm256_setzero_si256 (),
+						       _mm256_avx512_setzero_si256 (),
 						       __M);
 }
 
@@ -504,7 +504,7 @@  _mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
 {
   return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
 							   (__v32qi)
-							   _mm256_setzero_si256 (),
+							   _mm256_avx512_setzero_si256 (),
 							   __M);
 }
 
@@ -523,7 +523,7 @@  _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
 						       (__v16qi)
-						       _mm_setzero_si128 (),
+						       _mm_avx512_setzero_si128 (),
 						       __M);
 }
 
@@ -542,7 +542,7 @@  _mm_maskz_set1_epi8 (__mmask16 __M, char __A)
 {
   return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
 							   (__v16qi)
-							   _mm_setzero_si128 (),
+							   _mm_avx512_setzero_si128 (),
 							   __M);
 }
 
@@ -561,7 +561,7 @@  _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
 						       (__v16hi)
-						       _mm256_setzero_si256 (),
+						       _mm256_avx512_setzero_si256 (),
 						       __M);
 }
 
@@ -580,7 +580,7 @@  _mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
 {
   return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
 							   (__v16hi)
-							   _mm256_setzero_si256 (),
+							   _mm256_avx512_setzero_si256 (),
 							   __M);
 }
 
@@ -599,7 +599,7 @@  _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
 						       (__v8hi)
-						       _mm_setzero_si128 (),
+						       _mm_avx512_setzero_si128 (),
 						       __M);
 }
 
@@ -618,7 +618,7 @@  _mm_maskz_set1_epi16 (__mmask8 __M, short __A)
 {
   return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
 							   (__v8hi)
-							   _mm_setzero_si128 (),
+							   _mm_avx512_setzero_si128 (),
 							   __M);
 }
 
@@ -629,7 +629,7 @@  _mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
 						     (__v16hi) __A,
 						     (__v16hi)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask16) -1);
 }
 
@@ -641,7 +641,7 @@  _mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
   return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
 						     (__v16hi) __A,
 						     (__v16hi)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask16) __M);
 }
 
@@ -663,7 +663,7 @@  _mm_permutexvar_epi16 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
 						     (__v8hi) __A,
 						     (__v8hi)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) -1);
 }
 
@@ -674,7 +674,7 @@  _mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
 						     (__v8hi) __A,
 						     (__v8hi)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __M);
 }
 
@@ -807,7 +807,7 @@  _mm256_maskz_maddubs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
 						     (__v32qi) __Y,
 						     (__v16hi)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask16) __U);
 }
 
@@ -829,7 +829,7 @@  _mm_maskz_maddubs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
 						     (__v16qi) __Y,
 						     (__v8hi)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -851,7 +851,7 @@  _mm256_maskz_madd_epi16 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
 						   (__v16hi) __B,
 						   (__v8si)
-						   _mm256_setzero_si256 (),
+						   _mm256_avx512_setzero_si256 (),
 						   (__mmask8) __U);
 }
 
@@ -873,7 +873,7 @@  _mm_maskz_madd_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
 						   (__v8hi) __B,
 						   (__v4si)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   (__mmask8) __U);
 }
 
@@ -1008,7 +1008,7 @@  _mm256_maskz_min_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __M);
 }
 
@@ -1030,7 +1030,7 @@  _mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
 						  (__v8hi) __B,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __M);
 }
 
@@ -1052,7 +1052,7 @@  _mm256_maskz_min_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __M);
 }
 
@@ -1074,7 +1074,7 @@  _mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
 						  (__v32qi) __B,
 						  (__v32qi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask32) __M);
 }
 
@@ -1096,7 +1096,7 @@  _mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
 						  (__v16qi) __B,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask16) __M);
 }
 
@@ -1118,7 +1118,7 @@  _mm256_maskz_max_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
 						  (__v32qi) __B,
 						  (__v32qi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask32) __M);
 }
 
@@ -1140,7 +1140,7 @@  _mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
 						  (__v16qi) __B,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask16) __M);
 }
 
@@ -1162,7 +1162,7 @@  _mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
 						  (__v32qi) __B,
 						  (__v32qi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask32) __M);
 }
 
@@ -1184,7 +1184,7 @@  _mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
 						  (__v16qi) __B,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask16) __M);
 }
 
@@ -1206,7 +1206,7 @@  _mm256_maskz_min_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
 						  (__v32qi) __B,
 						  (__v32qi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask32) __M);
 }
 
@@ -1228,7 +1228,7 @@  _mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
 						  (__v16qi) __B,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask16) __M);
 }
 
@@ -1250,7 +1250,7 @@  _mm256_maskz_max_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __M);
 }
 
@@ -1272,7 +1272,7 @@  _mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
 						  (__v8hi) __B,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __M);
 }
 
@@ -1294,7 +1294,7 @@  _mm256_maskz_max_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __M);
 }
 
@@ -1316,7 +1316,7 @@  _mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
 						  (__v8hi) __B,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __M);
 }
 
@@ -1338,7 +1338,7 @@  _mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
 						  (__v8hi) __B,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __M);
 }
 
@@ -1375,7 +1375,7 @@  _mm256_maskz_alignr_epi8 (__mmask32 __U, __m256i __A, __m256i __B,
 						   (__v4di) __B,
 						   __N * 8,
 						   (__v4di)
-						   _mm256_setzero_si256 (),
+						   _mm256_avx512_setzero_si256 (),
 						   (__mmask32) __U);
 }
 
@@ -1400,7 +1400,7 @@  _mm_maskz_alignr_epi8 (__mmask16 __U, __m128i __A, __m128i __B,
 						   (__v2di) __B,
 						   __N * 8,
 						   (__v2di)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   (__mmask16) __U);
 }
 
@@ -1412,7 +1412,7 @@  _mm256_dbsad_epu8 (__m256i __A, __m256i __B, const int __imm)
 						    (__v32qi) __B,
 						    __imm,
 						    (__v16hi)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask16) -1);
 }
 
@@ -1437,7 +1437,7 @@  _mm256_maskz_dbsad_epu8 (__mmask16 __U, __m256i __A, __m256i __B,
 						    (__v32qi) __B,
 						    __imm,
 						    (__v16hi)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask16) __U);
 }
 
@@ -1449,7 +1449,7 @@  _mm_dbsad_epu8 (__m128i __A, __m128i __B, const int __imm)
 						    (__v16qi) __B,
 						    __imm,
 						    (__v8hi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) -1);
 }
 
@@ -1474,7 +1474,7 @@  _mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i __B,
 						    (__v16qi) __B,
 						    __imm,
 						    (__v8hi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -1646,7 +1646,7 @@  _mm256_maskz_srli_epi16 (__mmask16 __U, __m256i __A, const int __imm)
 {
   return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __U);
 }
 
@@ -1666,7 +1666,7 @@  _mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, const int __imm)
 {
   return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -1689,7 +1689,7 @@  _mm256_maskz_shufflehi_epi16 (__mmask16 __U, __m256i __A,
   return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
 						   __imm,
 						   (__v16hi)
-						   _mm256_setzero_si256 (),
+						   _mm256_avx512_setzero_si256 (),
 						   (__mmask16) __U);
 }
 
@@ -1709,7 +1709,7 @@  _mm_maskz_shufflehi_epi16 (__mmask8 __U, __m128i __A, const int __imm)
 {
   return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
 						   (__v8hi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   (__mmask8) __U);
 }
 
@@ -1732,7 +1732,7 @@  _mm256_maskz_shufflelo_epi16 (__mmask16 __U, __m256i __A,
   return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
 						   __imm,
 						   (__v16hi)
-						   _mm256_setzero_si256 (),
+						   _mm256_avx512_setzero_si256 (),
 						   (__mmask16) __U);
 }
 
@@ -1752,7 +1752,7 @@  _mm_maskz_shufflelo_epi16 (__mmask8 __U, __m128i __A, const int __imm)
 {
   return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
 						   (__v8hi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   (__mmask8) __U);
 }
 
@@ -1772,7 +1772,7 @@  _mm256_maskz_srai_epi16 (__mmask16 __U, __m256i __A, const unsigned int __imm)
 {
   return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __U);
 }
 
@@ -1792,7 +1792,7 @@  _mm_maskz_srai_epi16 (__mmask8 __U, __m128i __A, const unsigned int __imm)
 {
   return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -1812,7 +1812,7 @@  _mm256_maskz_slli_epi16 (__mmask16 __U, __m256i __A, unsigned int __B)
 {
   return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __U);
 }
 
@@ -1831,7 +1831,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 {
   return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -1847,7 +1847,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 
 #define _mm256_maskz_srli_epi16(U, A, B)                                \
   ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A),      \
-    (int)(B), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(U)))
+    (int)(B), (__v16hi)_mm256_avx512_setzero_si256 (), (__mmask16)(U)))
 
 #define _mm_mask_srli_epi16(W, U, A, B)                                 \
   ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A),       \
@@ -1855,7 +1855,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 
 #define _mm_maskz_srli_epi16(U, A, B)                                   \
   ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A),       \
-    (int)(B), (__v8hi)_mm_setzero_si128 (), (__mmask8)(U)))
+    (int)(B), (__v8hi)_mm_avx512_setzero_si128 (), (__mmask8)(U)))
 
 #define _mm256_mask_srai_epi16(W, U, A, B)                              \
   ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A),      \
@@ -1863,7 +1863,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 
 #define _mm256_maskz_srai_epi16(U, A, B)                                \
   ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A),      \
-    (unsigned int)(B), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(U)))
+    (unsigned int)(B), (__v16hi)_mm256_avx512_setzero_si256 (), (__mmask16)(U)))
 
 #define _mm_mask_srai_epi16(W, U, A, B)                                 \
   ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A),       \
@@ -1871,7 +1871,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 
 #define _mm_maskz_srai_epi16(U, A, B)                                   \
   ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A),       \
-    (unsigned int)(B), (__v8hi)_mm_setzero_si128(), (__mmask8)(U)))
+    (unsigned int)(B), (__v8hi)_mm_avx512_setzero_si128(), (__mmask8)(U)))
 
 #define _mm256_mask_shufflehi_epi16(W, U, A, B)                                     \
   ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A), (int)(B),       \
@@ -1880,7 +1880,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 
 #define _mm256_maskz_shufflehi_epi16(U, A, B)                                       \
   ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A), (int)(B),       \
-                                             (__v16hi)(__m256i)_mm256_setzero_si256 (), \
+                                             (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (), \
                                              (__mmask16)(U)))
 
 #define _mm_mask_shufflehi_epi16(W, U, A, B)                                        \
@@ -1890,7 +1890,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 
 #define _mm_maskz_shufflehi_epi16(U, A, B)                                          \
   ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A), (int)(B),        \
-					     (__v8hi)(__m128i)_mm_setzero_si128 (), \
+					     (__v8hi)(__m128i)_mm_avx512_setzero_si128 (), \
                                              (__mmask8)(U)))
 
 #define _mm256_mask_shufflelo_epi16(W, U, A, B)                                     \
@@ -1900,7 +1900,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 
 #define _mm256_maskz_shufflelo_epi16(U, A, B)                                       \
   ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A), (int)(B),       \
-                                             (__v16hi)(__m256i)_mm256_setzero_si256 (), \
+                                             (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (), \
                                              (__mmask16)(U)))
 
 #define _mm_mask_shufflelo_epi16(W, U, A, B)                                        \
@@ -1910,13 +1910,13 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 
 #define _mm_maskz_shufflelo_epi16(U, A, B)                                          \
   ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A), (int)(B),        \
-					     (__v8hi)(__m128i)_mm_setzero_si128 (), \
+					     (__v8hi)(__m128i)_mm_avx512_setzero_si128 (), \
                                              (__mmask8)(U)))
 
 #define _mm256_maskz_alignr_epi8(U, X, Y, N)					    \
   ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X),		    \
 					    (__v4di)(__m256i)(Y), (int)((N) * 8),   \
-					    (__v4di)(__m256i)_mm256_setzero_si256 (),   \
+					    (__v4di)(__m256i)_mm256_avx512_setzero_si256 (),   \
 					    (__mmask32)(U)))
 
 #define _mm_mask_alignr_epi8(W, U, X, Y, N)					    \
@@ -1927,7 +1927,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 #define _mm_maskz_alignr_epi8(U, X, Y, N)					    \
   ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X),		    \
 					    (__v2di)(__m128i)(Y), (int)((N) * 8),   \
-					    (__v2di)(__m128i)_mm_setzero_si128 (),  \
+					    (__v2di)(__m128i)_mm_avx512_setzero_si128 (),  \
 					    (__mmask16)(U)))
 
 #define _mm_mask_slli_epi16(W, U, X, C)					\
@@ -1939,13 +1939,13 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 #define _mm_maskz_slli_epi16(U, X, C)					\
   ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X),	\
     (unsigned int)(C),							\
-    (__v8hi)(__m128i)_mm_setzero_si128 (),				\
+    (__v8hi)(__m128i)_mm_avx512_setzero_si128 (),				\
     (__mmask8)(U)))
 
 #define _mm256_dbsad_epu8(X, Y, C)                                                  \
   ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i) (X),               \
                                               (__v32qi)(__m256i) (Y), (int) (C),    \
-                                              (__v16hi)(__m256i)_mm256_setzero_si256(),\
+                                              (__v16hi)(__m256i)_mm256_avx512_setzero_si256(),\
                                               (__mmask16)-1))
 
 #define _mm256_mask_slli_epi16(W, U, X, C)				\
@@ -1957,7 +1957,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 #define _mm256_maskz_slli_epi16(U, X, C)				\
   ((__m256i)__builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X),	\
     (unsigned int)(C),							\
-    (__v16hi)(__m256i)_mm256_setzero_si256 (),				\
+    (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (),				\
     (__mmask16)(U)))
 
 #define _mm256_mask_dbsad_epu8(W, U, X, Y, C)                                       \
@@ -1969,13 +1969,13 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 #define _mm256_maskz_dbsad_epu8(U, X, Y, C)                                         \
   ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i) (X),               \
                                               (__v32qi)(__m256i) (Y), (int) (C),    \
-                                              (__v16hi)(__m256i)_mm256_setzero_si256(),\
+                                              (__v16hi)(__m256i)_mm256_avx512_setzero_si256(),\
                                               (__mmask16)(U)))
 
 #define _mm_dbsad_epu8(X, Y, C)                                                     \
   ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i) (X),               \
                                               (__v16qi)(__m128i) (Y), (int) (C),    \
-                                              (__v8hi)(__m128i)_mm_setzero_si128(), \
+                                              (__v8hi)(__m128i)_mm_avx512_setzero_si128(), \
                                               (__mmask8)-1))
 
 #define _mm_mask_dbsad_epu8(W, U, X, Y, C)                                          \
@@ -1987,7 +1987,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 #define _mm_maskz_dbsad_epu8(U, X, Y, C)                                            \
   ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i) (X),               \
                                               (__v16qi)(__m128i) (Y), (int) (C),    \
-                                              (__v8hi)(__m128i)_mm_setzero_si128(), \
+                                              (__v8hi)(__m128i)_mm_avx512_setzero_si128(), \
                                               (__mmask8)(U)))
 
 #define _mm_cmp_epi16_mask(X, Y, P)				\
@@ -2305,7 +2305,7 @@  _mm256_maskz_mulhrs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
 						    (__v16hi) __Y,
 						    (__v16hi)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask16) __U);
 }
 
@@ -2327,7 +2327,7 @@  _mm256_maskz_mulhi_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
 						   (__v16hi) __B,
 						   (__v16hi)
-						   _mm256_setzero_si256 (),
+						   _mm256_avx512_setzero_si256 (),
 						   (__mmask16) __U);
 }
 
@@ -2349,7 +2349,7 @@  _mm256_maskz_mulhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __U);
 }
 
@@ -2371,7 +2371,7 @@  _mm_maskz_mulhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
 						  (__v8hi) __B,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -2393,7 +2393,7 @@  _mm_maskz_mulhi_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
 						   (__v8hi) __B,
 						   (__v8hi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   (__mmask8) __U);
 }
 
@@ -2415,7 +2415,7 @@  _mm_maskz_mulhrs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
 						    (__v8hi) __Y,
 						    (__v8hi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -2437,7 +2437,7 @@  _mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __U);
 }
 
@@ -2459,7 +2459,7 @@  _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
 						  (__v8hi) __B,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -2478,7 +2478,7 @@  _mm256_maskz_cvtepi8_epi16 (__mmask16 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
 						    (__v16hi)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask16) __U);
 }
 
@@ -2497,7 +2497,7 @@  _mm_maskz_cvtepi8_epi16 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
 						    (__v8hi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -2516,7 +2516,7 @@  _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
 						    (__v16hi)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask16) __U);
 }
 
@@ -2535,7 +2535,7 @@  _mm_maskz_cvtepu8_epi16 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
 						    (__v8hi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -2557,7 +2557,7 @@  _mm256_maskz_avg_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
 						 (__v32qi) __B,
 						 (__v32qi)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask32) __U);
 }
 
@@ -2579,7 +2579,7 @@  _mm_maskz_avg_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
 						 (__v16qi) __B,
 						 (__v16qi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask16) __U);
 }
 
@@ -2601,7 +2601,7 @@  _mm256_maskz_avg_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
 						 (__v16hi) __B,
 						 (__v16hi)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask16) __U);
 }
 
@@ -2623,7 +2623,7 @@  _mm_maskz_avg_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
 						 (__v8hi) __B,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -2645,7 +2645,7 @@  _mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
 						 (__v32qi) __B,
 						 (__v32qi)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask32) __U);
 }
 
@@ -2667,7 +2667,7 @@  _mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
 						 (__v16hi) __B,
 						 (__v16hi)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask16) __U);
 }
 
@@ -2689,7 +2689,7 @@  _mm256_maskz_adds_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
 						  (__v32qi) __B,
 						  (__v32qi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask32) __U);
 }
 
@@ -2711,7 +2711,7 @@  _mm256_maskz_adds_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __U);
 }
 
@@ -2733,7 +2733,7 @@  _mm256_maskz_adds_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
 						   (__v32qi) __B,
 						   (__v32qi)
-						   _mm256_setzero_si256 (),
+						   _mm256_avx512_setzero_si256 (),
 						   (__mmask32) __U);
 }
 
@@ -2755,7 +2755,7 @@  _mm256_maskz_adds_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
 						   (__v16hi) __B,
 						   (__v16hi)
-						   _mm256_setzero_si256 (),
+						   _mm256_avx512_setzero_si256 (),
 						   (__mmask16) __U);
 }
 
@@ -2777,7 +2777,7 @@  _mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
 						 (__v32qi) __B,
 						 (__v32qi)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask32) __U);
 }
 
@@ -2799,7 +2799,7 @@  _mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
 						 (__v16hi) __B,
 						 (__v16hi)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask16) __U);
 }
 
@@ -2821,7 +2821,7 @@  _mm256_maskz_subs_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
 						  (__v32qi) __B,
 						  (__v32qi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask32) __U);
 }
 
@@ -2843,7 +2843,7 @@  _mm256_maskz_subs_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __U);
 }
 
@@ -2865,7 +2865,7 @@  _mm256_maskz_subs_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
 						   (__v32qi) __B,
 						   (__v32qi)
-						   _mm256_setzero_si256 (),
+						   _mm256_avx512_setzero_si256 (),
 						   (__mmask32) __U);
 }
 
@@ -2887,7 +2887,7 @@  _mm256_maskz_subs_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
 						   (__v16hi) __B,
 						   (__v16hi)
-						   _mm256_setzero_si256 (),
+						   _mm256_avx512_setzero_si256 (),
 						   (__mmask16) __U);
 }
 
@@ -2909,7 +2909,7 @@  _mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
 						 (__v16qi) __B,
 						 (__v16qi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask16) __U);
 }
 
@@ -2931,7 +2931,7 @@  _mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
 						 (__v8hi) __B,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -2953,7 +2953,7 @@  _mm256_maskz_unpackhi_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
 						     (__v32qi) __B,
 						     (__v32qi)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask32) __U);
 }
 
@@ -2975,7 +2975,7 @@  _mm_maskz_unpackhi_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
 						     (__v16qi) __B,
 						     (__v16qi)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask16) __U);
 }
 
@@ -2997,7 +2997,7 @@  _mm256_maskz_unpackhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
 						     (__v16hi) __B,
 						     (__v16hi)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask16) __U);
 }
 
@@ -3019,7 +3019,7 @@  _mm_maskz_unpackhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
 						     (__v8hi) __B,
 						     (__v8hi)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -3041,7 +3041,7 @@  _mm256_maskz_unpacklo_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
 						     (__v32qi) __B,
 						     (__v32qi)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask32) __U);
 }
 
@@ -3063,7 +3063,7 @@  _mm_maskz_unpacklo_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
 						     (__v16qi) __B,
 						     (__v16qi)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask16) __U);
 }
 
@@ -3085,7 +3085,7 @@  _mm256_maskz_unpacklo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
 						     (__v16hi) __B,
 						     (__v16hi)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask16) __U);
 }
 
@@ -3107,7 +3107,7 @@  _mm_maskz_unpacklo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
 						     (__v8hi) __B,
 						     (__v8hi)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -3481,7 +3481,7 @@  _mm256_maskz_shuffle_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
 						  (__v32qi) __B,
 						  (__v32qi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask32) __U);
 }
 
@@ -3503,7 +3503,7 @@  _mm_maskz_shuffle_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
 						  (__v16qi) __B,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask16) __U);
 }
 
@@ -3514,7 +3514,7 @@  _mm256_maskz_packs_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
 						    (__v16hi) __B,
 						    (__v32qi)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    __M);
 }
 
@@ -3536,7 +3536,7 @@  _mm_maskz_packs_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
 						    (__v8hi) __B,
 						    (__v16qi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -3558,7 +3558,7 @@  _mm256_maskz_packus_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
 						    (__v16hi) __B,
 						    (__v32qi)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    __M);
 }
 
@@ -3580,7 +3580,7 @@  _mm_maskz_packus_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
 						    (__v8hi) __B,
 						    (__v16qi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -3610,7 +3610,7 @@  _mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
 						 (__v32qi)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask32) __U);
 }
 
@@ -3629,7 +3629,7 @@  _mm_maskz_abs_epi8 (__mmask16 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
 						 (__v16qi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask16) __U);
 }
 
@@ -3648,7 +3648,7 @@  _mm256_maskz_abs_epi16 (__mmask16 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
 						 (__v16hi)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask16) __U);
 }
 
@@ -3667,7 +3667,7 @@  _mm_maskz_abs_epi16 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -3804,7 +3804,7 @@  _mm_maskz_subs_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
 						  (__v16qi) __B,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask16) __U);
 }
 
@@ -3826,7 +3826,7 @@  _mm_maskz_subs_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
 						  (__v8hi) __B,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -3848,7 +3848,7 @@  _mm_maskz_subs_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
 						   (__v16qi) __B,
 						   (__v16qi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   (__mmask16) __U);
 }
 
@@ -3870,7 +3870,7 @@  _mm_maskz_subs_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
 						   (__v8hi) __B,
 						   (__v8hi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   (__mmask8) __U);
 }
 
@@ -3892,7 +3892,7 @@  _mm256_maskz_srl_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
   return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
 						 (__v8hi) __B,
 						 (__v16hi)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask16) __U);
 }
 
@@ -3914,7 +3914,7 @@  _mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
 						 (__v8hi) __B,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -3936,7 +3936,7 @@  _mm256_maskz_sra_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
   return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
 						 (__v8hi) __B,
 						 (__v16hi)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask16) __U);
 }
 
@@ -3958,7 +3958,7 @@  _mm_maskz_sra_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
 						 (__v8hi) __B,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -3969,7 +3969,7 @@  _mm_maskz_adds_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
 						  (__v8hi) __B,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -3991,7 +3991,7 @@  _mm_maskz_adds_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
 						   (__v16qi) __B,
 						   (__v16qi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   (__mmask16) __U);
 }
 
@@ -4013,7 +4013,7 @@  _mm_maskz_adds_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
 						   (__v8hi) __B,
 						   (__v8hi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   (__mmask8) __U);
 }
 
@@ -4035,7 +4035,7 @@  _mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
 						 (__v16qi) __B,
 						 (__v16qi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask16) __U);
 }
 
@@ -4057,7 +4057,7 @@  _mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
 						 (__v8hi) __B,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -4079,7 +4079,7 @@  _mm_maskz_adds_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
 						  (__v16qi) __B,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask16) __U);
 }
 
@@ -4114,7 +4114,7 @@  _mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -4125,7 +4125,7 @@  _mm256_srav_epi16 (__m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) -1);
 }
 
@@ -4147,7 +4147,7 @@  _mm256_maskz_srav_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __U);
 }
 
@@ -4158,7 +4158,7 @@  _mm_srav_epi16 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
 						 (__v8hi) __B,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) -1);
 }
 
@@ -4180,7 +4180,7 @@  _mm_maskz_srav_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
 						 (__v8hi) __B,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -4191,7 +4191,7 @@  _mm256_srlv_epi16 (__m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) -1);
 }
 
@@ -4213,7 +4213,7 @@  _mm256_maskz_srlv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __U);
 }
 
@@ -4224,7 +4224,7 @@  _mm_srlv_epi16 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
 						 (__v8hi) __B,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) -1);
 }
 
@@ -4246,7 +4246,7 @@  _mm_maskz_srlv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
 						 (__v8hi) __B,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -4257,7 +4257,7 @@  _mm256_sllv_epi16 (__m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) -1);
 }
 
@@ -4279,7 +4279,7 @@  _mm256_maskz_sllv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
 						  (__v16hi) __B,
 						  (__v16hi)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask16) __U);
 }
 
@@ -4290,7 +4290,7 @@  _mm_sllv_epi16 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
 						 (__v8hi) __B,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) -1);
 }
 
@@ -4312,7 +4312,7 @@  _mm_maskz_sllv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
 						 (__v8hi) __B,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -4334,7 +4334,7 @@  _mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
 						 (__v8hi) __B,
 						 (__v8hi)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -4356,7 +4356,7 @@  _mm256_maskz_sll_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
   return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
 						 (__v8hi) __B,
 						 (__v16hi)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask16) __U);
 }
 
@@ -4367,7 +4367,7 @@  _mm256_maskz_packus_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
 						    (__v8si) __B,
 						    (__v16hi)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    __M);
 }
 
@@ -4389,7 +4389,7 @@  _mm_maskz_packus_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
 						    (__v4si) __B,
 						    (__v8hi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -4410,7 +4410,7 @@  _mm256_maskz_packs_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
 						    (__v8si) __B,
 						    (__v16hi)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    __M);
 }
 
@@ -4432,7 +4432,7 @@  _mm_maskz_packs_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
 						    (__v4si) __B,
 						    (__v8hi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
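[Illustration, not part of the patch] The avx512vlbwintrin.h hunks above are purely mechanical: every zero-source operand handed to a masked builtin switches from the classic _mm{,256}_setzero_si* intrin to its _avx512_ duplicate. A minimal sketch of the failure mode this avoids, assuming the command line enables 512-bit EVEX (e.g. -mavx512bw -mavx512vl) so the classic setzero intrins carry evex512 in their effective target options:

#include <immintrin.h>

/* Caller explicitly opts out of 512-bit EVEX.  */
__attribute__ ((target ("avx512bw,avx512vl,no-evex512")))
static __m128i
abs_lo_bytes (__mmask16 __M, __m128i __A)
{
  /* _mm_maskz_abs_epi8 is always_inline and, before this patch,
     expanded to a call to _mm_setzero_si128.  If that callee's
     target options include evex512 while the caller's do not, GCC
     rejects the inlining with a target-specific option mismatch.
     The _mm_avx512_setzero_si128 duplicate is declared under
     matching no-evex512 options, so the maskz intrin keeps
     inlining cleanly.  */
  return _mm_maskz_abs_epi8 (__M, __A);
}
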
diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h
index be4d59c34e4..7bb87bbd9be 100644
--- a/gcc/config/i386/avx512vldqintrin.h
+++ b/gcc/config/i386/avx512vldqintrin.h
@@ -40,7 +40,7 @@  _mm256_cvttpd_epi64 (__m256d __A)
 {
   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) -1);
 }
 
@@ -59,7 +59,7 @@  _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
 {
   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -69,7 +69,7 @@  _mm_cvttpd_epi64 (__m128d __A)
 {
   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) -1);
 }
 
@@ -88,7 +88,7 @@  _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
 {
   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -98,7 +98,7 @@  _mm256_cvttpd_epu64 (__m256d __A)
 {
   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
 						      (__v4di)
-						      _mm256_setzero_si256 (),
+						      _mm256_avx512_setzero_si256 (),
 						      (__mmask8) -1);
 }
 
@@ -117,7 +117,7 @@  _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
 {
   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
 						      (__v4di)
-						      _mm256_setzero_si256 (),
+						      _mm256_avx512_setzero_si256 (),
 						      (__mmask8) __U);
 }
 
@@ -127,7 +127,7 @@  _mm_cvttpd_epu64 (__m128d __A)
 {
   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
 						      (__v2di)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) -1);
 }
 
@@ -146,7 +146,7 @@  _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
 {
   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
 						      (__v2di)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) __U);
 }
 
@@ -156,7 +156,7 @@  _mm256_cvtpd_epi64 (__m256d __A)
 {
   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
 						    (__v4di)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) -1);
 }
 
@@ -175,7 +175,7 @@  _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
 {
   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
 						    (__v4di)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -185,7 +185,7 @@  _mm_cvtpd_epi64 (__m128d __A)
 {
   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
 						    (__v2di)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) -1);
 }
 
@@ -204,7 +204,7 @@  _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
 {
   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
 						    (__v2di)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -214,7 +214,7 @@  _mm256_cvtpd_epu64 (__m256d __A)
 {
   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) -1);
 }
 
@@ -233,7 +233,7 @@  _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
 {
   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -243,7 +243,7 @@  _mm_cvtpd_epu64 (__m128d __A)
 {
   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) -1);
 }
 
@@ -262,7 +262,7 @@  _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
 {
   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -272,7 +272,7 @@  _mm256_cvttps_epi64 (__m128 __A)
 {
   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) -1);
 }
 
@@ -291,7 +291,7 @@  _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
 {
   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -301,7 +301,7 @@  _mm_cvttps_epi64 (__m128 __A)
 {
   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) -1);
 }
 
@@ -320,7 +320,7 @@  _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -330,7 +330,7 @@  _mm256_cvttps_epu64 (__m128 __A)
 {
   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
 						      (__v4di)
-						      _mm256_setzero_si256 (),
+						      _mm256_avx512_setzero_si256 (),
 						      (__mmask8) -1);
 }
 
@@ -349,7 +349,7 @@  _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
 {
   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
 						      (__v4di)
-						      _mm256_setzero_si256 (),
+						      _mm256_avx512_setzero_si256 (),
 						      (__mmask8) __U);
 }
 
@@ -359,7 +359,7 @@  _mm_cvttps_epu64 (__m128 __A)
 {
   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
 						      (__v2di)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) -1);
 }
 
@@ -378,7 +378,7 @@  _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
 						      (__v2di)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) __U);
 }
 
@@ -409,7 +409,7 @@  _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
   return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
 							   __A,
 							   (__v4df)
-							   _mm256_setzero_ps (),
+							   _mm256_avx512_setzero_ps (),
 							   __M);
 }
 
@@ -440,7 +440,7 @@  _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
   return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
 							   __A,
 							   (__v4di)
-							   _mm256_setzero_si256 (),
+							   _mm256_avx512_setzero_si256 (),
 							   __M);
 }
 
@@ -468,7 +468,7 @@  _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
 {
   return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
 							  (__v8sf)
-							  _mm256_setzero_ps (),
+							  _mm256_avx512_setzero_ps (),
 							  __M);
 }
 
@@ -499,7 +499,7 @@  _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
   return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
 							   __A,
 							   (__v8si)
-							   _mm256_setzero_si256 (),
+							   _mm256_avx512_setzero_si256 (),
 							   __M);
 }
 
@@ -530,7 +530,7 @@  _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
   return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
 							   __A,
 							   (__v4si)
-							   _mm_setzero_si128 (),
+							   _mm_avx512_setzero_si128 (),
 							   __M);
 }
 
@@ -559,7 +559,7 @@  _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -588,7 +588,7 @@  _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -610,7 +610,7 @@  _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
 						  (__v4df) __B,
 						  (__v4df)
-						  _mm256_setzero_pd (),
+						  _mm256_avx512_setzero_pd (),
 						  (__mmask8) __U);
 }
 
@@ -632,7 +632,7 @@  _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
 						  (__v2df) __B,
 						  (__v2df)
-						  _mm_setzero_pd (),
+						  _mm_avx512_setzero_pd (),
 						  (__mmask8) __U);
 }
 
@@ -654,7 +654,7 @@  _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
 						 (__v8sf) __B,
 						 (__v8sf)
-						 _mm256_setzero_ps (),
+						 _mm256_avx512_setzero_ps (),
 						 (__mmask8) __U);
 }
 
@@ -675,7 +675,7 @@  _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
 						 (__v4sf) __B,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U);
 }
 
@@ -685,7 +685,7 @@  _mm256_cvtps_epi64 (__m128 __A)
 {
   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
 						    (__v4di)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) -1);
 }
 
@@ -704,7 +704,7 @@  _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
 {
   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
 						    (__v4di)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -714,7 +714,7 @@  _mm_cvtps_epi64 (__m128 __A)
 {
   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
 						    (__v2di)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) -1);
 }
 
@@ -733,7 +733,7 @@  _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
 						    (__v2di)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -743,7 +743,7 @@  _mm256_cvtps_epu64 (__m128 __A)
 {
   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) -1);
 }
 
@@ -762,7 +762,7 @@  _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
 {
   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -772,7 +772,7 @@  _mm_cvtps_epu64 (__m128 __A)
 {
   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) -1);
 }
 
@@ -791,7 +791,7 @@  _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -801,7 +801,7 @@  _mm256_cvtepi64_ps (__m256i __A)
 {
   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) -1);
 }
 
@@ -820,7 +820,7 @@  _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
 {
   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -830,7 +830,7 @@  _mm_cvtepi64_ps (__m128i __A)
 {
   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) -1);
 }
 
@@ -849,7 +849,7 @@  _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
 {
   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -859,7 +859,7 @@  _mm256_cvtepu64_ps (__m256i __A)
 {
   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) -1);
 }
 
@@ -878,7 +878,7 @@  _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
 {
   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -888,7 +888,7 @@  _mm_cvtepu64_ps (__m128i __A)
 {
   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) -1);
 }
 
@@ -907,7 +907,7 @@  _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
 {
   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -917,7 +917,7 @@  _mm256_cvtepi64_pd (__m256i __A)
 {
   return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
 						    (__v4df)
-						    _mm256_setzero_pd (),
+						    _mm256_avx512_setzero_pd (),
 						    (__mmask8) -1);
 }
 
@@ -936,7 +936,7 @@  _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
 {
   return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
 						    (__v4df)
-						    _mm256_setzero_pd (),
+						    _mm256_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -946,7 +946,7 @@  _mm_cvtepi64_pd (__m128i __A)
 {
   return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) -1);
 }
 
@@ -965,7 +965,7 @@  _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
 {
   return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -975,7 +975,7 @@  _mm256_cvtepu64_pd (__m256i __A)
 {
   return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
 						     (__v4df)
-						     _mm256_setzero_pd (),
+						     _mm256_avx512_setzero_pd (),
 						     (__mmask8) -1);
 }
 
@@ -994,7 +994,7 @@  _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
 {
   return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
 						     (__v4df)
-						     _mm256_setzero_pd (),
+						     _mm256_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -1016,7 +1016,7 @@  _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
 						 (__v4df) __B,
 						 (__v4df)
-						 _mm256_setzero_pd (),
+						 _mm256_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -1037,7 +1037,7 @@  _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -1058,7 +1058,7 @@  _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
 						(__v8sf)
-						_mm256_setzero_ps (),
+						_mm256_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -1079,7 +1079,7 @@  _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
 						(__v4sf) __B,
 						(__v4sf)
-						_mm_setzero_ps (),
+						_mm_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -1089,7 +1089,7 @@  _mm_cvtepu64_pd (__m128i __A)
 {
   return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
 						     (__v2df)
-						     _mm_setzero_pd (),
+						     _mm_avx512_setzero_pd (),
 						     (__mmask8) -1);
 }
 
@@ -1108,7 +1108,7 @@  _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
 {
   return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
 						     (__v2df)
-						     _mm_setzero_pd (),
+						     _mm_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -1130,7 +1130,7 @@  _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
 						 (__v4df) __B,
 						 (__v4df)
-						 _mm256_setzero_pd (),
+						 _mm256_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -1151,7 +1151,7 @@  _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -1172,7 +1172,7 @@  _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
 						(__v8sf)
-						_mm256_setzero_ps (),
+						_mm256_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -1193,7 +1193,7 @@  _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
 						(__v4sf) __B,
 						(__v4sf)
-						_mm_setzero_ps (),
+						_mm_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -1214,7 +1214,7 @@  _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
 						(__v4df) __B,
 						(__v4df)
-						_mm256_setzero_pd (),
+						_mm256_avx512_setzero_pd (),
 						(__mmask8) __U);
 }
 
@@ -1235,7 +1235,7 @@  _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
 						(__v2df) __B,
 						(__v2df)
-						_mm_setzero_pd (),
+						_mm_avx512_setzero_pd (),
 						(__mmask8) __U);
 }
 
@@ -1256,7 +1256,7 @@  _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
 					       (__v8sf) __B,
 					       (__v8sf)
-					       _mm256_setzero_ps (),
+					       _mm256_avx512_setzero_ps (),
 					       (__mmask8) __U);
 }
 
@@ -1277,7 +1277,7 @@  _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
 					       (__v4sf) __B,
 					       (__v4sf)
-					       _mm_setzero_ps (),
+					       _mm_avx512_setzero_ps (),
 					       (__mmask8) __U);
 }
 
@@ -1345,7 +1345,7 @@  _mm256_extractf64x2_pd (__m256d __A, const int __imm)
   return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
 							 __imm,
 							 (__v2df)
-							 _mm_setzero_pd (),
+							 _mm_avx512_setzero_pd (),
 							 (__mmask8) -1);
 }
 
@@ -1369,7 +1369,7 @@  _mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
   return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
 							 __imm,
 							 (__v2df)
-							 _mm_setzero_pd (),
+							 _mm_avx512_setzero_pd (),
 							 (__mmask8)
 							 __U);
 }
@@ -1381,7 +1381,7 @@  _mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
   return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
 							 __imm,
 							 (__v2di)
-							 _mm_setzero_si128 (),
+							 _mm_avx512_setzero_si128 (),
 							 (__mmask8) -1);
 }
 
@@ -1405,7 +1405,7 @@  _mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
   return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
 							 __imm,
 							 (__v2di)
-							 _mm_setzero_si128 (),
+							 _mm_avx512_setzero_si128 (),
 							 (__mmask8)
 							 __U);
 }
@@ -1416,7 +1416,7 @@  _mm256_reduce_pd (__m256d __A, int __B)
 {
   return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
 						    (__v4df)
-						    _mm256_setzero_pd (),
+						    _mm256_avx512_setzero_pd (),
 						    (__mmask8) -1);
 }
 
@@ -1435,7 +1435,7 @@  _mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
 {
   return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
 						    (__v4df)
-						    _mm256_setzero_pd (),
+						    _mm256_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -1445,7 +1445,7 @@  _mm_reduce_pd (__m128d __A, int __B)
 {
   return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) -1);
 }
 
@@ -1464,7 +1464,7 @@  _mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
 {
   return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -1474,7 +1474,7 @@  _mm256_reduce_ps (__m256 __A, int __B)
 {
   return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
 						   (__v8sf)
-						   _mm256_setzero_ps (),
+						   _mm256_avx512_setzero_ps (),
 						   (__mmask8) -1);
 }
 
@@ -1493,7 +1493,7 @@  _mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
 {
   return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
 						   (__v8sf)
-						   _mm256_setzero_ps (),
+						   _mm256_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -1503,7 +1503,7 @@  _mm_reduce_ps (__m128 __A, int __B)
 {
   return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) -1);
 }
 
@@ -1522,7 +1522,7 @@  _mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
 {
   return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -1533,7 +1533,7 @@  _mm256_range_pd (__m256d __A, __m256d __B, int __C)
   return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
 						   (__v4df) __B, __C,
 						   (__v4df)
-						   _mm256_setzero_pd (),
+						   _mm256_avx512_setzero_pd (),
 						   (__mmask8) -1);
 }
 
@@ -1555,7 +1555,7 @@  _mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
   return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
 						   (__v4df) __B, __C,
 						   (__v4df)
-						   _mm256_setzero_pd (),
+						   _mm256_avx512_setzero_pd (),
 						   (__mmask8) __U);
 }
 
@@ -1566,7 +1566,7 @@  _mm_range_pd (__m128d __A, __m128d __B, int __C)
   return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
 						   (__v2df) __B, __C,
 						   (__v2df)
-						   _mm_setzero_pd (),
+						   _mm_avx512_setzero_pd (),
 						   (__mmask8) -1);
 }
 
@@ -1588,7 +1588,7 @@  _mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
   return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
 						   (__v2df) __B, __C,
 						   (__v2df)
-						   _mm_setzero_pd (),
+						   _mm_avx512_setzero_pd (),
 						   (__mmask8) __U);
 }
 
@@ -1599,7 +1599,7 @@  _mm256_range_ps (__m256 __A, __m256 __B, int __C)
   return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
 						  (__v8sf) __B, __C,
 						  (__v8sf)
-						  _mm256_setzero_ps (),
+						  _mm256_avx512_setzero_ps (),
 						  (__mmask8) -1);
 }
 
@@ -1621,7 +1621,7 @@  _mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
   return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
 						  (__v8sf) __B, __C,
 						  (__v8sf)
-						  _mm256_setzero_ps (),
+						  _mm256_avx512_setzero_ps (),
 						  (__mmask8) __U);
 }
 
@@ -1632,7 +1632,7 @@  _mm_range_ps (__m128 __A, __m128 __B, int __C)
   return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
 						  (__v4sf) __B, __C,
 						  (__v4sf)
-						  _mm_setzero_ps (),
+						  _mm_avx512_setzero_ps (),
 						  (__mmask8) -1);
 }
 
@@ -1654,7 +1654,7 @@  _mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
   return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
 						  (__v4sf) __B, __C,
 						  (__v4sf)
-						  _mm_setzero_ps (),
+						  _mm_avx512_setzero_ps (),
 						  (__mmask8) __U);
 }
 
@@ -1735,7 +1735,7 @@  _mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
 							(__v2di) __B,
 							__imm,
 							(__v4di)
-							_mm256_setzero_si256 (),
+							_mm256_avx512_setzero_si256 (),
 							(__mmask8) -1);
 }
 
@@ -1761,7 +1761,7 @@  _mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
 							(__v2di) __B,
 							__imm,
 							(__v4di)
-							_mm256_setzero_si256 (),
+							_mm256_avx512_setzero_si256 (),
 							(__mmask8)
 							__U);
 }
@@ -1774,7 +1774,7 @@  _mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
 							(__v2df) __B,
 							__imm,
 							(__v4df)
-							_mm256_setzero_pd (),
+							_mm256_avx512_setzero_pd (),
 							(__mmask8) -1);
 }
 
@@ -1800,7 +1800,7 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 							(__v2df) __B,
 							__imm,
 							(__v4df)
-							_mm256_setzero_pd (),
+							_mm256_avx512_setzero_pd (),
 							(__mmask8)
 							__U);
 }
@@ -1809,7 +1809,7 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 #define _mm256_insertf64x2(X, Y, C)                                     \
   ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
     (__v2df)(__m128d) (Y), (int) (C),					\
-    (__v4df)(__m256d)_mm256_setzero_pd(),				\
+    (__v4df)(__m256d)_mm256_avx512_setzero_pd(),				\
     (__mmask8)-1))
 
 #define _mm256_mask_insertf64x2(W, U, X, Y, C)                          \
@@ -1821,13 +1821,13 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 #define _mm256_maskz_insertf64x2(U, X, Y, C)				\
   ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
     (__v2df)(__m128d) (Y), (int) (C),					\
-    (__v4df)(__m256d)_mm256_setzero_pd(),				\
+    (__v4df)(__m256d)_mm256_avx512_setzero_pd(),				\
     (__mmask8)(U)))
 
 #define _mm256_inserti64x2(X, Y, C)                                     \
   ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
     (__v2di)(__m128i) (Y), (int) (C),					\
-    (__v4di)(__m256i)_mm256_setzero_si256 (),				\
+    (__v4di)(__m256i)_mm256_avx512_setzero_si256 (),				\
     (__mmask8)-1))
 
 #define _mm256_mask_inserti64x2(W, U, X, Y, C)                          \
@@ -1839,12 +1839,12 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 #define _mm256_maskz_inserti64x2(U, X, Y, C)                            \
   ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
     (__v2di)(__m128i) (Y), (int) (C),					\
-    (__v4di)(__m256i)_mm256_setzero_si256 (),				\
+    (__v4di)(__m256i)_mm256_avx512_setzero_si256 (),				\
     (__mmask8)(U)))
 
 #define _mm256_extractf64x2_pd(X, C)                                    \
   ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
-    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))
+    (int) (C), (__v2df)(__m128d) _mm_avx512_setzero_pd(), (__mmask8)-1))
 
 #define _mm256_mask_extractf64x2_pd(W, U, X, C)                         \
   ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
@@ -1852,11 +1852,11 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 
 #define _mm256_maskz_extractf64x2_pd(U, X, C)                           \
   ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
-    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))
+    (int) (C), (__v2df)(__m128d) _mm_avx512_setzero_pd(), (__mmask8) (U)))
 
 #define _mm256_extracti64x2_epi64(X, C)                                 \
   ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
-    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))
+    (int) (C), (__v2di)(__m128i) _mm_avx512_setzero_si128 (), (__mmask8)-1))
 
 #define _mm256_mask_extracti64x2_epi64(W, U, X, C)                     \
   ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
@@ -1864,11 +1864,11 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 
 #define _mm256_maskz_extracti64x2_epi64(U, X, C)                        \
   ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
-    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
+    (int) (C), (__v2di)(__m128i) _mm_avx512_setzero_si128 (), (__mmask8) (U)))
 
 #define _mm256_reduce_pd(A, B)						\
   ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
-    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
+    (int)(B), (__v4df)_mm256_avx512_setzero_pd(), (__mmask8)-1))
 
 #define _mm256_mask_reduce_pd(W, U, A, B)				\
   ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
@@ -1876,11 +1876,11 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 
 #define _mm256_maskz_reduce_pd(U, A, B)					\
   ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
-    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
+    (int)(B), (__v4df)_mm256_avx512_setzero_pd(), (__mmask8)(U)))
 
 #define _mm_reduce_pd(A, B)						\
   ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
-    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1))
+    (int)(B), (__v2df)_mm_avx512_setzero_pd(), (__mmask8)-1))
 
 #define _mm_mask_reduce_pd(W, U, A, B)					\
   ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
@@ -1888,11 +1888,11 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 
 #define _mm_maskz_reduce_pd(U, A, B)					\
   ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
-    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
+    (int)(B), (__v2df)_mm_avx512_setzero_pd(), (__mmask8)(U)))
 
 #define _mm256_reduce_ps(A, B)						\
   ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
-    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
+    (int)(B), (__v8sf)_mm256_avx512_setzero_ps(), (__mmask8)-1))
 
 #define _mm256_mask_reduce_ps(W, U, A, B)				\
   ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
@@ -1900,11 +1900,11 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 
 #define _mm256_maskz_reduce_ps(U, A, B)					\
   ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
-    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
+    (int)(B), (__v8sf)_mm256_avx512_setzero_ps(), (__mmask8)(U)))
 
 #define _mm_reduce_ps(A, B)						\
   ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
-    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
+    (int)(B), (__v4sf)_mm_avx512_setzero_ps(), (__mmask8)-1))
 
 #define _mm_mask_reduce_ps(W, U, A, B)					\
   ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
@@ -1912,27 +1912,27 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 
 #define _mm_maskz_reduce_ps(U, A, B)					\
   ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
-    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
+    (int)(B), (__v4sf)_mm_avx512_setzero_ps(), (__mmask8)(U)))
 
 #define _mm256_range_pd(A, B, C)					\
   ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
     (__v4df)(__m256d)(B), (int)(C),					\
-    (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
+    (__v4df)_mm256_avx512_setzero_pd(), (__mmask8)-1))
 
 #define _mm256_maskz_range_pd(U, A, B, C)				\
   ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
     (__v4df)(__m256d)(B), (int)(C),					\
-    (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
+    (__v4df)_mm256_avx512_setzero_pd(), (__mmask8)(U)))
 
 #define _mm_range_pd(A, B, C)						\
   ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
     (__v2df)(__m128d)(B), (int)(C),					\
-    (__v2df)_mm_setzero_pd(), (__mmask8)-1))
+    (__v2df)_mm_avx512_setzero_pd(), (__mmask8)-1))
 
 #define _mm256_range_ps(A, B, C)					\
   ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
     (__v8sf)(__m256)(B), (int)(C),					\
-    (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
+    (__v8sf)_mm256_avx512_setzero_ps(), (__mmask8)-1))
 
 #define _mm256_mask_range_ps(W, U, A, B, C)				\
   ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
@@ -1942,12 +1942,12 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 #define _mm256_maskz_range_ps(U, A, B, C)				\
   ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
     (__v8sf)(__m256)(B), (int)(C),					\
-    (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
+    (__v8sf)_mm256_avx512_setzero_ps(), (__mmask8)(U)))
 
 #define _mm_range_ps(A, B, C)						\
   ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
     (__v4sf)(__m128)(B), (int)(C),					\
-    (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
+    (__v4sf)_mm_avx512_setzero_ps(), (__mmask8)-1))
 
 #define _mm_mask_range_ps(W, U, A, B, C)				\
   ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
@@ -1957,7 +1957,7 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 #define _mm_maskz_range_ps(U, A, B, C)					\
   ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
     (__v4sf)(__m128)(B), (int)(C),					\
-    (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
+    (__v4sf)_mm_avx512_setzero_ps(), (__mmask8)(U)))
 
 #define _mm256_mask_range_pd(W, U, A, B, C)				\
   ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
@@ -1972,7 +1972,7 @@  _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 #define _mm_maskz_range_pd(U, A, B, C)					\
   ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
     (__v2df)(__m128d)(B), (int)(C),					\
-    (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
+    (__v2df)_mm_avx512_setzero_pd(), (__mmask8)(U)))
 
 #define _mm256_mask_fpclass_pd_mask(u, X, C)                            \
   ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
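[Illustration, not part of the patch] The avx512vldqintrin.h hunks above also route through _mm_avx512_setzero_ps and _mm_avx512_setzero_pd, which are defined elsewhere in the series. By analogy with the four helpers the avx512vlintrin.h diff below introduces, they presumably look like this (a reconstruction, not quoted from the patch):

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avx512_setzero_ps (void)
{
  return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avx512_setzero_pd (void)
{
  return __extension__ (__m128d){ 0.0, 0.0 };
}
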
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index a40aa91b948..2b33b82b7ef 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -45,6 +45,31 @@  typedef long long __v2di_u __attribute__ ((__vector_size__ (16),	\
 typedef long long __v4di_u __attribute__ ((__vector_size__ (32),	\
 					   __may_alias__, __aligned__ (1)));
 
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avx512_setzero_si128 (void)
+{
+  return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_avx512_setzero_pd (void)
+{
+  return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_avx512_setzero_ps (void)
+{
+  return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0,
+				 0.0, 0.0, 0.0, 0.0 };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_avx512_setzero_si256 (void)
+{
+  return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
+}
+
 extern __inline __m256d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
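The four helpers introduced here are plain zero initializers; their bodies contain nothing AVX512-specific, only the vector-literal zeros. A hedged smoke test that they are bit-identical to the classic _mm{,256}_setzero_* forms (hypothetical user code; assumes a compiler carrying this series and -mavx512vl):

#include <immintrin.h>
#include <assert.h>
#include <string.h>

int
main (void)
{
  __m256i a = _mm256_avx512_setzero_si256 ();
  __m256i b = _mm256_setzero_si256 ();
  __m128i c = _mm_avx512_setzero_si128 ();
  __m128i d = _mm_setzero_si128 ();
  assert (memcmp (&a, &b, sizeof (a)) == 0);
  assert (memcmp (&c, &d, sizeof (c)) == 0);
  return 0;
}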
@@ -60,7 +85,7 @@  _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
 						  (__v4df)
-						  _mm256_setzero_pd (),
+						  _mm256_avx512_setzero_pd (),
 						  (__mmask8) __U);
 }
 
@@ -79,7 +104,7 @@  _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
 						  (__v2df)
-						  _mm_setzero_pd (),
+						  _mm_avx512_setzero_pd (),
 						  (__mmask8) __U);
 }
 
@@ -98,7 +123,7 @@  _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
 {
   return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
 						   (__v4df)
-						   _mm256_setzero_pd (),
+						   _mm256_avx512_setzero_pd (),
 						   (__mmask8) __U);
 }
 
@@ -117,7 +142,7 @@  _mm_maskz_load_pd (__mmask8 __U, void const *__P)
 {
   return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
 						   (__v2df)
-						   _mm_setzero_pd (),
+						   _mm_avx512_setzero_pd (),
 						   (__mmask8) __U);
 }
 
@@ -154,7 +179,7 @@  _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
 						 (__v8sf)
-						 _mm256_setzero_ps (),
+						 _mm256_avx512_setzero_ps (),
 						 (__mmask8) __U);
 }
 
@@ -173,7 +198,7 @@  _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U);
 }
 
@@ -192,7 +217,7 @@  _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
 {
   return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
 						  (__v8sf)
-						  _mm256_setzero_ps (),
+						  _mm256_avx512_setzero_ps (),
 						  (__mmask8) __U);
 }
 
@@ -211,7 +236,7 @@  _mm_maskz_load_ps (__mmask8 __U, void const *__P)
 {
   return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
 						  (__v4sf)
-						  _mm_setzero_ps (),
+						  _mm_avx512_setzero_ps (),
 						  (__mmask8) __U);
 }
 
@@ -248,7 +273,7 @@  _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -267,7 +292,7 @@  _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -294,7 +319,7 @@  _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
 							(__v4di)
-							_mm256_setzero_si256 (),
+							_mm256_avx512_setzero_si256 (),
 							(__mmask8)
 							__U);
 }
@@ -322,7 +347,7 @@  _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
 							(__v2di)
-							_mm_setzero_si128 (),
+							_mm_avx512_setzero_si128 (),
 							(__mmask8)
 							__U);
 }
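At the user level, the zero operand threaded into these load builtins is what gives the maskz loads their zeroing behaviour: masked-off elements come back as zero rather than as stale destination data. A short hedged example (assumes -mavx512vl; expected values in the comments):

#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  long long buf[2] __attribute__ ((aligned (16))) = { 11, 22 };
  /* Mask 0x1: element 0 is loaded, element 1 is zeroed.  */
  __m128i v = _mm_maskz_load_epi64 ((__mmask8) 0x1, buf);
  long long out[2];
  _mm_storeu_si128 ((__m128i *) out, v);
  printf ("%lld %lld\n", out[0], out[1]);	/* prints: 11 0 */
  return 0;
}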
@@ -360,7 +385,7 @@  _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
 						     (__v8si)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -379,7 +404,7 @@  _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -406,7 +431,7 @@  _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
 							(__v8si)
-							_mm256_setzero_si256 (),
+							_mm256_avx512_setzero_si256 (),
 							(__mmask8)
 							__U);
 }
@@ -434,7 +459,7 @@  _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
 							(__v4si)
-							_mm_setzero_si128 (),
+							_mm_avx512_setzero_si128 (),
 							(__mmask8)
 							__U);
 }
@@ -488,7 +513,7 @@  _mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -510,7 +535,7 @@  _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
 						 (__v4df) __B,
 						 (__v4df)
-						 _mm256_setzero_pd (),
+						 _mm256_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -531,7 +556,7 @@  _mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
 						(__v4sf) __B,
 						(__v4sf)
-						_mm_setzero_ps (),
+						_mm_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -552,7 +577,7 @@  _mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
 						(__v8sf)
-						_mm256_setzero_ps (),
+						_mm256_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -573,7 +598,7 @@  _mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
 						 (__v2df) __B,
 						 (__v2df)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -595,7 +620,7 @@  _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
 						 (__v4df) __B,
 						 (__v4df)
-						 _mm256_setzero_pd (),
+						 _mm256_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -616,7 +641,7 @@  _mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
 						(__v4sf) __B,
 						(__v4sf)
-						_mm_setzero_ps (),
+						_mm_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -637,7 +662,7 @@  _mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
 						(__v8sf)
-						_mm256_setzero_ps (),
+						_mm256_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -670,7 +695,7 @@  _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
 {
   return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
 						   (__v4df)
-						   _mm256_setzero_pd (),
+						   _mm256_avx512_setzero_pd (),
 						   (__mmask8) __U);
 }
 
@@ -689,7 +714,7 @@  _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
 {
   return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
 						   (__v2df)
-						   _mm_setzero_pd (),
+						   _mm_avx512_setzero_pd (),
 						   (__mmask8) __U);
 }
 
@@ -726,7 +751,7 @@  _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
 {
   return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
 						  (__v8sf)
-						  _mm256_setzero_ps (),
+						  _mm256_avx512_setzero_ps (),
 						  (__mmask8) __U);
 }
 
@@ -745,7 +770,7 @@  _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
 {
   return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
 						  (__v4sf)
-						  _mm_setzero_ps (),
+						  _mm_avx512_setzero_ps (),
 						  (__mmask8) __U);
 }
 
@@ -789,7 +814,7 @@  _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -815,7 +840,7 @@  _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -873,7 +898,7 @@  _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
 						     (__v8si)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -899,7 +924,7 @@  _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -1022,7 +1047,7 @@  _mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -1041,7 +1066,7 @@  _mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -1051,7 +1076,7 @@  _mm256_abs_epi64 (__m256i __A)
 {
   return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) -1);
 }
 
@@ -1070,7 +1095,7 @@  _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -1080,7 +1105,7 @@  _mm_abs_epi64 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) -1);
 }
 
@@ -1099,7 +1124,7 @@  _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -1109,7 +1134,7 @@  _mm256_cvtpd_epu32 (__m256d __A)
 {
   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) -1);
 }
 
@@ -1128,7 +1153,7 @@  _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
 {
   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -1138,7 +1163,7 @@  _mm_cvtpd_epu32 (__m128d __A)
 {
   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) -1);
 }
 
@@ -1157,7 +1182,7 @@  _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
 {
   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -1176,7 +1201,7 @@  _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
 {
   return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
 						     (__v8si)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -1195,7 +1220,7 @@  _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -1205,7 +1230,7 @@  _mm256_cvttps_epu32 (__m256 __A)
 {
   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
 						      (__v8si)
-						      _mm256_setzero_si256 (),
+						      _mm256_avx512_setzero_si256 (),
 						      (__mmask8) -1);
 }
 
@@ -1224,7 +1249,7 @@  _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
 {
   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
 						      (__v8si)
-						      _mm256_setzero_si256 (),
+						      _mm256_avx512_setzero_si256 (),
 						      (__mmask8) __U);
 }
 
@@ -1234,7 +1259,7 @@  _mm_cvttps_epu32 (__m128 __A)
 {
   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
 						      (__v4si)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) -1);
 }
 
@@ -1253,7 +1278,7 @@  _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
 						      (__v4si)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) __U);
 }
 
@@ -1272,7 +1297,7 @@  _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
 {
   return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -1291,7 +1316,7 @@  _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
 {
   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -1301,7 +1326,7 @@  _mm256_cvttpd_epu32 (__m256d __A)
 {
   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
 						      (__v4si)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) -1);
 }
 
@@ -1320,7 +1345,7 @@  _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
 {
   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
 						      (__v4si)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) __U);
 }
 
@@ -1330,7 +1355,7 @@  _mm_cvttpd_epu32 (__m128d __A)
 {
   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
 						      (__v4si)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) -1);
 }
 
@@ -1349,7 +1374,7 @@  _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
 {
   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
 						      (__v4si)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) __U);
 }
 
@@ -1368,7 +1393,7 @@  _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
 {
   return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
 						    (__v4si)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -1387,7 +1412,7 @@  _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
 {
   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
 						    (__v4si)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -1406,7 +1431,7 @@  _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
 {
   return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
 						    (__v4df)
-						    _mm256_setzero_pd (),
+						    _mm256_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -1425,7 +1450,7 @@  _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
 {
   return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -1435,7 +1460,7 @@  _mm256_cvtepu32_pd (__m128i __A)
 {
   return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
 						     (__v4df)
-						     _mm256_setzero_pd (),
+						     _mm256_avx512_setzero_pd (),
 						     (__mmask8) -1);
 }
 
@@ -1454,7 +1479,7 @@  _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
 {
   return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
 						     (__v4df)
-						     _mm256_setzero_pd (),
+						     _mm256_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -1464,7 +1489,7 @@  _mm_cvtepu32_pd (__m128i __A)
 {
   return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
 						     (__v2df)
-						     _mm_setzero_pd (),
+						     _mm_avx512_setzero_pd (),
 						     (__mmask8) -1);
 }
 
@@ -1483,7 +1508,7 @@  _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
 {
   return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
 						     (__v2df)
-						     _mm_setzero_pd (),
+						     _mm_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -1502,7 +1527,7 @@  _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
 {
   return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
 						   (__v8sf)
-						   _mm256_setzero_ps (),
+						   _mm256_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -1521,7 +1546,7 @@  _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
 {
   return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -1531,7 +1556,7 @@  _mm256_cvtepu32_ps (__m256i __A)
 {
   return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
 						    (__v8sf)
-						    _mm256_setzero_ps (),
+						    _mm256_avx512_setzero_ps (),
 						    (__mmask8) -1);
 }
 
@@ -1550,7 +1575,7 @@  _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
 {
   return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
 						    (__v8sf)
-						    _mm256_setzero_ps (),
+						    _mm256_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -1560,7 +1585,7 @@  _mm_cvtepu32_ps (__m128i __A)
 {
   return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) -1);
 }
 
@@ -1579,7 +1604,7 @@  _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
 {
   return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -1598,7 +1623,7 @@  _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
 {
   return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
 						    (__v4df)
-						    _mm256_setzero_pd (),
+						    _mm256_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -1617,7 +1642,7 @@  _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
 {
   return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -1652,7 +1677,7 @@  _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -1687,7 +1712,7 @@  _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -1722,7 +1747,7 @@  _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
 						   (__v16qi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -1757,7 +1782,7 @@  _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
 						   (__v16qi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -1793,7 +1818,7 @@  _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
 						    (__v16qi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -1829,7 +1854,7 @@  _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
 						    (__v16qi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -1839,7 +1864,7 @@  _mm_cvtepi32_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -1864,7 +1889,7 @@  _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -1874,7 +1899,7 @@  _mm256_cvtepi32_epi16 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -1899,7 +1924,7 @@  _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -1909,7 +1934,7 @@  _mm_cvtsepi32_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
 						   (__v8hi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   (__mmask8) -1);
 }
 
@@ -1935,7 +1960,7 @@  _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
 						   (__v8hi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -1970,7 +1995,7 @@  _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
 						   (__v8hi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -2005,7 +2030,7 @@  _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
 						    (__v8hi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -2040,7 +2065,7 @@  _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
 						    (__v8hi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -2075,7 +2100,7 @@  _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -2110,7 +2135,7 @@  _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
 						  (__v16qi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -2145,7 +2170,7 @@  _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
 						   (__v16qi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -2180,7 +2205,7 @@  _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
 						   (__v16qi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -2216,7 +2241,7 @@  _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
 						    (__v16qi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -2252,7 +2277,7 @@  _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
 						    (__v16qi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -2288,7 +2313,7 @@  _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -2323,7 +2348,7 @@  _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -2358,7 +2383,7 @@  _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
 						   (__v8hi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -2393,7 +2418,7 @@  _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
 						   (__v8hi)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -2428,7 +2453,7 @@  _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
 						    (__v8hi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -2463,7 +2488,7 @@  _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
 						    (__v8hi)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -2499,7 +2524,7 @@  _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -2534,7 +2559,7 @@  _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -2569,7 +2594,7 @@  _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
 						   (__v4si)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -2605,7 +2630,7 @@  _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
 						   (__v4si)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -2640,7 +2665,7 @@  _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
 						    (__v4si)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -2675,7 +2700,7 @@  _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
 						    (__v4si)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    __M);
 }
 
@@ -2694,7 +2719,7 @@  _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
 {
   return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
 						      (__v8sf)
-						      _mm256_setzero_ps (),
+						      _mm256_avx512_setzero_ps (),
 						      __M);
 }
 
@@ -2713,7 +2738,7 @@  _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
 {
   return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
 						      (__v4sf)
-						      _mm_setzero_ps (),
+						      _mm_avx512_setzero_ps (),
 						      __M);
 }
 
@@ -2732,7 +2757,7 @@  _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
 {
   return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
 						       (__v4df)
-						       _mm256_setzero_pd (),
+						       _mm256_avx512_setzero_pd (),
 						       __M);
 }
 
@@ -2751,7 +2776,7 @@  _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
 						       (__v8si)
-						       _mm256_setzero_si256 (),
+						       _mm256_avx512_setzero_si256 (),
 						       __M);
 }
 
@@ -2769,7 +2794,7 @@  _mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
 {
   return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
 							   (__v8si)
-							   _mm256_setzero_si256 (),
+							   _mm256_avx512_setzero_si256 (),
 							   __M);
 }
 
@@ -2788,7 +2813,7 @@  _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
 						       (__v4si)
-						       _mm_setzero_si128 (),
+						       _mm_avx512_setzero_si128 (),
 						       __M);
 }
 
@@ -2806,7 +2831,7 @@  _mm_maskz_set1_epi32 (__mmask8 __M, int __A)
 {
   return (__m128i)
 	 __builtin_ia32_pbroadcastd128_gpr_mask (__A,
-						 (__v4si) _mm_setzero_si128 (),
+						 (__v4si) _mm_avx512_setzero_si128 (),
 						 __M);
 }
 
@@ -2825,7 +2850,7 @@  _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
 						       (__v4di)
-						       _mm256_setzero_si256 (),
+						       _mm256_avx512_setzero_si256 (),
 						       __M);
 }
 
@@ -2843,7 +2868,7 @@  _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
   return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
 							   (__v4di)
-							   _mm256_setzero_si256 (),
+							   _mm256_avx512_setzero_si256 (),
 							   __M);
 }
 
@@ -2862,7 +2887,7 @@  _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
 						       (__v2di)
-						       _mm_setzero_si128 (),
+						       _mm_avx512_setzero_si128 (),
 						       __M);
 }
 
@@ -2880,7 +2905,7 @@  _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
   return (__m128i)
 	 __builtin_ia32_pbroadcastq128_gpr_mask (__A,
-						 (__v2di) _mm_setzero_si128 (),
+						 (__v2di) _mm_avx512_setzero_si128 (),
 						 __M);
 }
 
@@ -2908,7 +2933,7 @@  _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
 {
   return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
 							  (__v8sf)
-							  _mm256_setzero_ps (),
+							  _mm256_avx512_setzero_ps (),
 							  __M);
 }
 
@@ -2939,7 +2964,7 @@  _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
   return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
 							   __A,
 							   (__v8si)
-							   _mm256_setzero_si256 (),
+							   _mm256_avx512_setzero_si256 (),
 							   __M);
 }
 
@@ -2958,7 +2983,7 @@  _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
 						    (__v8si)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -2977,7 +3002,7 @@  _mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
 						    (__v4si)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -2996,7 +3021,7 @@  _mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
 						    (__v4di)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -3015,7 +3040,7 @@  _mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
 						    (__v2di)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -3034,7 +3059,7 @@  _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
 						    (__v8si)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -3053,7 +3078,7 @@  _mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
 						    (__v4si)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -3072,7 +3097,7 @@  _mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
 						    (__v4di)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -3091,7 +3116,7 @@  _mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
 						    (__v2di)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -3110,7 +3135,7 @@  _mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
 {
   return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
 						    (__v4di)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -3129,7 +3154,7 @@  _mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
 {
   return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
 						    (__v2di)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -3148,7 +3173,7 @@  _mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
 						    (__v8si)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -3167,7 +3192,7 @@  _mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
 						    (__v4si)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -3186,7 +3211,7 @@  _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
 						    (__v4di)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -3205,7 +3230,7 @@  _mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
 						    (__v2di)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -3224,7 +3249,7 @@  _mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
 						    (__v8si)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -3243,7 +3268,7 @@  _mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
 						    (__v4si)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -3262,7 +3287,7 @@  _mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
 						    (__v4di)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -3281,7 +3306,7 @@  _mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
 						    (__v2di)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -3300,7 +3325,7 @@  _mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
 {
   return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
 						    (__v4di)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -3319,7 +3344,7 @@  _mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
 {
   return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
 						    (__v2di)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -3329,7 +3354,7 @@  _mm256_rcp14_pd (__m256d __A)
 {
   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
 					      (__v4df)
-					      _mm256_setzero_pd (),
+					      _mm256_avx512_setzero_pd (),
 					      (__mmask8) -1);
 }
 
@@ -3348,7 +3373,7 @@  _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
 					      (__v4df)
-					      _mm256_setzero_pd (),
+					      _mm256_avx512_setzero_pd (),
 					      (__mmask8) __U);
 }
 
@@ -3358,7 +3383,7 @@  _mm_rcp14_pd (__m128d __A)
 {
   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
 					      (__v2df)
-					      _mm_setzero_pd (),
+					      _mm_avx512_setzero_pd (),
 					      (__mmask8) -1);
 }
 
@@ -3377,7 +3402,7 @@  _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
 					      (__v2df)
-					      _mm_setzero_pd (),
+					      _mm_avx512_setzero_pd (),
 					      (__mmask8) __U);
 }
 
@@ -3387,7 +3412,7 @@  _mm256_rcp14_ps (__m256 __A)
 {
   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
 					     (__v8sf)
-					     _mm256_setzero_ps (),
+					     _mm256_avx512_setzero_ps (),
 					     (__mmask8) -1);
 }
 
@@ -3406,7 +3431,7 @@  _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
 					     (__v8sf)
-					     _mm256_setzero_ps (),
+					     _mm256_avx512_setzero_ps (),
 					     (__mmask8) __U);
 }
 
@@ -3416,7 +3441,7 @@  _mm_rcp14_ps (__m128 __A)
 {
   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
 					     (__v4sf)
-					     _mm_setzero_ps (),
+					     _mm_avx512_setzero_ps (),
 					     (__mmask8) -1);
 }
 
@@ -3435,7 +3460,7 @@  _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
 					     (__v4sf)
-					     _mm_setzero_ps (),
+					     _mm_avx512_setzero_ps (),
 					     (__mmask8) __U);
 }
 
@@ -3445,7 +3470,7 @@  _mm256_rsqrt14_pd (__m256d __A)
 {
   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
 						     (__v4df)
-						     _mm256_setzero_pd (),
+						     _mm256_avx512_setzero_pd (),
 						     (__mmask8) -1);
 }
 
@@ -3464,7 +3489,7 @@  _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
 						     (__v4df)
-						     _mm256_setzero_pd (),
+						     _mm256_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -3474,7 +3499,7 @@  _mm_rsqrt14_pd (__m128d __A)
 {
   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
 						     (__v2df)
-						     _mm_setzero_pd (),
+						     _mm_avx512_setzero_pd (),
 						     (__mmask8) -1);
 }
 
@@ -3493,7 +3518,7 @@  _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
 						     (__v2df)
-						     _mm_setzero_pd (),
+						     _mm_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -3503,7 +3528,7 @@  _mm256_rsqrt14_ps (__m256 __A)
 {
   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
 						    (__v8sf)
-						    _mm256_setzero_ps (),
+						    _mm256_avx512_setzero_ps (),
 						    (__mmask8) -1);
 }
 
@@ -3522,7 +3547,7 @@  _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
 						    (__v8sf)
-						    _mm256_setzero_ps (),
+						    _mm256_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -3532,7 +3557,7 @@  _mm_rsqrt14_ps (__m128 __A)
 {
   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) -1);
 }
 
@@ -3551,7 +3576,7 @@  _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -3570,7 +3595,7 @@  _mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
 						  (__v4df)
-						  _mm256_setzero_pd (),
+						  _mm256_avx512_setzero_pd (),
 						  (__mmask8) __U);
 }
 
@@ -3589,7 +3614,7 @@  _mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
 						  (__v2df)
-						  _mm_setzero_pd (),
+						  _mm_avx512_setzero_pd (),
 						  (__mmask8) __U);
 }
 
@@ -3608,7 +3633,7 @@  _mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
 						 (__v8sf)
-						 _mm256_setzero_ps (),
+						 _mm256_avx512_setzero_ps (),
 						 (__mmask8) __U);
 }
 
@@ -3627,7 +3652,7 @@  _mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U);
 }
 
@@ -3649,7 +3674,7 @@  _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
 						 (__v8si) __B,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -3671,7 +3696,7 @@  _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
 						 (__v4di) __B,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -3693,7 +3718,7 @@  _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
 						 (__v8si) __B,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -3715,7 +3740,7 @@  _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
 						 (__v4di) __B,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -3737,7 +3762,7 @@  _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
 						 (__v4si) __B,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -3759,7 +3784,7 @@  _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
 						 (__v2di) __B,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -3781,7 +3806,7 @@  _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
 						 (__v4si) __B,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -3803,7 +3828,7 @@  _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
 						 (__v2di) __B,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -3813,7 +3838,7 @@  _mm256_getexp_ps (__m256 __A)
 {
   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
 						   (__v8sf)
-						   _mm256_setzero_ps (),
+						   _mm256_avx512_setzero_ps (),
 						   (__mmask8) -1);
 }
 
@@ -3832,7 +3857,7 @@  _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
 						   (__v8sf)
-						   _mm256_setzero_ps (),
+						   _mm256_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -3842,7 +3867,7 @@  _mm256_getexp_pd (__m256d __A)
 {
   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
 						    (__v4df)
-						    _mm256_setzero_pd (),
+						    _mm256_avx512_setzero_pd (),
 						    (__mmask8) -1);
 }
 
@@ -3861,7 +3886,7 @@  _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
 						    (__v4df)
-						    _mm256_setzero_pd (),
+						    _mm256_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -3871,7 +3896,7 @@  _mm_getexp_ps (__m128 __A)
 {
   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) -1);
 }
 
@@ -3890,7 +3915,7 @@  _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -3900,7 +3925,7 @@  _mm_getexp_pd (__m128d __A)
 {
   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) -1);
 }
 
@@ -3919,7 +3944,7 @@  _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -3941,7 +3966,7 @@  _mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
   return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
 						 (__v4si) __B,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -3963,7 +3988,7 @@  _mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
 						 (__v4si) __B,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -3985,7 +4010,7 @@  _mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
   return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
 						 (__v2di) __B,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -4007,7 +4032,7 @@  _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
 						 (__v2di) __B,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -4029,7 +4054,7 @@  _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
 						 (__v8si) __B,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -4040,7 +4065,7 @@  _mm256_scalef_pd (__m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
 						    (__v4df) __B,
 						    (__v4df)
-						    _mm256_setzero_pd (),
+						    _mm256_avx512_setzero_pd (),
 						    (__mmask8) -1);
 }
 
@@ -4062,7 +4087,7 @@  _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
 						    (__v4df) __B,
 						    (__v4df)
-						    _mm256_setzero_pd (),
+						    _mm256_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -4073,7 +4098,7 @@  _mm256_scalef_ps (__m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
 						   (__v8sf) __B,
 						   (__v8sf)
-						   _mm256_setzero_ps (),
+						   _mm256_avx512_setzero_ps (),
 						   (__mmask8) -1);
 }
 
@@ -4095,7 +4120,7 @@  _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
 						   (__v8sf) __B,
 						   (__v8sf)
-						   _mm256_setzero_ps (),
+						   _mm256_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -4106,7 +4131,7 @@  _mm_scalef_pd (__m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
 						    (__v2df) __B,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) -1);
 }
 
@@ -4128,7 +4153,7 @@  _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
 						    (__v2df) __B,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -4139,7 +4164,7 @@  _mm_scalef_ps (__m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
 						   (__v4sf) __B,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) -1);
 }
 
@@ -4160,7 +4185,7 @@  _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
 						   (__v4sf) __B,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -4964,7 +4989,7 @@  _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
 						 (__v4si) __B,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -4986,7 +5011,7 @@  _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
 						  (__v8si) __B,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -5008,7 +5033,7 @@  _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
 						  (__v4si) __B,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -5030,7 +5055,7 @@  _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
 						(__v8si) __B,
 						(__v8si)
-						_mm256_setzero_si256 (),
+						_mm256_avx512_setzero_si256 (),
 						(__mmask8) __U);
 }
 
@@ -5057,7 +5082,7 @@  _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
 						(__v4si) __B,
 						(__v4si)
-						_mm_setzero_si128 (),
+						_mm_avx512_setzero_si128 (),
 						(__mmask8) __U);
 }
 
@@ -5085,7 +5110,7 @@  _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
 						 (__v8si) __B,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -5113,7 +5138,7 @@  _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
 						 (__v4si) __B,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -5138,7 +5163,7 @@  _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
 {
   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
 						(__v4sf)
-						_mm_setzero_ps (),
+						_mm_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -5157,7 +5182,7 @@  _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
 {
   return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -5176,7 +5201,7 @@  _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
 {
   return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
 						    (__v8si)
-						    _mm256_setzero_si256 (),
+						    _mm256_avx512_setzero_si256 (),
 						    (__mmask8) __U);
 }
 
@@ -5195,7 +5220,7 @@  _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
 						    (__v4si)
-						    _mm_setzero_si128 (),
+						    _mm_avx512_setzero_si128 (),
 						    (__mmask8) __U);
 }
 
@@ -5205,7 +5230,7 @@  _mm256_cvtps_epu32 (__m256 __A)
 {
   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
 						     (__v8si)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) -1);
 }
 
@@ -5224,7 +5249,7 @@  _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
 {
   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
 						     (__v8si)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -5234,7 +5259,7 @@  _mm_cvtps_epu32 (__m128 __A)
 {
   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) -1);
 }
 
@@ -5253,7 +5278,7 @@  _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -5272,7 +5297,7 @@  _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
 						   (__v4df)
-						   _mm256_setzero_pd (),
+						   _mm256_avx512_setzero_pd (),
 						   (__mmask8) __U);
 }
 
@@ -5291,7 +5316,7 @@  _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
 						   (__v2df)
-						   _mm_setzero_pd (),
+						   _mm_avx512_setzero_pd (),
 						   (__mmask8) __U);
 }
 
@@ -5310,7 +5335,7 @@  _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
 						   (__v8sf)
-						   _mm256_setzero_ps (),
+						   _mm256_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -5329,7 +5354,7 @@  _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -5348,7 +5373,7 @@  _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
 						   (__v8sf)
-						   _mm256_setzero_ps (),
+						   _mm256_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -5367,7 +5392,7 @@  _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -5389,7 +5414,7 @@  _mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
 						     (__v4si) __B,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -5411,7 +5436,7 @@  _mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
 						     (__v8si) __B,
 						     (__v8si)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -5433,7 +5458,7 @@  _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
 						      (__v2di) __B,
 						      (__v2di)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) __U);
 }
 
@@ -5455,7 +5480,7 @@  _mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
 						      (__v4di) __B,
 						      (__v4di)
-						      _mm256_setzero_si256 (),
+						      _mm256_avx512_setzero_si256 (),
 						      (__mmask8) __U);
 }
 
@@ -5477,7 +5502,7 @@  _mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
 						     (__v4si) __B,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -5499,7 +5524,7 @@  _mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
 						     (__v8si) __B,
 						     (__v8si)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -5521,7 +5546,7 @@  _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
 						      (__v2di) __B,
 						      (__v2di)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) __U);
 }
 
@@ -5543,7 +5568,7 @@  _mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
 						      (__v4di) __B,
 						      (__v4di)
-						      _mm256_setzero_si256 (),
+						      _mm256_avx512_setzero_si256 (),
 						      (__mmask8) __U);
 }
 
@@ -5970,7 +5995,7 @@  _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
 						      (__v4df)
-						      _mm256_setzero_pd (),
+						      _mm256_avx512_setzero_pd (),
 						      (__mmask8) __U);
 }
 
@@ -5998,7 +6023,7 @@  _mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
 						      (__v2df)
-						      _mm_setzero_pd (),
+						      _mm_avx512_setzero_pd (),
 						      (__mmask8) __U);
 }
 
@@ -6026,7 +6051,7 @@  _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
 						     (__v8sf)
-						     _mm256_setzero_ps (),
+						     _mm256_avx512_setzero_ps (),
 						     (__mmask8) __U);
 }
 
@@ -6054,7 +6079,7 @@  _mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
 						     (__v4sf)
-						     _mm_setzero_ps (),
+						     _mm_avx512_setzero_ps (),
 						     (__mmask8) __U);
 }
 
@@ -6082,7 +6107,7 @@  _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
 						      (__v4di)
-						      _mm256_setzero_si256 (),
+						      _mm256_avx512_setzero_si256 (),
 						      (__mmask8) __U);
 }
 
@@ -6110,7 +6135,7 @@  _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
 						      (__v2di)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) __U);
 }
 
@@ -6138,7 +6163,7 @@  _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
 						      (__v8si)
-						      _mm256_setzero_si256 (),
+						      _mm256_avx512_setzero_si256 (),
 						      (__mmask8) __U);
 }
 
@@ -6166,7 +6191,7 @@  _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
 						      (__v4si)
-						      _mm_setzero_si128 (),
+						      _mm_avx512_setzero_si128 (),
 						      (__mmask8) __U);
 }
 
@@ -6194,7 +6219,7 @@  _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
 						     (__v4df)
-						     _mm256_setzero_pd (),
+						     _mm256_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -6214,7 +6239,7 @@  _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
 {
   return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
 							 (__v4df)
-							 _mm256_setzero_pd (),
+							 _mm256_avx512_setzero_pd (),
 							 (__mmask8)
 							 __U);
 }
@@ -6234,7 +6259,7 @@  _mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
 						     (__v2df)
-						     _mm_setzero_pd (),
+						     _mm_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -6254,7 +6279,7 @@  _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
 {
   return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
 							 (__v2df)
-							 _mm_setzero_pd (),
+							 _mm_avx512_setzero_pd (),
 							 (__mmask8)
 							 __U);
 }
@@ -6274,7 +6299,7 @@  _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
 						    (__v8sf)
-						    _mm256_setzero_ps (),
+						    _mm256_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -6293,7 +6318,7 @@  _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
 {
   return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
 							(__v8sf)
-							_mm256_setzero_ps (),
+							_mm256_avx512_setzero_ps (),
 							(__mmask8)
 							__U);
 }
@@ -6313,7 +6338,7 @@  _mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -6332,7 +6357,7 @@  _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
 {
   return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
 							(__v4sf)
-							_mm_setzero_ps (),
+							_mm_avx512_setzero_ps (),
 							(__mmask8)
 							__U);
 }
@@ -6352,7 +6377,7 @@  _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -6373,7 +6398,7 @@  _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
 							 (__v4di)
-							 _mm256_setzero_si256 (),
+							 _mm256_avx512_setzero_si256 (),
 							 (__mmask8)
 							 __U);
 }
@@ -6393,7 +6418,7 @@  _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -6413,7 +6438,7 @@  _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
 							 (__v2di)
-							 _mm_setzero_si128 (),
+							 _mm_avx512_setzero_si128 (),
 							 (__mmask8)
 							 __U);
 }
@@ -6433,7 +6458,7 @@  _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
 						     (__v8si)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -6454,7 +6479,7 @@  _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
 							 (__v8si)
-							 _mm256_setzero_si256 (),
+							 _mm256_avx512_setzero_si256 (),
 							 (__mmask8)
 							 __U);
 }
@@ -6474,7 +6499,7 @@  _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -6494,7 +6519,7 @@  _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
 							 (__v4si)
-							 _mm_setzero_si128 (),
+							 _mm_avx512_setzero_si128 (),
 							 (__mmask8)
 							 __U);
 }
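The compress/expand hunks above all feed the new zero helpers into always_inline builtin wrappers. A minimal sketch of the situation this protects, where the function name foo and the option set are purely illustrative:

#include <immintrin.h>

/* Illustrative only: when a caller carries the no-evex512 attribute,
   an AVX512VL intrin such as _mm256_maskz_expand_epi64 has to reach
   a setzero helper compiled with compatible target options, or the
   always_inline chain can fail with a target-specific option
   mismatch.  The avx512-prefixed setzero calls above provide that
   helper.  */
__attribute__ ((target ("avx512vl,no-evex512")))
__m256i
foo (__mmask8 m, __m256i a)
{
  return _mm256_maskz_expand_epi64 (m, a);
}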
@@ -6894,7 +6919,7 @@  _mm_srav_epi64 (__m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
 						  (__v2di) __Y,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -6916,7 +6941,7 @@  _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
 						  (__v2di) __Y,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -6938,7 +6963,7 @@  _mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
 						 (__v8si) __Y,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -6960,7 +6985,7 @@  _mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
 						 (__v4si) __Y,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -6982,7 +7007,7 @@  _mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
 						 (__v4di) __Y,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -7004,7 +7029,7 @@  _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
 						 (__v2di) __Y,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -7026,7 +7051,7 @@  _mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
 						 (__v8si) __Y,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -7048,7 +7073,7 @@  _mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
 						 (__v4si) __Y,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -7070,7 +7095,7 @@  _mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
 						 (__v8si) __Y,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -7092,7 +7117,7 @@  _mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
 						 (__v4si) __Y,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -7114,7 +7139,7 @@  _mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
 						 (__v4di) __Y,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -7136,7 +7161,7 @@  _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
 						 (__v2di) __Y,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -7147,7 +7172,7 @@  _mm256_rolv_epi32 (__m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
 						  (__v8si) __B,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) -1);
 }
 
@@ -7169,7 +7194,7 @@  _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
 						  (__v8si) __B,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -7180,7 +7205,7 @@  _mm_rolv_epi32 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
 						  (__v4si) __B,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -7202,7 +7227,7 @@  _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
 						  (__v4si) __B,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -7213,7 +7238,7 @@  _mm256_rorv_epi32 (__m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
 						  (__v8si) __B,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) -1);
 }
 
@@ -7235,7 +7260,7 @@  _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
 						  (__v8si) __B,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -7246,7 +7271,7 @@  _mm_rorv_epi32 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
 						  (__v4si) __B,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -7268,7 +7293,7 @@  _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
 						  (__v4si) __B,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -7279,7 +7304,7 @@  _mm256_rolv_epi64 (__m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) -1);
 }
 
@@ -7301,7 +7326,7 @@  _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -7312,7 +7337,7 @@  _mm_rolv_epi64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -7334,7 +7359,7 @@  _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -7345,7 +7370,7 @@  _mm256_rorv_epi64 (__m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) -1);
 }
 
@@ -7367,7 +7392,7 @@  _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -7378,7 +7403,7 @@  _mm_rorv_epi64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -7400,7 +7425,7 @@  _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -7411,7 +7436,7 @@  _mm256_srav_epi64 (__m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
 						  (__v4di) __Y,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) -1);
 }
 
@@ -7433,7 +7458,7 @@  _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
 						  (__v4di) __Y,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -7454,7 +7479,7 @@  _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
 						 (__v4di) __B,
 						 (__v4di)
-						 _mm256_setzero_pd (),
+						 _mm256_avx512_setzero_pd (),
 						 __U);
 }
 
@@ -7475,7 +7500,7 @@  _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
 						 (__v2di) __B,
 						 (__v2di)
-						 _mm_setzero_pd (),
+						 _mm_avx512_setzero_pd (),
 						 __U);
 }
 
@@ -7496,7 +7521,7 @@  _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_pd (),
+						  _mm256_avx512_setzero_pd (),
 						  __U);
 }
 
@@ -7517,7 +7542,7 @@  _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_pd (),
+						  _mm_avx512_setzero_pd (),
 						  __U);
 }
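Note that the and/andnot hunks above keep the pre-existing quirk of zero-filling the integer intrins through a _pd setzero plus a vector cast. The cast only reinterprets an all-zero bit pattern, so the rename is behaviour-preserving there as well; a small illustration, with the helper name zero_v4di_via_pd invented for this sketch:

#include <immintrin.h>

/* Casting a zeroed double vector to a 64-bit integer vector type is a
   pure reinterpretation of the (all-zero) bits, identical to an
   integer setzero.  */
static inline __v4di
zero_v4di_via_pd (void)
{
  return (__v4di) _mm256_setzero_pd ();
}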
 
@@ -7539,7 +7564,7 @@  _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
 						(__v4di) __B,
 						(__v4di)
-						_mm256_setzero_si256 (),
+						_mm256_avx512_setzero_si256 (),
 						(__mmask8) __U);
 }
 
@@ -7566,7 +7591,7 @@  _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
 						(__v2di) __B,
 						(__v2di)
-						_mm_setzero_si128 (),
+						_mm_avx512_setzero_si128 (),
 						(__mmask8) __U);
 }
 
@@ -7594,7 +7619,7 @@  _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
 						 (__v4di) __B,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -7622,7 +7647,7 @@  _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
 						 (__v2di) __B,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -7650,7 +7675,7 @@  _mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
 						 (__v4df) __B,
 						 (__v4df)
-						 _mm256_setzero_pd (),
+						 _mm256_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -7671,7 +7696,7 @@  _mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
 						(__v8sf)
-						_mm256_setzero_ps (),
+						_mm256_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -7692,7 +7717,7 @@  _mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
 					     (__v4sf) __B,
 					     (__v4sf)
-					     _mm_setzero_ps (),
+					     _mm_avx512_setzero_ps (),
 					     (__mmask8) __U);
 }
 
@@ -7713,7 +7738,7 @@  _mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
 					      (__v2df) __B,
 					      (__v2df)
-					      _mm_setzero_pd (),
+					      _mm_avx512_setzero_pd (),
 					      (__mmask8) __U);
 }
 
@@ -7746,7 +7771,7 @@  _mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
 						 (__v4df) __B,
 						 (__v4df)
-						 _mm256_setzero_pd (),
+						 _mm256_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -7767,7 +7792,7 @@  _mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
 						 (__v4df) __B,
 						 (__v4df)
-						 _mm256_setzero_pd (),
+						 _mm256_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -7788,7 +7813,7 @@  _mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
 						(__v8sf)
-						_mm256_setzero_ps (),
+						_mm256_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -7799,7 +7824,7 @@  _mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
 						(__v8sf)
-						_mm256_setzero_ps (),
+						_mm256_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -7830,7 +7855,7 @@  _mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
 					     (__v4sf) __B,
 					     (__v4sf)
-					     _mm_setzero_ps (),
+					     _mm_avx512_setzero_ps (),
 					     (__mmask8) __U);
 }
 
@@ -7841,7 +7866,7 @@  _mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
 					     (__v4sf) __B,
 					     (__v4sf)
-					     _mm_setzero_ps (),
+					     _mm_avx512_setzero_ps (),
 					     (__mmask8) __U);
 }
 
@@ -7862,7 +7887,7 @@  _mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
 					     (__v4sf) __B,
 					     (__v4sf)
-					     _mm_setzero_ps (),
+					     _mm_avx512_setzero_ps (),
 					     (__mmask8) __U);
 }
 
@@ -7883,7 +7908,7 @@  _mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
 					      (__v2df) __B,
 					      (__v2df)
-					      _mm_setzero_pd (),
+					      _mm_avx512_setzero_pd (),
 					      (__mmask8) __U);
 }
 
@@ -7904,7 +7929,7 @@  _mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
 					      (__v2df) __B,
 					      (__v2df)
-					      _mm_setzero_pd (),
+					      _mm_avx512_setzero_pd (),
 					      (__mmask8) __U);
 }
 
@@ -7925,7 +7950,7 @@  _mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
 					      (__v2df) __B,
 					      (__v2df)
-					      _mm_setzero_pd (),
+					      _mm_avx512_setzero_pd (),
 					      (__mmask8) __U);
 }
 
@@ -7946,7 +7971,7 @@  _mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
 						(__v8sf) __B,
 						(__v8sf)
-						_mm256_setzero_ps (),
+						_mm256_avx512_setzero_ps (),
 						(__mmask8) __U);
 }
 
@@ -7968,7 +7993,7 @@  _mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
 						 (__v4df) __B,
 						 (__v4df)
-						 _mm256_setzero_pd (),
+						 _mm256_avx512_setzero_pd (),
 						 (__mmask8) __U);
 }
 
@@ -7979,7 +8004,7 @@  _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  __M);
 }
 
@@ -8000,7 +8025,7 @@  _mm256_min_epi64 (__m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) -1);
 }
 
@@ -8021,7 +8046,7 @@  _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  __M);
 }
 
@@ -8032,7 +8057,7 @@  _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  __M);
 }
 
@@ -8043,7 +8068,7 @@  _mm256_max_epi64 (__m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) -1);
 }
 
@@ -8054,7 +8079,7 @@  _mm256_max_epu64 (__m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) -1);
 }
 
@@ -8075,7 +8100,7 @@  _mm256_min_epu64 (__m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) -1);
 }
 
@@ -8096,7 +8121,7 @@  _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
 						  (__v4di) __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  __M);
 }
 
@@ -8107,7 +8132,7 @@  _mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
 						  (__v8si) __B,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  __M);
 }
 
@@ -8128,7 +8153,7 @@  _mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
 						  (__v8si) __B,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  __M);
 }
 
@@ -8149,7 +8174,7 @@  _mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
 						  (__v8si) __B,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  __M);
 }
 
@@ -8170,7 +8195,7 @@  _mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
 						  (__v8si) __B,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  __M);
 }
 
@@ -8191,7 +8216,7 @@  _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -8212,7 +8237,7 @@  _mm_min_epi64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -8233,7 +8258,7 @@  _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -8244,7 +8269,7 @@  _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -8255,7 +8280,7 @@  _mm_max_epi64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -8266,7 +8291,7 @@  _mm_max_epu64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -8287,7 +8312,7 @@  _mm_min_epu64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -8308,7 +8333,7 @@  _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
 						  (__v2di) __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -8319,7 +8344,7 @@  _mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
 						  (__v4si) __B,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -8340,7 +8365,7 @@  _mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
 						  (__v4si) __B,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -8361,7 +8386,7 @@  _mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
 						  (__v4si) __B,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -8382,7 +8407,7 @@  _mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
 						  (__v4si) __B,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -8414,7 +8439,7 @@  _mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
 						    (__v4df) __B,
 						    (__v4df)
-						    _mm256_setzero_pd (),
+						    _mm256_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -8436,7 +8461,7 @@  _mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
 						    (__v2df) __B,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -8469,7 +8494,7 @@  _mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
   return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
 						    (__v4df) __B,
 						    (__v4df)
-						    _mm256_setzero_pd (),
+						    _mm256_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -8491,7 +8516,7 @@  _mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
   return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
 						    (__v2df) __B,
 						    (__v2df)
-						    _mm_setzero_pd (),
+						    _mm_avx512_setzero_pd (),
 						    (__mmask8) __U);
 }
 
@@ -8513,7 +8538,7 @@  _mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
 						   (__v8sf) __B,
 						   (__v8sf)
-						   _mm256_setzero_ps (),
+						   _mm256_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -8534,7 +8559,7 @@  _mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
 						   (__v4sf) __B,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -8553,7 +8578,7 @@  _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
 {
   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U);
 }
 
@@ -8564,7 +8589,7 @@  _mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
   return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
 						   (__v8sf) __B,
 						   (__v8sf)
-						   _mm256_setzero_ps (),
+						   _mm256_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -8583,7 +8608,7 @@  _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
 {
   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
 						    (__v8sf)
-						    _mm256_setzero_ps (),
+						    _mm256_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -8604,7 +8629,7 @@  _mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
   return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
 						   (__v4sf) __B,
 						   (__v4sf)
-						   _mm_setzero_ps (),
+						   _mm_avx512_setzero_ps (),
 						   (__mmask8) __U);
 }
 
@@ -8626,7 +8651,7 @@  _mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
   return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
 						 (__v4si) __B,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -8648,7 +8673,7 @@  _mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
 						 (__v4si) __B,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -8659,7 +8684,7 @@  _mm256_sra_epi64 (__m256i __A, __m128i __B)
   return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
 						 (__v2di) __B,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) -1);
 }
 
@@ -8681,7 +8706,7 @@  _mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
   return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
 						 (__v2di) __B,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -8692,7 +8717,7 @@  _mm_sra_epi64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
 						 (__v2di) __B,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) -1);
 }
 
@@ -8714,7 +8739,7 @@  _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
 						 (__v2di) __B,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -8736,7 +8761,7 @@  _mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
 						 (__v4si) __B,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -8758,7 +8783,7 @@  _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
 						 (__v2di) __B,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -8780,7 +8805,7 @@  _mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
   return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
 						 (__v4si) __B,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -8802,7 +8827,7 @@  _mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
   return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
 						 (__v2di) __B,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -8824,7 +8849,7 @@  _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
   return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
 						    (__v8si) __X,
 						    (__v8sf)
-						    _mm256_setzero_ps (),
+						    _mm256_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -8835,7 +8860,7 @@  _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
   return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
 						     (__v4di) __X,
 						     (__v4df)
-						     _mm256_setzero_pd (),
+						     _mm256_avx512_setzero_pd (),
 						     (__mmask8) -1);
 }
 
@@ -8857,7 +8882,7 @@  _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
   return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
 						     (__v4di) __X,
 						     (__v4df)
-						     _mm256_setzero_pd (),
+						     _mm256_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -8880,7 +8905,7 @@  _mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
   return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
 							(__v4di) __C,
 							(__v4df)
-							_mm256_setzero_pd (),
+							_mm256_avx512_setzero_pd (),
 							(__mmask8)
 							__U);
 }
@@ -8903,7 +8928,7 @@  _mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
   return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
 						       (__v8si) __C,
 						       (__v8sf)
-						       _mm256_setzero_ps (),
+						       _mm256_avx512_setzero_ps (),
 						       (__mmask8) __U);
 }
 
@@ -8925,7 +8950,7 @@  _mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
   return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
 						     (__v2di) __C,
 						     (__v2df)
-						     _mm_setzero_pd (),
+						     _mm_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -8947,7 +8972,7 @@  _mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
   return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
 						    (__v4si) __C,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -8958,7 +8983,7 @@  _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
   return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
 						  (__v8si) __B,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  __M);
 }
 
@@ -8969,7 +8994,7 @@  _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
 						     (__v4di) __X,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     __M);
 }
 
@@ -8990,7 +9015,7 @@  _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
 						  (__v4si) __B,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -9021,7 +9046,7 @@  _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
 						  (__v8si) __Y,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  __M);
 }
 
@@ -9042,7 +9067,7 @@  _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
 						  (__v4si) __Y,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  __M);
 }
 
@@ -9053,7 +9078,7 @@  _mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
 						     (__v4di) __X,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) -1);
 }
 
@@ -9085,7 +9110,7 @@  _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
 						     (__v8si) __X,
 						     (__v8si)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     __M);
 }
 
@@ -9096,7 +9121,7 @@  _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
 						   (__v8si) __Y,
 						   (__v4di)
-						   _mm256_setzero_si256 (),
+						   _mm256_avx512_setzero_si256 (),
 						   __M);
 }
 
@@ -9117,7 +9142,7 @@  _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
 						   (__v4si) __Y,
 						   (__v2di)
-						   _mm_setzero_si128 (),
+						   _mm_avx512_setzero_si128 (),
 						   __M);
 }
 
@@ -9128,7 +9153,7 @@  _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
   return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
 						     (__v8si) __X,
 						     (__v8si)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) -1);
 }
 
@@ -9727,7 +9752,7 @@  _mm256_permutex_epi64 (__m256i __X, const int __I)
   return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
 					      __I,
 					      (__v4di)
-					      _mm256_setzero_si256(),
+					      _mm256_avx512_setzero_si256(),
 					      (__mmask8) -1);
 }
 
@@ -9749,7 +9774,7 @@  _mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
   return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
 						  __I,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __M);
 }
 
@@ -9772,7 +9797,7 @@  _mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
   return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
 						  (__v4df) __B, __imm,
 						  (__v4df)
-						  _mm256_setzero_pd (),
+						  _mm256_avx512_setzero_pd (),
 						  (__mmask8) __U);
 }
 
@@ -9795,7 +9820,7 @@  _mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
   return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
 						  (__v2df) __B, __imm,
 						  (__v2df)
-						  _mm_setzero_pd (),
+						  _mm_avx512_setzero_pd (),
 						  (__mmask8) __U);
 }
 
@@ -9818,7 +9843,7 @@  _mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
   return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
 						 (__v8sf) __B, __imm,
 						 (__v8sf)
-						 _mm256_setzero_ps (),
+						 _mm256_avx512_setzero_ps (),
 						 (__mmask8) __U);
 }
 
@@ -9841,7 +9866,7 @@  _mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
   return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
 						 (__v4sf) __B, __imm,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U);
 }
 
@@ -9853,7 +9878,7 @@  _mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
 							(__v4si) __B,
 							__imm,
 							(__v8si)
-							_mm256_setzero_si256 (),
+							_mm256_avx512_setzero_si256 (),
 							(__mmask8) -1);
 }
 
@@ -9879,7 +9904,7 @@  _mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
 							(__v4si) __B,
 							__imm,
 							(__v8si)
-							_mm256_setzero_si256 (),
+							_mm256_avx512_setzero_si256 (),
 							(__mmask8)
 							__U);
 }
@@ -9892,7 +9917,7 @@  _mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
 						       (__v4sf) __B,
 						       __imm,
 						       (__v8sf)
-						       _mm256_setzero_ps (),
+						       _mm256_avx512_setzero_ps (),
 						       (__mmask8) -1);
 }
 
@@ -9917,7 +9942,7 @@  _mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
 						       (__v4sf) __B,
 						       __imm,
 						       (__v8sf)
-						       _mm256_setzero_ps (),
+						       _mm256_avx512_setzero_ps (),
 						       (__mmask8) __U);
 }
 
@@ -9928,7 +9953,7 @@  _mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
   return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
 							 __imm,
 							 (__v4si)
-							 _mm_setzero_si128 (),
+							 _mm_avx512_setzero_si128 (),
 							 (__mmask8) -1);
 }
 
@@ -9952,7 +9977,7 @@  _mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
   return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
 							 __imm,
 							 (__v4si)
-							 _mm_setzero_si128 (),
+							 _mm_avx512_setzero_si128 (),
 							 (__mmask8)
 							 __U);
 }
@@ -9964,7 +9989,7 @@  _mm256_extractf32x4_ps (__m256 __A, const int __imm)
   return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
 							__imm,
 							(__v4sf)
-							_mm_setzero_ps (),
+							_mm_avx512_setzero_ps (),
 							(__mmask8) -1);
 }
 
@@ -9988,7 +10013,7 @@  _mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
   return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
 							__imm,
 							(__v4sf)
-							_mm_setzero_ps (),
+							_mm_avx512_setzero_ps (),
 							(__mmask8)
 							__U);
 }
@@ -10001,7 +10026,7 @@  _mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
 						       (__v4di) __B,
 						       __imm,
 						       (__v4di)
-						       _mm256_setzero_si256 (),
+						       _mm256_avx512_setzero_si256 (),
 						       (__mmask8) -1);
 }
 
@@ -10026,7 +10051,7 @@  _mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
 						       (__v4di) __B,
 						       __imm,
 						       (__v4di)
-						       _mm256_setzero_si256 (),
+						       _mm256_avx512_setzero_si256 (),
 						       (__mmask8) __U);
 }
 
@@ -10038,7 +10063,7 @@  _mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
 						       (__v8si) __B,
 						       __imm,
 						       (__v8si)
-						       _mm256_setzero_si256 (),
+						       _mm256_avx512_setzero_si256 (),
 						       (__mmask8) -1);
 }
 
@@ -10063,7 +10088,7 @@  _mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
 						       (__v8si) __B,
 						       __imm,
 						       (__v8si)
-						       _mm256_setzero_si256 (),
+						       _mm256_avx512_setzero_si256 (),
 						       (__mmask8) __U);
 }
 
@@ -10075,7 +10100,7 @@  _mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
 						       (__v4df) __B,
 						       __imm,
 						       (__v4df)
-						       _mm256_setzero_pd (),
+						       _mm256_avx512_setzero_pd (),
 						       (__mmask8) -1);
 }
 
@@ -10100,7 +10125,7 @@  _mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
 						       (__v4df) __B,
 						       __imm,
 						       (__v4df)
-						       _mm256_setzero_pd (),
+						       _mm256_avx512_setzero_pd (),
 						       (__mmask8) __U);
 }
 
@@ -10112,7 +10137,7 @@  _mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
 						      (__v8sf) __B,
 						      __imm,
 						      (__v8sf)
-						      _mm256_setzero_ps (),
+						      _mm256_avx512_setzero_ps (),
 						      (__mmask8) -1);
 }
 
@@ -10137,7 +10162,7 @@  _mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
 						      (__v8sf) __B,
 						      __imm,
 						      (__v8sf)
-						      _mm256_setzero_ps (),
+						      _mm256_avx512_setzero_ps (),
 						      (__mmask8) __U);
 }
 
@@ -10300,7 +10325,7 @@  _mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const unsigned int __imm)
 {
   return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -10320,7 +10345,7 @@  _mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const unsigned int __imm)
 {
   return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -10340,7 +10365,7 @@  _mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const unsigned int __imm)
 {
   return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -10360,7 +10385,7 @@  _mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const unsigned int __imm)
 {
   return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -10535,7 +10560,7 @@  _mm256_roundscale_ps (__m256 __A, const int __imm)
   return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
 						      __imm,
 						      (__v8sf)
-						      _mm256_setzero_ps (),
+						      _mm256_avx512_setzero_ps (),
 						      (__mmask8) -1);
 }
 
@@ -10557,7 +10582,7 @@  _mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
   return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
 						      __imm,
 						      (__v8sf)
-						      _mm256_setzero_ps (),
+						      _mm256_avx512_setzero_ps (),
 						      (__mmask8) __U);
 }
 
@@ -10568,7 +10593,7 @@  _mm256_roundscale_pd (__m256d __A, const int __imm)
   return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
 						       __imm,
 						       (__v4df)
-						       _mm256_setzero_pd (),
+						       _mm256_avx512_setzero_pd (),
 						       (__mmask8) -1);
 }
 
@@ -10590,7 +10615,7 @@  _mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
   return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
 						       __imm,
 						       (__v4df)
-						       _mm256_setzero_pd (),
+						       _mm256_avx512_setzero_pd (),
 						       (__mmask8) __U);
 }
 
@@ -10601,7 +10626,7 @@  _mm_roundscale_ps (__m128 __A, const int __imm)
   return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
 						      __imm,
 						      (__v4sf)
-						      _mm_setzero_ps (),
+						      _mm_avx512_setzero_ps (),
 						      (__mmask8) -1);
 }
 
@@ -10623,7 +10648,7 @@  _mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
   return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
 						      __imm,
 						      (__v4sf)
-						      _mm_setzero_ps (),
+						      _mm_avx512_setzero_ps (),
 						      (__mmask8) __U);
 }
 
@@ -10634,7 +10659,7 @@  _mm_roundscale_pd (__m128d __A, const int __imm)
   return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
 						       __imm,
 						       (__v2df)
-						       _mm_setzero_pd (),
+						       _mm_avx512_setzero_pd (),
 						       (__mmask8) -1);
 }
 
@@ -10656,7 +10681,7 @@  _mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
   return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
 						       __imm,
 						       (__v2df)
-						       _mm_setzero_pd (),
+						       _mm_avx512_setzero_pd (),
 						       (__mmask8) __U);
 }
 
@@ -10668,7 +10693,7 @@  _mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
   return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
 						    (__C << 2) | __B,
 						    (__v8sf)
-						    _mm256_setzero_ps (),
+						    _mm256_avx512_setzero_ps (),
 						    (__mmask8) -1);
 }
 
@@ -10693,7 +10718,7 @@  _mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
   return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
 						    (__C << 2) | __B,
 						    (__v8sf)
-						    _mm256_setzero_ps (),
+						    _mm256_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -10705,7 +10730,7 @@  _mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
   return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
 						    (__C << 2) | __B,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) -1);
 }
 
@@ -10730,7 +10755,7 @@  _mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
   return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
 						    (__C << 2) | __B,
 						    (__v4sf)
-						    _mm_setzero_ps (),
+						    _mm_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -10742,7 +10767,7 @@  _mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
   return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
 						     (__C << 2) | __B,
 						     (__v4df)
-						     _mm256_setzero_pd (),
+						     _mm256_avx512_setzero_pd (),
 						     (__mmask8) -1);
 }
 
@@ -10767,7 +10792,7 @@  _mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
   return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
 						     (__C << 2) | __B,
 						     (__v4df)
-						     _mm256_setzero_pd (),
+						     _mm256_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -10779,7 +10804,7 @@  _mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
   return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
 						     (__C << 2) | __B,
 						     (__v2df)
-						     _mm_setzero_pd (),
+						     _mm_avx512_setzero_pd (),
 						     (__mmask8) -1);
 }
 
@@ -10804,7 +10829,7 @@  _mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
   return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
 						     (__C << 2) | __B,
 						     (__v2df)
-						     _mm_setzero_pd (),
+						     _mm_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -11337,7 +11362,7 @@  _mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
 {
   return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -11358,7 +11383,7 @@  _mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
 {
   return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -11368,7 +11393,7 @@  _mm256_rol_epi32 (__m256i __A, const int __B)
 {
   return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) -1);
 }
 
@@ -11388,7 +11413,7 @@  _mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
 {
   return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -11398,7 +11423,7 @@  _mm_rol_epi32 (__m128i __A, const int __B)
 {
   return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) -1);
 }
 
@@ -11418,7 +11443,7 @@  _mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
 {
   return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -11428,7 +11453,7 @@  _mm256_ror_epi32 (__m256i __A, const int __B)
 {
   return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) -1);
 }
 
@@ -11448,7 +11473,7 @@  _mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
 {
   return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
 						 (__v8si)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -11458,7 +11483,7 @@  _mm_ror_epi32 (__m128i __A, const int __B)
 {
   return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) -1);
 }
 
@@ -11478,7 +11503,7 @@  _mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
 {
   return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
 						 (__v4si)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -11488,7 +11513,7 @@  _mm256_rol_epi64 (__m256i __A, const int __B)
 {
   return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) -1);
 }
 
@@ -11508,7 +11533,7 @@  _mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
 {
   return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -11518,7 +11543,7 @@  _mm_rol_epi64 (__m128i __A, const int __B)
 {
   return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) -1);
 }
 
@@ -11538,7 +11563,7 @@  _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
 {
   return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -11548,7 +11573,7 @@  _mm256_ror_epi64 (__m256i __A, const int __B)
 {
   return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) -1);
 }
 
@@ -11568,7 +11593,7 @@  _mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
 {
   return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
 						 (__v4di)
-						 _mm256_setzero_si256 (),
+						 _mm256_avx512_setzero_si256 (),
 						 (__mmask8) __U);
 }
 
@@ -11578,7 +11603,7 @@  _mm_ror_epi64 (__m128i __A, const int __B)
 {
   return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) -1);
 }
 
@@ -11598,7 +11623,7 @@  _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
 {
   return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
 						 (__v2di)
-						 _mm_setzero_si128 (),
+						 _mm_avx512_setzero_si128 (),
 						 (__mmask8) __U);
 }
 
@@ -11609,7 +11634,7 @@  _mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
   return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
 						  (__v4si) __B, __imm,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -11632,7 +11657,7 @@  _mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
   return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
 						  (__v4si) __B, __imm,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -11643,7 +11668,7 @@  _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
   return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
 						  (__v2di) __B, __imm,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -11666,7 +11691,7 @@  _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
   return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
 						  (__v2di) __B, __imm,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -11677,7 +11702,7 @@  _mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
   return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
 						  (__v8si) __B, __imm,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) -1);
 }
 
@@ -11700,7 +11725,7 @@  _mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
   return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
 						  (__v8si) __B, __imm,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -11711,7 +11736,7 @@  _mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
   return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
 						  (__v4di) __B, __imm,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) -1);
 }
 
@@ -11734,7 +11759,7 @@  _mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
   return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
 						  (__v4di) __B, __imm,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -11754,7 +11779,7 @@  _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
 {
   return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
 						  (__v8hi)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -11774,7 +11799,7 @@  _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
 {
   return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
 						     (__v8hi)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -11794,7 +11819,7 @@  _mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const unsigned int __imm)
 {
   return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -11814,7 +11839,7 @@  _mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const unsigned int __imm)
 {
   return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -11824,7 +11849,7 @@  _mm256_srai_epi64 (__m256i __A, const unsigned int __imm)
 {
   return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) -1);
 }
 
@@ -11844,7 +11869,7 @@  _mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const unsigned int __imm)
 {
   return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -11854,7 +11879,7 @@  _mm_srai_epi64 (__m128i __A, const unsigned int __imm)
 {
   return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) -1);
 }
 
@@ -11874,7 +11899,7 @@  _mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const unsigned int __imm)
 {
   return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -11893,7 +11918,7 @@  _mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, unsigned int __B)
 {
   return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
 						  (__v4si)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -11912,7 +11937,7 @@  _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, unsigned int __B)
 {
   return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
 						  (__v2di)
-						  _mm_setzero_si128 (),
+						  _mm_avx512_setzero_si128 (),
 						  (__mmask8) __U);
 }
 
@@ -11932,7 +11957,7 @@  _mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, unsigned int __B)
 {
   return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
 						  (__v8si)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -11952,7 +11977,7 @@  _mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, unsigned int __B)
 {
   return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
 						  (__v4di)
-						  _mm256_setzero_si256 (),
+						  _mm256_avx512_setzero_si256 (),
 						  (__mmask8) __U);
 }
 
@@ -11972,7 +11997,7 @@  _mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
 {
   return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
 						  (__v4df)
-						  _mm256_setzero_pd (),
+						  _mm256_avx512_setzero_pd (),
 						  (__mmask8) __U);
 }
 
@@ -11992,7 +12017,7 @@  _mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
 {
   return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
 						     (__v4df)
-						     _mm256_setzero_pd (),
+						     _mm256_avx512_setzero_pd (),
 						     (__mmask8) __U);
 }
 
@@ -12012,7 +12037,7 @@  _mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
 {
   return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
 						  (__v2df)
-						  _mm_setzero_pd (),
+						  _mm_avx512_setzero_pd (),
 						  (__mmask8) __U);
 }
 
@@ -12032,7 +12057,7 @@  _mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
 {
   return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
 						    (__v8sf)
-						    _mm256_setzero_ps (),
+						    _mm256_avx512_setzero_ps (),
 						    (__mmask8) __U);
 }
 
@@ -12052,7 +12077,7 @@  _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
 {
   return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
 						 (__v4sf)
-						 _mm_setzero_ps (),
+						 _mm_avx512_setzero_ps (),
 						 (__mmask8) __U);
 }
 
@@ -12305,14 +12330,14 @@  _mm256_permutex_pd (__m256d __X, const int __M)
   ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
 					    (int)(I),		\
 					    (__v4di)(__m256i)	\
-					    (_mm256_setzero_si256 ()),\
+					    (_mm256_avx512_setzero_si256 ()),\
 					    (__mmask8) -1))
 
 #define _mm256_maskz_permutex_epi64(M, X, I)                    \
   ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X),    \
 					    (int)(I),                \
 					    (__v4di)(__m256i)        \
-					    (_mm256_setzero_si256 ()),\
+					    (_mm256_avx512_setzero_si256 ()),\
 					    (__mmask8)(M)))
 
 #define _mm256_mask_permutex_epi64(W, M, X, I)               \
@@ -12324,7 +12349,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_insertf32x4(X, Y, C)                                     \
   ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X),  \
     (__v4sf)(__m128) (Y), (int) (C),					\
-    (__v8sf)(__m256)_mm256_setzero_ps (),				\
+    (__v8sf)(__m256)_mm256_avx512_setzero_ps (),				\
     (__mmask8)-1))
 
 #define _mm256_mask_insertf32x4(W, U, X, Y, C)                          \
@@ -12336,13 +12361,13 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_insertf32x4(U, X, Y, C)                            \
   ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X),	\
     (__v4sf)(__m128) (Y), (int) (C),					\
-    (__v8sf)(__m256)_mm256_setzero_ps (),				\
+    (__v8sf)(__m256)_mm256_avx512_setzero_ps (),				\
     (__mmask8)(U)))
 
 #define _mm256_inserti32x4(X, Y, C)                                     \
   ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
     (__v4si)(__m128i) (Y), (int) (C),					\
-    (__v8si)(__m256i)_mm256_setzero_si256 (),				\
+    (__v8si)(__m256i)_mm256_avx512_setzero_si256 (),				\
     (__mmask8)-1))
 
 #define _mm256_mask_inserti32x4(W, U, X, Y, C)                          \
@@ -12354,13 +12379,13 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_inserti32x4(U, X, Y, C)                            \
   ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
     (__v4si)(__m128i) (Y), (int) (C),					\
-    (__v8si)(__m256i)_mm256_setzero_si256 (),				\
+    (__v8si)(__m256i)_mm256_avx512_setzero_si256 (),				\
     (__mmask8)(U)))
 
 #define _mm256_extractf32x4_ps(X, C)                                    \
   ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
     (int) (C),								\
-    (__v4sf)(__m128)_mm_setzero_ps (),					\
+    (__v4sf)(__m128)_mm_avx512_setzero_ps (),					\
     (__mmask8)-1))
 
 #define _mm256_mask_extractf32x4_ps(W, U, X, C)                         \
@@ -12372,12 +12397,12 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_extractf32x4_ps(U, X, C)                           \
   ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
     (int) (C),								\
-    (__v4sf)(__m128)_mm_setzero_ps (),					\
+    (__v4sf)(__m128)_mm_avx512_setzero_ps (),					\
     (__mmask8)(U)))
 
 #define _mm256_extracti32x4_epi32(X, C)                                 \
   ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
-    (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
+    (int) (C), (__v4si)(__m128i)_mm_avx512_setzero_si128 (), (__mmask8)-1))
 
 #define _mm256_mask_extracti32x4_epi32(W, U, X, C)                      \
   ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
@@ -12385,12 +12410,12 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_extracti32x4_epi32(U, X, C)                        \
   ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
-    (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
+    (int) (C), (__v4si)(__m128i)_mm_avx512_setzero_si128 (), (__mmask8)(U)))
 
 #define _mm256_shuffle_i64x2(X, Y, C)                                                   \
   ((__m256i)  __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X),                 \
                                                   (__v4di)(__m256i)(Y), (int)(C),       \
-                                                  (__v4di)(__m256i)_mm256_setzero_si256 (), \
+                                                  (__v4di)(__m256i)_mm256_avx512_setzero_si256 (), \
                                                   (__mmask8)-1))
 
 #define _mm256_mask_shuffle_i64x2(W, U, X, Y, C)                                        \
@@ -12402,14 +12427,14 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_shuffle_i64x2(U, X, Y, C)                                          \
   ((__m256i)  __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X),                 \
                                                   (__v4di)(__m256i)(Y), (int)(C),       \
-                                                  (__v4di)(__m256i)_mm256_setzero_si256 (), \
+                                                  (__v4di)(__m256i)_mm256_avx512_setzero_si256 (), \
                                                   (__mmask8)(U)))
 
 #define _mm256_shuffle_i32x4(X, Y, C)                                                   \
   ((__m256i)  __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X),                 \
                                                   (__v8si)(__m256i)(Y), (int)(C),       \
 						  (__v8si)(__m256i)			\
-						  _mm256_setzero_si256 (),		\
+						  _mm256_avx512_setzero_si256 (),		\
                                                   (__mmask8)-1))
 
 #define _mm256_mask_shuffle_i32x4(W, U, X, Y, C)                                        \
@@ -12422,13 +12447,13 @@  _mm256_permutex_pd (__m256d __X, const int __M)
   ((__m256i)  __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X),                 \
                                                   (__v8si)(__m256i)(Y), (int)(C),       \
 						  (__v8si)(__m256i)			\
-						  _mm256_setzero_si256 (),		\
+						  _mm256_avx512_setzero_si256 (),		\
                                                   (__mmask8)(U)))
 
 #define _mm256_shuffle_f64x2(X, Y, C)                                                   \
   ((__m256d)  __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X),                 \
                                                   (__v4df)(__m256d)(Y), (int)(C),       \
-						  (__v4df)(__m256d)_mm256_setzero_pd (),\
+						  (__v4df)(__m256d)_mm256_avx512_setzero_pd (),\
                                                   (__mmask8)-1))
 
 #define _mm256_mask_shuffle_f64x2(W, U, X, Y, C)                                        \
@@ -12440,13 +12465,13 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_shuffle_f64x2(U, X, Y, C)                                          \
   ((__m256d)  __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X),                 \
                                                   (__v4df)(__m256d)(Y), (int)(C),       \
-						  (__v4df)(__m256d)_mm256_setzero_pd( ),\
+						  (__v4df)(__m256d)_mm256_avx512_setzero_pd( ),\
                                                   (__mmask8)(U)))
 
 #define _mm256_shuffle_f32x4(X, Y, C)                                                   \
   ((__m256)  __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X),                   \
                                                  (__v8sf)(__m256)(Y), (int)(C),         \
-						 (__v8sf)(__m256)_mm256_setzero_ps (),  \
+						 (__v8sf)(__m256)_mm256_avx512_setzero_ps (),  \
                                                  (__mmask8)-1))
 
 #define _mm256_mask_shuffle_f32x4(W, U, X, Y, C)                                        \
@@ -12458,7 +12483,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_shuffle_f32x4(U, X, Y, C)                                          \
   ((__m256)  __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X),                   \
                                                  (__v8sf)(__m256)(Y), (int)(C),         \
-						 (__v8sf)(__m256)_mm256_setzero_ps (),  \
+						 (__v8sf)(__m256)_mm256_avx512_setzero_ps (),  \
                                                  (__mmask8)(U)))
 
 #define _mm256_mask_shuffle_pd(W, U, A, B, C)                                   \
@@ -12471,7 +12496,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
   ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A),                \
                                            (__v4df)(__m256d)(B), (int)(C),      \
 					   (__v4df)(__m256d)			\
-					   _mm256_setzero_pd (),		\
+					   _mm256_avx512_setzero_pd (),		\
                                            (__mmask8)(U)))
 
 #define _mm_mask_shuffle_pd(W, U, A, B, C)                                      \
@@ -12483,7 +12508,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm_maskz_shuffle_pd(U, A, B, C)                                        \
   ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A),                \
                                            (__v2df)(__m128d)(B), (int)(C),      \
-					   (__v2df)(__m128d)_mm_setzero_pd (),  \
+					   (__v2df)(__m128d)_mm_avx512_setzero_pd (),  \
                                            (__mmask8)(U)))
 
 #define _mm256_mask_shuffle_ps(W, U, A, B, C)                                   \
@@ -12495,7 +12520,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_shuffle_ps(U, A, B, C)                                     \
   ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A),                 \
                                            (__v8sf)(__m256)(B), (int)(C),       \
-					   (__v8sf)(__m256)_mm256_setzero_ps (),\
+					   (__v8sf)(__m256)_mm256_avx512_setzero_ps (),\
                                            (__mmask8)(U)))
 
 #define _mm_mask_shuffle_ps(W, U, A, B, C)                                      \
@@ -12507,7 +12532,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm_maskz_shuffle_ps(U, A, B, C)                                        \
   ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A),                 \
                                            (__v4sf)(__m128)(B), (int)(C),       \
-					   (__v4sf)(__m128)_mm_setzero_ps (),   \
+					   (__v4sf)(__m128)_mm_avx512_setzero_ps (),   \
                                            (__mmask8)(U)))
 
 #define _mm256_fixupimm_pd(X, Y, Z, C)                                          \
@@ -12590,7 +12615,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_srli_epi32(U, A, B)				\
   ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A),	\
-    (unsigned int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
+    (unsigned int)(B), (__v8si)_mm256_avx512_setzero_si256 (), (__mmask8)(U)))
 
 #define _mm_mask_srli_epi32(W, U, A, B)                                 \
   ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A),       \
@@ -12598,7 +12623,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_srli_epi32(U, A, B)                                   \
   ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A),       \
-    (unsigned int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
+    (unsigned int)(B), (__v4si)_mm_avx512_setzero_si128 (), (__mmask8)(U)))
 
 #define _mm256_mask_srli_epi64(W, U, A, B)				\
   ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A),	\
@@ -12606,7 +12631,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_srli_epi64(U, A, B)				\
   ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A),	\
-    (unsigned int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
+    (unsigned int)(B), (__v4di)_mm256_avx512_setzero_si256 (), (__mmask8)(U)))
 
 #define _mm_mask_srli_epi64(W, U, A, B)                                 \
   ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A),       \
@@ -12614,7 +12639,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_srli_epi64(U, A, B)                                   \
   ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A),       \
-    (unsigned int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
+    (unsigned int)(B), (__v2di)_mm_avx512_setzero_si128 (), (__mmask8)(U)))
 
 #define _mm256_mask_slli_epi32(W, U, X, C)				\
   ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X),	\
@@ -12625,7 +12650,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_slli_epi32(U, X, C)				\
   ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X),	\
     (unsigned int)(C),							\
-    (__v8si)(__m256i)_mm256_setzero_si256 (),				\
+    (__v8si)(__m256i)_mm256_avx512_setzero_si256 (),				\
     (__mmask8)(U)))
 
 #define _mm256_mask_slli_epi64(W, U, X, C)				\
@@ -12637,7 +12662,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_slli_epi64(U, X, C)				\
   ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X),	\
     (unsigned int)(C),							\
-    (__v4di)(__m256i)_mm256_setzero_si256 (),				\
+    (__v4di)(__m256i)_mm256_avx512_setzero_si256 (),				\
     (__mmask8)(U)))
 
 #define _mm_mask_slli_epi32(W, U, X, C)					\
@@ -12649,7 +12674,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm_maskz_slli_epi32(U, X, C)					\
   ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X),	\
     (unsigned int)(C),							\
-    (__v4si)(__m128i)_mm_setzero_si128 (),				\
+    (__v4si)(__m128i)_mm_avx512_setzero_si128 (),				\
     (__mmask8)(U)))
 
 #define _mm_mask_slli_epi64(W, U, X, C)					\
@@ -12661,7 +12686,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm_maskz_slli_epi64(U, X, C)					\
   ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X),	\
     (unsigned int)(C),							\
-    (__v2di)(__m128i)_mm_setzero_si128 (),				\
+    (__v2di)(__m128i)_mm_avx512_setzero_si128 (),				\
     (__mmask8)(U)))
 
 #define _mm256_ternarylogic_epi64(A, B, C, I)			\
@@ -12762,7 +12787,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_roundscale_ps(A, B)				        \
   ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A),    \
-    (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))
+    (int)(B), (__v8sf)(__m256)_mm256_avx512_setzero_ps (), (__mmask8)-1))
 
 #define _mm256_mask_roundscale_ps(W, U, A, B)			        \
   ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A),    \
@@ -12770,11 +12795,11 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_roundscale_ps(U, A, B)			        \
   ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A),    \
-    (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
+    (int)(B), (__v8sf)(__m256)_mm256_avx512_setzero_ps (), (__mmask8)(U)))
 
 #define _mm256_roundscale_pd(A, B)				        \
   ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A),  \
-    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))
+    (int)(B), (__v4df)(__m256d)_mm256_avx512_setzero_pd (), (__mmask8)-1))
 
 #define _mm256_mask_roundscale_pd(W, U, A, B)			        \
   ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A),  \
@@ -12782,11 +12807,11 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_roundscale_pd(U, A, B)			        \
   ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A),  \
-    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
+    (int)(B), (__v4df)(__m256d)_mm256_avx512_setzero_pd (), (__mmask8)(U)))
 
 #define _mm_roundscale_ps(A, B)					        \
   ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A),    \
-    (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))
+    (int)(B), (__v4sf)(__m128)_mm_avx512_setzero_ps (), (__mmask8)-1))
 
 #define _mm_mask_roundscale_ps(W, U, A, B)			        \
   ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A),    \
@@ -12794,11 +12819,11 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_roundscale_ps(U, A, B)			        \
   ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A),    \
-    (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
+    (int)(B), (__v4sf)(__m128)_mm_avx512_setzero_ps (), (__mmask8)(U)))
 
 #define _mm_roundscale_pd(A, B)					        \
   ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A),  \
-    (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1))
+    (int)(B), (__v2df)(__m128d)_mm_avx512_setzero_pd (), (__mmask8)-1))
 
 #define _mm_mask_roundscale_pd(W, U, A, B)			        \
   ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A),  \
@@ -12806,12 +12831,12 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_roundscale_pd(U, A, B)			        \
   ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A),  \
-    (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
+    (int)(B), (__v2df)(__m128d)_mm_avx512_setzero_pd (), (__mmask8)(U)))
 
 #define _mm256_getmant_ps(X, B, C)                                              \
   ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X),             \
                                          (int)(((C)<<2) | (B)),                 \
-					 (__v8sf)(__m256)_mm256_setzero_ps (),  \
+					 (__v8sf)(__m256)_mm256_avx512_setzero_ps (),  \
                                          (__mmask8)-1))
 
 #define _mm256_mask_getmant_ps(W, U, X, B, C)                                   \
@@ -12823,13 +12848,13 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_getmant_ps(U, X, B, C)                                     \
   ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X),             \
                                          (int)(((C)<<2) | (B)),                 \
-					 (__v8sf)(__m256)_mm256_setzero_ps (),  \
+					 (__v8sf)(__m256)_mm256_avx512_setzero_ps (),  \
                                          (__mmask8)(U)))
 
 #define _mm_getmant_ps(X, B, C)                                                 \
   ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X),             \
                                          (int)(((C)<<2) | (B)),                 \
-					 (__v4sf)(__m128)_mm_setzero_ps (),     \
+					 (__v4sf)(__m128)_mm_avx512_setzero_ps (),     \
                                          (__mmask8)-1))
 
 #define _mm_mask_getmant_ps(W, U, X, B, C)                                      \
@@ -12841,13 +12866,13 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm_maskz_getmant_ps(U, X, B, C)                                        \
   ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X),             \
                                          (int)(((C)<<2) | (B)),                 \
-					 (__v4sf)(__m128)_mm_setzero_ps (),     \
+					 (__v4sf)(__m128)_mm_avx512_setzero_ps (),     \
                                          (__mmask8)(U)))
 
 #define _mm256_getmant_pd(X, B, C)                                              \
   ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X),           \
                                          (int)(((C)<<2) | (B)),                 \
-					  (__v4df)(__m256d)_mm256_setzero_pd (),\
+					  (__v4df)(__m256d)_mm256_avx512_setzero_pd (),\
                                           (__mmask8)-1))
 
 #define _mm256_mask_getmant_pd(W, U, X, B, C)                                   \
@@ -12859,13 +12884,13 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_getmant_pd(U, X, B, C)                                     \
   ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X),           \
                                          (int)(((C)<<2) | (B)),                 \
-					  (__v4df)(__m256d)_mm256_setzero_pd (),\
+					  (__v4df)(__m256d)_mm256_avx512_setzero_pd (),\
                                           (__mmask8)(U)))
 
 #define _mm_getmant_pd(X, B, C)                                                 \
   ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X),           \
                                          (int)(((C)<<2) | (B)),                 \
-					  (__v2df)(__m128d)_mm_setzero_pd (),   \
+					  (__v2df)(__m128d)_mm_avx512_setzero_pd (),   \
                                           (__mmask8)-1))
 
 #define _mm_mask_getmant_pd(W, U, X, B, C)                                      \
@@ -12877,7 +12902,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm_maskz_getmant_pd(U, X, B, C)                                        \
   ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X),           \
                                          (int)(((C)<<2) | (B)),                 \
-					  (__v2df)(__m128d)_mm_setzero_pd (),   \
+					  (__v2df)(__m128d)_mm_avx512_setzero_pd (),   \
                                           (__mmask8)(U)))
 
 #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
@@ -13160,7 +13185,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_shuffle_epi32(U, X, C)                                         \
   ((__m256i)  __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C),        \
 					     (__v8si)(__m256i)			    \
-					     _mm256_setzero_si256 (),		    \
+					     _mm256_avx512_setzero_si256 (),		    \
                                              (__mmask8)(U)))
 
 #define _mm_mask_shuffle_epi32(W, U, X, C)                                          \
@@ -13170,12 +13195,12 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_shuffle_epi32(U, X, C)                                            \
   ((__m128i)  __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C),        \
-					     (__v4si)(__m128i)_mm_setzero_si128 (), \
+					     (__v4si)(__m128i)_mm_avx512_setzero_si128 (), \
                                              (__mmask8)(U)))
 
 #define _mm256_rol_epi64(A, B)                                                 \
   ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B),      \
-                                          (__v4di)(__m256i)_mm256_setzero_si256 (),\
+                                          (__v4di)(__m256i)_mm256_avx512_setzero_si256 (),\
                                           (__mmask8)-1))
 
 #define _mm256_mask_rol_epi64(W, U, A, B)                                      \
@@ -13185,12 +13210,12 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_rol_epi64(U, A, B)                                        \
   ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B),      \
-                                          (__v4di)(__m256i)_mm256_setzero_si256 (),\
+                                          (__v4di)(__m256i)_mm256_avx512_setzero_si256 (),\
                                           (__mmask8)(U)))
 
 #define _mm_rol_epi64(A, B)                                                    \
   ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-					  (__v2di)(__m128i)_mm_setzero_si128 (),\
+					  (__v2di)(__m128i)_mm_avx512_setzero_si128 (),\
                                           (__mmask8)-1))
 
 #define _mm_mask_rol_epi64(W, U, A, B)                                         \
@@ -13200,12 +13225,12 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_rol_epi64(U, A, B)                                           \
   ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-					  (__v2di)(__m128i)_mm_setzero_si128 (),\
+					  (__v2di)(__m128i)_mm_avx512_setzero_si128 (),\
                                           (__mmask8)(U)))
 
 #define _mm256_ror_epi64(A, B)                                                 \
   ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B),      \
-                                          (__v4di)(__m256i)_mm256_setzero_si256 (),\
+                                          (__v4di)(__m256i)_mm256_avx512_setzero_si256 (),\
                                           (__mmask8)-1))
 
 #define _mm256_mask_ror_epi64(W, U, A, B)                                      \
@@ -13215,12 +13240,12 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_ror_epi64(U, A, B)                                        \
   ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B),      \
-                                          (__v4di)(__m256i)_mm256_setzero_si256 (),\
+                                          (__v4di)(__m256i)_mm256_avx512_setzero_si256 (),\
                                           (__mmask8)(U)))
 
 #define _mm_ror_epi64(A, B)                                                    \
   ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-					  (__v2di)(__m128i)_mm_setzero_si128 (),\
+					  (__v2di)(__m128i)_mm_avx512_setzero_si128 (),\
                                           (__mmask8)-1))
 
 #define _mm_mask_ror_epi64(W, U, A, B)                                         \
@@ -13230,12 +13255,12 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_ror_epi64(U, A, B)                                           \
   ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-					  (__v2di)(__m128i)_mm_setzero_si128 (),\
+					  (__v2di)(__m128i)_mm_avx512_setzero_si128 (),\
                                           (__mmask8)(U)))
 
 #define _mm256_rol_epi32(A, B)                                                 \
   ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-					  (__v8si)(__m256i)_mm256_setzero_si256 (),\
+					  (__v8si)(__m256i)_mm256_avx512_setzero_si256 (),\
                                           (__mmask8)-1))
 
 #define _mm256_mask_rol_epi32(W, U, A, B)                                      \
@@ -13245,12 +13270,12 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_rol_epi32(U, A, B)                                        \
   ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-					  (__v8si)(__m256i)_mm256_setzero_si256 (),\
+					  (__v8si)(__m256i)_mm256_avx512_setzero_si256 (),\
                                           (__mmask8)(U)))
 
 #define _mm_rol_epi32(A, B)                                                    \
   ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-					  (__v4si)(__m128i)_mm_setzero_si128 (),\
+					  (__v4si)(__m128i)_mm_avx512_setzero_si128 (),\
                                           (__mmask8)-1))
 
 #define _mm_mask_rol_epi32(W, U, A, B)                                         \
@@ -13260,12 +13285,12 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_rol_epi32(U, A, B)                                           \
   ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-					  (__v4si)(__m128i)_mm_setzero_si128 (),\
+					  (__v4si)(__m128i)_mm_avx512_setzero_si128 (),\
                                           (__mmask8)(U)))
 
 #define _mm256_ror_epi32(A, B)                                                 \
   ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-					  (__v8si)(__m256i)_mm256_setzero_si256 (),\
+					  (__v8si)(__m256i)_mm256_avx512_setzero_si256 (),\
                                           (__mmask8)-1))
 
 #define _mm256_mask_ror_epi32(W, U, A, B)                                      \
@@ -13276,12 +13301,12 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 #define _mm256_maskz_ror_epi32(U, A, B)                                        \
   ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B),      \
 					  (__v8si)(__m256i)		       \
-					  _mm256_setzero_si256 (),	       \
+					  _mm256_avx512_setzero_si256 (),	       \
                                           (__mmask8)(U)))
 
 #define _mm_ror_epi32(A, B)                                                    \
   ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-					  (__v4si)(__m128i)_mm_setzero_si128 (),\
+					  (__v4si)(__m128i)_mm_avx512_setzero_si128 (),\
                                           (__mmask8)-1))
 
 #define _mm_mask_ror_epi32(W, U, A, B)                                         \
@@ -13291,7 +13316,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_ror_epi32(U, A, B)                                           \
   ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-					  (__v4si)(__m128i)_mm_setzero_si128 (),\
+					  (__v4si)(__m128i)_mm_avx512_setzero_si128 (),\
                                           (__mmask8)(U)))
 
 #define _mm256_alignr_epi32(X, Y, C)                                        \
@@ -13304,7 +13329,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_alignr_epi32(U, X, Y, C)                               \
     ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X),          \
-        (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_setzero_si256 (),\
+        (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_avx512_setzero_si256 (),\
         (__mmask8)(U)))
 
 #define _mm256_alignr_epi64(X, Y, C)                                        \
@@ -13317,7 +13342,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_alignr_epi64(U, X, Y, C)                               \
     ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X),          \
-        (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_setzero_si256 (),\
+        (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_avx512_setzero_si256 (),\
         (__mmask8)(U)))
 
 #define _mm_alignr_epi32(X, Y, C)                                           \
@@ -13330,7 +13355,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_alignr_epi32(U, X, Y, C)                                  \
     ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X),          \
-	(__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128 (),\
+	(__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_avx512_setzero_si128 (),\
         (__mmask8)(U)))
 
 #define _mm_alignr_epi64(X, Y, C)                                           \
@@ -13343,7 +13368,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_alignr_epi64(U, X, Y, C)                                  \
     ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \
-	(__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128 (),\
+	(__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_avx512_setzero_si128 (),\
         (__mmask8)(U)))
 
 #define _mm_mask_cvtps_ph(W, U, A, I)						\
@@ -13352,7 +13377,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_cvtps_ph(U, A, I)						\
   ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I),	\
-      (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
+      (__v8hi)(__m128i) _mm_avx512_setzero_si128 (), (__mmask8) (U)))
 
 #define _mm256_mask_cvtps_ph(W, U, A, I)					\
   ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I),	\
@@ -13360,7 +13385,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_cvtps_ph(U, A, I)						\
   ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I),	\
-      (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
+      (__v8hi)(__m128i) _mm_avx512_setzero_si128 (), (__mmask8) (U)))
 
 #define _mm256_mask_srai_epi32(W, U, A, B)				\
   ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A),	\
@@ -13368,7 +13393,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_srai_epi32(U, A, B)				\
   ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A),	\
-    (unsigned int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
+    (unsigned int)(B), (__v8si)_mm256_avx512_setzero_si256 (), (__mmask8)(U)))
 
 #define _mm_mask_srai_epi32(W, U, A, B)                                 \
   ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A),       \
@@ -13376,11 +13401,11 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_srai_epi32(U, A, B)                                   \
   ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A),       \
-    (unsigned int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
+    (unsigned int)(B), (__v4si)_mm_avx512_setzero_si128 (), (__mmask8)(U)))
 
 #define _mm256_srai_epi64(A, B)						\
   ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A),	\
-    (unsigned int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))
+    (unsigned int)(B), (__v4di)_mm256_avx512_setzero_si256 (), (__mmask8)-1))
 
 #define _mm256_mask_srai_epi64(W, U, A, B)				\
   ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A),	\
@@ -13388,11 +13413,11 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_srai_epi64(U, A, B)				\
   ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A),	\
-    (unsigned int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
+    (unsigned int)(B), (__v4di)_mm256_avx512_setzero_si256 (), (__mmask8)(U)))
 
 #define _mm_srai_epi64(A, B)						\
   ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A),       \
-    (unsigned int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)-1))
+    (unsigned int)(B), (__v2di)_mm_avx512_setzero_si128 (), (__mmask8)-1))
 
 #define _mm_mask_srai_epi64(W, U, A, B)                                 \
   ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A),       \
@@ -13400,7 +13425,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_srai_epi64(U, A, B)                                   \
   ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A),       \
-    (unsigned int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
+    (unsigned int)(B), (__v2di)_mm_avx512_setzero_si128 (), (__mmask8)(U)))
 
 #define _mm256_mask_permutex_pd(W, U, A, B)                             \
   ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A),       \
@@ -13408,7 +13433,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_permutex_pd(U, A, B)				\
   ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A),       \
-    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
+    (int)(B), (__v4df)(__m256d)_mm256_avx512_setzero_pd (), (__mmask8)(U)))
 
 #define _mm256_mask_permute_pd(W, U, X, C)					    \
   ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C),	    \
@@ -13417,7 +13442,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_permute_pd(U, X, C)					    \
   ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C),	    \
-					      (__v4df)(__m256d)_mm256_setzero_pd (),\
+					      (__v4df)(__m256d)_mm256_avx512_setzero_pd (),\
 					      (__mmask8)(U)))
 
 #define _mm256_mask_permute_ps(W, U, X, C)					    \
@@ -13426,7 +13451,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm256_maskz_permute_ps(U, X, C)					    \
   ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C),	    \
-					      (__v8sf)(__m256)_mm256_setzero_ps (), \
+					      (__v8sf)(__m256)_mm256_avx512_setzero_ps (), \
 					      (__mmask8)(U)))
 
 #define _mm_mask_permute_pd(W, U, X, C)						    \
@@ -13435,7 +13460,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_permute_pd(U, X, C)						    \
   ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C),	    \
-					    (__v2df)(__m128d)_mm_setzero_pd (),	    \
+					    (__v2df)(__m128d)_mm_avx512_setzero_pd (),	    \
 					    (__mmask8)(U)))
 
 #define _mm_mask_permute_ps(W, U, X, C)						    \
@@ -13444,7 +13469,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_maskz_permute_ps(U, X, C)						    \
   ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C),	    \
-					  (__v4sf)(__m128)_mm_setzero_ps (),	    \
+					  (__v4sf)(__m128)_mm_avx512_setzero_ps (),	    \
 					  (__mmask8)(U)))
 
 #define _mm256_cmp_epu32_mask(X, Y, P)					\
@@ -13623,7 +13648,7 @@  _mm256_lzcnt_epi32 (__m256i __A)
 {
   return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
 						     (__v8si)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) -1);
 }
 
@@ -13642,7 +13667,7 @@  _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
 						     (__v8si)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -13652,7 +13677,7 @@  _mm256_lzcnt_epi64 (__m256i __A)
 {
   return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) -1);
 }
 
@@ -13671,7 +13696,7 @@  _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
 						     (__v4di)
-						     _mm256_setzero_si256 (),
+						     _mm256_avx512_setzero_si256 (),
 						     (__mmask8) __U);
 }
 
@@ -13681,7 +13706,7 @@  _mm256_conflict_epi64 (__m256i __A)
 {
   return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
 							 (__v4di)
-							 _mm256_setzero_si256 (),
+							 _mm256_avx512_setzero_si256 (),
 							 (__mmask8) -1);
 }
 
@@ -13701,7 +13726,7 @@  _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
 							 (__v4di)
-							 _mm256_setzero_si256 (),
+							 _mm256_avx512_setzero_si256 (),
 							 (__mmask8)
 							 __U);
 }
@@ -13712,7 +13737,7 @@  _mm256_conflict_epi32 (__m256i __A)
 {
   return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
 							 (__v8si)
-							 _mm256_setzero_si256 (),
+							 _mm256_avx512_setzero_si256 (),
 							 (__mmask8) -1);
 }
 
@@ -13732,7 +13757,7 @@  _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
 							 (__v8si)
-							 _mm256_setzero_si256 (),
+							 _mm256_avx512_setzero_si256 (),
 							 (__mmask8)
 							 __U);
 }
@@ -13743,7 +13768,7 @@  _mm_lzcnt_epi32 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) -1);
 }
 
@@ -13762,7 +13787,7 @@  _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
 						     (__v4si)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -13772,7 +13797,7 @@  _mm_lzcnt_epi64 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) -1);
 }
 
@@ -13791,7 +13816,7 @@  _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
 						     (__v2di)
-						     _mm_setzero_si128 (),
+						     _mm_avx512_setzero_si128 (),
 						     (__mmask8) __U);
 }
 
@@ -13801,7 +13826,7 @@  _mm_conflict_epi64 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
 							 (__v2di)
-							 _mm_setzero_si128 (),
+							 _mm_avx512_setzero_si128 (),
 							 (__mmask8) -1);
 }
 
@@ -13821,7 +13846,7 @@  _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
 							 (__v2di)
-							 _mm_setzero_si128 (),
+							 _mm_avx512_setzero_si128 (),
 							 (__mmask8)
 							 __U);
 }
@@ -13832,7 +13857,7 @@  _mm_conflict_epi32 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
 							 (__v4si)
-							 _mm_setzero_si128 (),
+							 _mm_avx512_setzero_si128 (),
 							 (__mmask8) -1);
 }
 
@@ -13852,7 +13877,7 @@  _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
 							 (__v4si)
-							 _mm_setzero_si128 (),
+							 _mm_avx512_setzero_si128 (),
 							 (__mmask8)
 							 __U);
 }
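
A minimal sketch of what the new _mm256_avx512_setzero_si256 helper referenced
throughout the hunks above may look like, assuming it mirrors the body of the
AVX _mm256_setzero_si256 and sits in this header's AVX512VL target region (the
code and comment below are illustrative, not quoted from the patch):

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avx512_setzero_si256 (void)
{
  /* Same zero vector as _mm256_setzero_si256, but declared under the
     AVX512VL target pragma of this header, so callers compiled with
     no-evex512 can inline it without a target-option mismatch.  */
  return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
}

The 128-bit and floating-point variants would follow the same pattern with
their respective vector types.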
diff --git a/gcc/config/i386/avx512vpopcntdqvlintrin.h b/gcc/config/i386/avx512vpopcntdqvlintrin.h
index 972ab3b66d9..df487a269de 100644
--- a/gcc/config/i386/avx512vpopcntdqvlintrin.h
+++ b/gcc/config/i386/avx512vpopcntdqvlintrin.h
@@ -56,7 +56,7 @@  _mm_maskz_popcnt_epi32 (__mmask16 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
 							 (__v4si)
-							 _mm_setzero_si128 (),
+							 _mm_avx512_setzero_si128 (),
 							 (__mmask16) __U);
 }
 
@@ -82,7 +82,7 @@  _mm256_maskz_popcnt_epi32 (__mmask16 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
 						(__v8si)
-						_mm256_setzero_si256 (),
+						_mm256_avx512_setzero_si256 (),
 						(__mmask16) __U);
 }
 
@@ -108,7 +108,7 @@  _mm_maskz_popcnt_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
 							(__v2di)
-							_mm_setzero_si128 (),
+							_mm_avx512_setzero_si128 (),
 							(__mmask8) __U);
 }
 
@@ -134,7 +134,7 @@  _mm256_maskz_popcnt_epi64 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
 						(__v4di)
-						_mm256_setzero_si256 (),
+						_mm256_avx512_setzero_si256 (),
 						(__mmask8) __U);
 }
 
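A hypothetical caller illustrating the intended effect of these hunks (the
function name and target string below are mine, not part of the patch):

#include <immintrin.h>

/* Every intrin inlined into this function, including the zero vector
   the maskz form expands to, is meant to carry target options
   compatible with no-evex512.  */
__attribute__ ((target ("avx512vpopcntdq,avx512vl,no-evex512")))
static __m128i
maskz_popcnt32_128 (__mmask16 __m, __m128i __a)
{
  return _mm_maskz_popcnt_epi32 (__m, __a);
}
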
diff --git a/gcc/config/i386/gfniintrin.h b/gcc/config/i386/gfniintrin.h
index 907e7a0cf7a..38c96b6e4dd 100644
--- a/gcc/config/i386/gfniintrin.h
+++ b/gcc/config/i386/gfniintrin.h
@@ -139,7 +139,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_gf2p8mul_epi8 (__mmask16 __A, __m128i __B, __m128i __C)
 {
   return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __B,
-			(__v16qi) __C, (__v16qi) _mm_setzero_si128 (), __A);
+			(__v16qi) __C, (__v16qi) _mm_avx512_setzero_si128 (), __A);
 }
 
 #ifdef __OPTIMIZE__
@@ -162,7 +162,7 @@  _mm_maskz_gf2p8affineinv_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
 {
   return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __B,
 						(__v16qi) __C, __D,
-						(__v16qi) _mm_setzero_si128 (),
+						(__v16qi) _mm_avx512_setzero_si128 (),
 						 __A);
 }
 
@@ -181,7 +181,7 @@  _mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
 				  const int __D)
 {
   return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __B,
-		     (__v16qi) __C, __D, (__v16qi) _mm_setzero_si128 (), __A);
+		     (__v16qi) __C, __D, (__v16qi) _mm_avx512_setzero_si128 (), __A);
 }
 #else
 #define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) 		   \
@@ -191,7 +191,7 @@  _mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
 #define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
   ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(		   \
 			(__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C),	   \
-			(int)(D), (__v16qi)(__m128i) _mm_setzero_si128 (), \
+			(int)(D), (__v16qi)(__m128i) _mm_avx512_setzero_si128 (), \
 			(__mmask16)(A)))
 #define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
   ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(C),\
@@ -199,7 +199,7 @@  _mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
 #define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			    \
   ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(B),\
 		(__v16qi)(__m128i)(C), (int)(D),			    \
-		(__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
+		(__v16qi)(__m128i) _mm_avx512_setzero_si128 (), (__mmask16)(A)))
 #endif
 
 #ifdef __DISABLE_GFNIAVX512VL__
@@ -228,7 +228,7 @@  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_gf2p8mul_epi8 (__mmask32 __A, __m256i __B, __m256i __C)
 {
   return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __B,
-			(__v32qi) __C, (__v32qi) _mm256_setzero_si256 (), __A);
+			(__v32qi) __C, (__v32qi) _mm256_avx512_setzero_si256 (), __A);
 }
 
 #ifdef __OPTIMIZE__
@@ -251,7 +251,7 @@  _mm256_maskz_gf2p8affineinv_epi64_epi8 (__mmask32 __A, __m256i __B,
 {
   return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __B,
 				      (__v32qi) __C, __D,
-				      (__v32qi) _mm256_setzero_si256 (), __A);
+				      (__v32qi) _mm256_avx512_setzero_si256 (), __A);
 }
 
 extern __inline __m256i
@@ -272,7 +272,7 @@  _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
 				     __m256i __C, const int __D)
 {
   return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __B,
-		(__v32qi) __C, __D, (__v32qi)_mm256_setzero_si256 (), __A);
+		(__v32qi) __C, __D, (__v32qi)_mm256_avx512_setzero_si256 (), __A);
 }
 #else
 #define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)		\
@@ -282,14 +282,14 @@  _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
 #define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)		\
   ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(		\
 	(__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D),		\
-	(__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
+	(__v32qi)(__m256i) _mm256_avx512_setzero_si256 (), (__mmask32)(A)))
 #define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) 		    \
   ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(C),\
 	(__v32qi)(__m256i)(D), (int)(E), (__v32qi)(__m256i)(A), (__mmask32)(B)))
 #define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			    \
   ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(B),\
 	 (__v32qi)(__m256i)(C), (int)(D),				    \
-	 (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
+	 (__v32qi)(__m256i) _mm256_avx512_setzero_si256 (), (__mmask32)(A)))
 #endif
 
 #ifdef __DISABLE_GFNIAVX512VLBW__
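
The gfniintrin.h hunks follow the same pattern under the GFNI/AVX512VL(BW)
guards. A parallel, purely illustrative caller for the 256-bit masked
multiply (names and target string are mine):

#include <immintrin.h>

/* __mmask32 byte masking needs AVX512BW in addition to AVX512VL; with
   the zero vector coming from _mm256_avx512_setzero_si256, the
   no-evex512 restriction applies consistently to everything inlined
   here.  */
__attribute__ ((target ("gfni,avx512vl,avx512bw,no-evex512")))
static __m256i
maskz_gfmul_256 (__mmask32 __m, __m256i __b, __m256i __c)
{
  return _mm256_maskz_gf2p8mul_epi8 (__m, __b, __c);
}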