i386: Fix vpblendm{b,w} intrins and insns
Commit Message
Hi all,
The vpblendm{b,w} intrinsics do not actually take any constant (immediate)
parameters, so there is no need for them to be wrapped in __OPTIMIZE__.
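A minimal usage sketch for reference (the helper name below is mine, not
part of the patch; assumes -mavx512bw -mavx512vl): the mask is an ordinary
runtime operand rather than an immediate, so the inline-function form also
works at -O0, where __OPTIMIZE__ is not defined and a macro fallback would
otherwise be required.

  #include <immintrin.h>

  /* Hypothetical demo; compile with -mavx512bw -mavx512vl.  K is a
     runtime value, not a compile-time constant, so no immediate-operand
     check is needed.  */
  __m128i
  blend_words (__mmask8 k, __m128i a, __m128i w)
  {
    /* Elements are taken from w where the mask bit is set and from a
       where it is clear (vpblendmw).  */
    return _mm_mask_blend_epi16 (k, a, w);
  }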
Also, the patterns should check TARGET_AVX512VL for the 128/256-bit vector modes.
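To illustrate the second point (hypothetical function; assumes
_mm256_mask_blend_ph as declared in GCC's avx512fp16vlintrin.h): the
128/256-bit blendm forms require AVX512VL on top of the base feature,
which the guarded iterator entries now enforce.

  #include <immintrin.h>

  /* Hypothetical demo; requires -mavx512fp16 -mavx512vl.  Without the
     TARGET_AVX512VL guard, the 256-bit blendm pattern could be selected
     even when only the 512-bit feature set is enabled.  */
  __m256h
  blend_half (__mmask16 k, __m256h a, __m256h w)
  {
    return _mm256_mask_blend_ph (k, a, w);
  }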
This patch fixes the issues mentioned above. Tested on x86_64-pc-linux-gnu.
Ok for trunk?
BRs,
Haochen
gcc/ChangeLog:
* config/i386/avx512vlbwintrin.h
(_mm_mask_blend_epi16): Remove __OPTIMIZE__ wrapper.
(_mm_mask_blend_epi8): Ditto.
(_mm256_mask_blend_epi16): Ditto.
(_mm256_mask_blend_epi8): Ditto.
* config/i386/avx512vlintrin.h
(_mm256_mask_blend_pd): Ditto.
(_mm256_mask_blend_ps): Ditto.
(_mm256_mask_blend_epi64): Ditto.
(_mm256_mask_blend_epi32): Ditto.
(_mm_mask_blend_pd): Ditto.
(_mm_mask_blend_ps): Ditto.
(_mm_mask_blend_epi64): Ditto.
(_mm_mask_blend_epi32): Ditto.
* config/i386/sse.md (VF_AVX512BWHFBF16): Removed.
(VF_AVX512HFBFVL): Move it before the first usage.
(<avx512>_blendm<mode>): Change iterator from VF_AVX512BWHFBF16
to VF_AVX512HFBFVL.
---
gcc/config/i386/avx512vlbwintrin.h | 92 ++++++---------
gcc/config/i386/avx512vlintrin.h | 184 +++++++++++------------------
gcc/config/i386/sse.md | 17 ++-
3 files changed, 115 insertions(+), 178 deletions(-)
Comments
On Tue, Apr 18, 2023 at 3:15 PM Haochen Jiang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi all,
>
> The vpblendm{b,w} intrinsics do not actually take any constant (immediate)
> parameters, so there is no need for them to be wrapped in __OPTIMIZE__.
>
> Also, the patterns should check TARGET_AVX512VL for the 128/256-bit vector modes.
>
> This patch fixes the issues mentioned above. Tested on x86_64-pc-linux-gnu.
> Ok for trunk?
Ok.
>
> BRs,
> Haochen
>
> gcc/ChangeLog:
>
> * config/i386/avx512vlbwintrin.h
> (_mm_mask_blend_epi16): Remove __OPTIMIZE__ wrapper.
> (_mm_mask_blend_epi8): Ditto.
> (_mm256_mask_blend_epi16): Ditto.
> (_mm256_mask_blend_epi8): Ditto.
> * config/i386/avx512vlintrin.h
> (_mm256_mask_blend_pd): Ditto.
> (_mm256_mask_blend_ps): Ditto.
> (_mm256_mask_blend_epi64): Ditto.
> (_mm256_mask_blend_epi32): Ditto.
> (_mm_mask_blend_pd): Ditto.
> (_mm_mask_blend_ps): Ditto.
> (_mm_mask_blend_epi64): Ditto.
> (_mm_mask_blend_epi32): Ditto.
> * config/i386/sse.md (VF_AVX512BWHFBF16): Removed.
> (VF_AVX512HFBFVL): Move it before the first usage.
> (<avx512>_blendm<mode>): Change iterator from VF_AVX512BWHFBF16
> to VF_AVX512HFBFVL.
> ---
> gcc/config/i386/avx512vlbwintrin.h | 92 ++++++---------
> gcc/config/i386/avx512vlintrin.h | 184 +++++++++++------------------
> gcc/config/i386/sse.md | 17 ++-
> 3 files changed, 115 insertions(+), 178 deletions(-)
>
> diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
> index 0232783a362..9d2aba2a8ff 100644
> --- a/gcc/config/i386/avx512vlbwintrin.h
> +++ b/gcc/config/i386/avx512vlbwintrin.h
> @@ -257,6 +257,42 @@ _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
> (__mmask16) __U);
> }
>
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
> +{
> + return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
> + (__v8hi) __W,
> + (__mmask8) __U);
> +}
> +
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
> +{
> + return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
> + (__v16qi) __W,
> + (__mmask16) __U);
> +}
> +
> +extern __inline __m256i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
> +{
> + return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
> + (__v16hi) __W,
> + (__mmask16) __U);
> +}
> +
> +extern __inline __m256i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
> +{
> + return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
> + (__v32qi) __W,
> + (__mmask32) __U);
> +}
> +
> extern __inline __m128i
> __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> _mm256_cvtepi16_epi8 (__m256i __A)
> @@ -1442,42 +1478,6 @@ _mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i __B,
> (__mmask8) __U);
> }
>
> -extern __inline __m128i
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
> -{
> - return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
> - (__v8hi) __W,
> - (__mmask8) __U);
> -}
> -
> -extern __inline __m128i
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
> -{
> - return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
> - (__v16qi) __W,
> - (__mmask16) __U);
> -}
> -
> -extern __inline __m256i
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
> -{
> - return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
> - (__v16hi) __W,
> - (__mmask16) __U);
> -}
> -
> -extern __inline __m256i
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
> -{
> - return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
> - (__v32qi) __W,
> - (__mmask32) __U);
> -}
> -
> extern __inline __mmask8
> __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> _mm_mask_cmp_epi16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
> @@ -1986,26 +1986,6 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
> (__v8hi)(__m128i)_mm_setzero_si128(), \
> (__mmask8)(U)))
>
> -#define _mm_mask_blend_epi16(__U, __A, __W) \
> - ((__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) (__A), \
> - (__v8hi) (__W), \
> - (__mmask8) (__U)))
> -
> -#define _mm_mask_blend_epi8(__U, __A, __W) \
> - ((__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) (__A), \
> - (__v16qi) (__W), \
> - (__mmask16) (__U)))
> -
> -#define _mm256_mask_blend_epi16(__U, __A, __W) \
> - ((__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) (__A), \
> - (__v16hi) (__W), \
> - (__mmask16) (__U)))
> -
> -#define _mm256_mask_blend_epi8(__U, __A, __W) \
> - ((__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) (__A), \
> - (__v32qi) (__W), \
> - (__mmask32) (__U)))
> -
> #define _mm_cmp_epi16_mask(X, Y, P) \
> ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X), \
> (__v8hi)(__m128i)(Y), (int)(P),\
> diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
> index 758b71a9dbd..4a717a7e52f 100644
> --- a/gcc/config/i386/avx512vlintrin.h
> +++ b/gcc/config/i386/avx512vlintrin.h
> @@ -935,6 +935,78 @@ _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
> (__mmask8) __U);
> }
>
> +extern __inline __m256d
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
> +{
> + return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
> + (__v4df) __W,
> + (__mmask8) __U);
> +}
> +
> +extern __inline __m256
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
> +{
> + return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
> + (__v8sf) __W,
> + (__mmask8) __U);
> +}
> +
> +extern __inline __m256i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
> +{
> + return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
> + (__v4di) __W,
> + (__mmask8) __U);
> +}
> +
> +extern __inline __m256i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
> +{
> + return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
> + (__v8si) __W,
> + (__mmask8) __U);
> +}
> +
> +extern __inline __m128d
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
> +{
> + return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
> + (__v2df) __W,
> + (__mmask8) __U);
> +}
> +
> +extern __inline __m128
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
> +{
> + return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
> + (__v4sf) __W,
> + (__mmask8) __U);
> +}
> +
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
> +{
> + return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
> + (__v2di) __W,
> + (__mmask8) __U);
> +}
> +
> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
> +{
> + return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
> + (__v4si) __W,
> + (__mmask8) __U);
> +}
> +
> extern __inline __m256i
> __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
> @@ -12262,78 +12334,6 @@ _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
> (__mmask8) __U);
> }
>
> -extern __inline __m256d
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
> -{
> - return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
> - (__v4df) __W,
> - (__mmask8) __U);
> -}
> -
> -extern __inline __m256
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
> -{
> - return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
> - (__v8sf) __W,
> - (__mmask8) __U);
> -}
> -
> -extern __inline __m256i
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
> -{
> - return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
> - (__v4di) __W,
> - (__mmask8) __U);
> -}
> -
> -extern __inline __m256i
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
> -{
> - return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
> - (__v8si) __W,
> - (__mmask8) __U);
> -}
> -
> -extern __inline __m128d
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
> -{
> - return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
> - (__v2df) __W,
> - (__mmask8) __U);
> -}
> -
> -extern __inline __m128
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
> -{
> - return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
> - (__v4sf) __W,
> - (__mmask8) __U);
> -}
> -
> -extern __inline __m128i
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
> -{
> - return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
> - (__v2di) __W,
> - (__mmask8) __U);
> -}
> -
> -extern __inline __m128i
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
> -{
> - return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
> - (__v4si) __W,
> - (__mmask8) __U);
> -}
> -
> extern __inline __mmask8
> __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
> @@ -13717,46 +13717,6 @@ _mm256_permutex_pd (__m256d __X, const int __M)
> (__v4sf)(__m128)_mm_setzero_ps (), \
> (__mmask8)(U)))
>
> -#define _mm256_mask_blend_pd(__U, __A, __W) \
> - ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A), \
> - (__v4df) (__W), \
> - (__mmask8) (__U)))
> -
> -#define _mm256_mask_blend_ps(__U, __A, __W) \
> - ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A), \
> - (__v8sf) (__W), \
> - (__mmask8) (__U)))
> -
> -#define _mm256_mask_blend_epi64(__U, __A, __W) \
> - ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A), \
> - (__v4di) (__W), \
> - (__mmask8) (__U)))
> -
> -#define _mm256_mask_blend_epi32(__U, __A, __W) \
> - ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A), \
> - (__v8si) (__W), \
> - (__mmask8) (__U)))
> -
> -#define _mm_mask_blend_pd(__U, __A, __W) \
> - ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A), \
> - (__v2df) (__W), \
> - (__mmask8) (__U)))
> -
> -#define _mm_mask_blend_ps(__U, __A, __W) \
> - ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A), \
> - (__v4sf) (__W), \
> - (__mmask8) (__U)))
> -
> -#define _mm_mask_blend_epi64(__U, __A, __W) \
> - ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A), \
> - (__v2di) (__W), \
> - (__mmask8) (__U)))
> -
> -#define _mm_mask_blend_epi32(__U, __A, __W) \
> - ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A), \
> - (__v4si) (__W), \
> - (__mmask8) (__U)))
> -
> #define _mm256_cmp_epu32_mask(X, Y, P) \
> ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
> (__v8si)(__m256i)(Y), (int)(P),\
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 3bb78b26758..26812ab6106 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -445,8 +445,9 @@
> [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
> (V8HF "TARGET_AVX512FP16") V32BF V16BF V8BF])
>
> -(define_mode_iterator VF_AVX512BWHFBF16
> - [V32HF V16HF V8HF V32BF V16BF V8BF])
> +(define_mode_iterator VF_AVX512HFBFVL
> + [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
> + V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
>
> (define_mode_iterator VF_AVX512FP16VL
> [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
> @@ -1586,10 +1587,10 @@
> (set_attr "mode" "<sseinsnmode>")])
>
> (define_insn "<avx512>_blendm<mode>"
> - [(set (match_operand:VF_AVX512BWHFBF16 0 "register_operand" "=v,v")
> - (vec_merge:VF_AVX512BWHFBF16
> - (match_operand:VF_AVX512BWHFBF16 2 "nonimmediate_operand" "vm,vm")
> - (match_operand:VF_AVX512BWHFBF16 1 "nonimm_or_0_operand" "0C,v")
> + [(set (match_operand:VF_AVX512HFBFVL 0 "register_operand" "=v,v")
> + (vec_merge:VF_AVX512HFBFVL
> + (match_operand:VF_AVX512HFBFVL 2 "nonimmediate_operand" "vm,vm")
> + (match_operand:VF_AVX512HFBFVL 1 "nonimm_or_0_operand" "0C,v")
> (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
> "TARGET_AVX512BW"
> "@
> @@ -4546,10 +4547,6 @@
> DONE;
> })
>
> -(define_mode_iterator VF_AVX512HFBFVL
> - [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
> - V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
> -
> (define_expand "vcond<mode><sseintvecmodelower>"
> [(set (match_operand:VF_AVX512HFBFVL 0 "register_operand")
> (if_then_else:VF_AVX512HFBFVL
> --
> 2.31.1
>