@@ -793,6 +793,8 @@ get_available_features (struct __processor_model *cpu_model,
{
if (eax & bit_AVXVNNI)
set_feature (FEATURE_AVXVNNI);
+ if (eax & bit_AVXIFMA)
+ set_feature (FEATURE_AVXIFMA);
}
if (avx512_usable)
{
@@ -76,6 +76,7 @@ along with GCC; see the file COPYING3. If not see
(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_AVX512IFMA_SET \
(OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA2_AVXIFMA_SET OPTION_MASK_ISA2_AVXIFMA
#define OPTION_MASK_ISA_AVX512VBMI_SET \
(OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512BW_SET)
#define OPTION_MASK_ISA2_AVX5124FMAPS_SET OPTION_MASK_ISA2_AVX5124FMAPS
@@ -212,7 +213,8 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA_AVX2_UNSET \
(OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
#define OPTION_MASK_ISA2_AVX2_UNSET \
- (OPTION_MASK_ISA2_AVXVNNI_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET)
+ (OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \
+ | OPTION_MASK_ISA2_AVX512F_UNSET)
#define OPTION_MASK_ISA_AVX512F_UNSET \
(OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
| OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
@@ -230,6 +232,7 @@ along with GCC; see the file COPYING3. If not see
(OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VBMI_UNSET)
#define OPTION_MASK_ISA_AVX512VL_UNSET OPTION_MASK_ISA_AVX512VL
#define OPTION_MASK_ISA_AVX512IFMA_UNSET OPTION_MASK_ISA_AVX512IFMA
+#define OPTION_MASK_ISA2_AVXIFMA_UNSET OPTION_MASK_ISA2_AVXIFMA
#define OPTION_MASK_ISA_AVX512VBMI_UNSET OPTION_MASK_ISA_AVX512VBMI
#define OPTION_MASK_ISA2_AVX5124FMAPS_UNSET OPTION_MASK_ISA2_AVX5124FMAPS
#define OPTION_MASK_ISA2_AVX5124VNNIW_UNSET OPTION_MASK_ISA2_AVX5124VNNIW
@@ -1124,6 +1127,21 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_mavxifma:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXIFMA_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXIFMA_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVXIFMA_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXIFMA_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
@@ -240,6 +240,7 @@ enum processor_features
FEATURE_X86_64_V2,
FEATURE_X86_64_V3,
FEATURE_X86_64_V4,
+ FEATURE_AVXIFMA,
CPU_FEATURE_MAX
};
@@ -175,4 +175,5 @@ ISA_NAMES_TABLE_START
ISA_NAMES_TABLE_ENTRY("x86-64-v2", FEATURE_X86_64_V2, P_X86_64_V2, NULL)
ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
+ ISA_NAMES_TABLE_ENTRY("avxifma", FEATURE_AVXIFMA, P_NONE, "-mavxifma")
ISA_NAMES_TABLE_END
@@ -421,7 +421,8 @@ i[34567]86-*-* | x86_64-*-*)
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
amxbf16intrin.h x86gprintrin.h uintrintrin.h
hresetintrin.h keylockerintrin.h avxvnniintrin.h
- mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h"
+ mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
+ avxifmaintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
new file mode 100644
@@ -0,0 +1,78 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avxifmaintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVXIFMAINTRIN_H_INCLUDED
+#define _AVXIFMAINTRIN_H_INCLUDED
+
+#ifndef __AVXIFMA__
+#pragma GCC push_options
+#pragma GCC target("avxifma")
+#define __DISABLE_AVXIFMA__
+#endif /* __AVXIFMA__ */
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
+{
+ return (__m128i) __builtin_ia32_avx_vpmadd52luq128 ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __Z);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
+{
+ return (__m128i) __builtin_ia32_avx_vpmadd52huq128 ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __Z);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
+{
+ return (__m256i) __builtin_ia32_avx_vpmadd52luq256 ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __Z);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
+{
+ return (__m256i) __builtin_ia32_avx_vpmadd52huq256 ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __Z);
+}
+
+#ifdef __DISABLE_AVXIFMA__
+#undef __DISABLE_AVXIFMA__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVXIFMA__ */
+
+#endif /* _AVXIFMAINTRIN_H_INCLUDED */
@@ -28,6 +28,7 @@
#define bit_AVXVNNI (1 << 4)
#define bit_AVX512BF16 (1 << 5)
#define bit_HRESET (1 << 22)
+#define bit_AVXIFMA (1 << 23)
/* %ecx */
#define bit_SSE3 (1 << 0)
@@ -2499,6 +2499,12 @@ BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
+/* AVXIFMA: unmasked vpmadd52{l,h}uq builtins gated on OPTION_MASK_ISA2_AVXIFMA.  */
+BDESC (0, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_avx_vpmadd52luq_v4di, "__builtin_ia32_avx_vpmadd52luq256", IX86_BUILTIN_AVX_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
+BDESC (0, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_avx_vpmadd52huq_v4di, "__builtin_ia32_avx_vpmadd52huq256", IX86_BUILTIN_AVX_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
+BDESC (0, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_avx_vpmadd52luq_v2di, "__builtin_ia32_avx_vpmadd52luq128", IX86_BUILTIN_AVX_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI)
+BDESC (0, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_avx_vpmadd52huq_v2di, "__builtin_ia32_avx_vpmadd52huq128", IX86_BUILTIN_AVX_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI)
+
/* AVX512VBMI */
BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
@@ -633,6 +633,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__WIDEKL__");
if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNI)
def_or_undef (parse_in, "__AVXVNNI__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AVXIFMA)
+ def_or_undef (parse_in, "__AVXIFMA__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
@@ -109,3 +109,4 @@ DEF_PTA(KL)
DEF_PTA(WIDEKL)
DEF_PTA(AVXVNNI)
DEF_PTA(AVX512FP16)
+DEF_PTA(AVXIFMA)
@@ -226,7 +226,8 @@ static struct ix86_target_opts isa2_opts[] =
{ "-mkl", OPTION_MASK_ISA2_KL },
{ "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
{ "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
- { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 }
+ { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
+ { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -1072,6 +1073,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("hreset", OPT_mhreset),
IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
+ IX86_ATTR_ISA ("avxifma", OPT_mavxifma),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@@ -1214,3 +1214,8 @@ Do not use GOT to access external symbols.
-param=x86-stlf-window-ninsns=
Target Joined UInteger Var(x86_stlf_window_ninsns) Init(64) Param
Instructions number above which STFL stall penalty can be compensated.
+
+mavxifma
+Target Mask(ISA2_AVXIFMA) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and
+AVXIFMA built-in functions and code generation.
@@ -44,6 +44,8 @@
#include <avxvnniintrin.h>
+#include <avxifmaintrin.h>
+
#include <avx2intrin.h>
#include <avx512fintrin.h>
@@ -27867,6 +27867,19 @@
(define_int_attr vpmadd52type
[(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
+(define_insn "avx_vpmadd52<vpmadd52type>_<mode>"
+ [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
+ (unspec:VI8_AVX2
+ [(match_operand:VI8_AVX2 1 "register_operand" "0")
+ (match_operand:VI8_AVX2 2 "register_operand" "x")
+ (match_operand:VI8_AVX2 3 "nonimmediate_operand" "xm")]
+ VPMADD52))]
+ "TARGET_AVXIFMA"
+ "%{vex%} vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_expand "vpamdd52huq<mode>_maskz"
[(match_operand:VI8_AVX512VL 0 "register_operand")
(match_operand:VI8_AVX512VL 1 "register_operand")
@@ -27895,7 +27908,7 @@
DONE;
})
-(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
+(define_insn "vpamdd52<vpmadd52type><mode>"
[(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
(unspec:VI8_AVX512VL
[(match_operand:VI8_AVX512VL 1 "register_operand" "0")
@@ -27903,7 +27916,35 @@
(match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
VPMADD52))]
"TARGET_AVX512IFMA"
- "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
+{
+ /* Use the VEX-prefixed template when AVXIFMA is enabled, the mode size
+ is at most 32 and none of operands 1-3 satisfies EXT_REX_SSE_REG_P;
+ otherwise fall back to the unprefixed template. */
+ if (<MODE_SIZE> <= 32
+ && TARGET_AVXIFMA
+ && !EXT_REX_SSE_REG_P (operands[1])
+ && !EXT_REX_SSE_REG_P (operands[2])
+ && !EXT_REX_SSE_REG_P (operands[3]))
+ return "%{vex%} vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}";
+ else
+ return "vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "ssemuladd")
+ (set_attr "prefix" "maybe_evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vpamdd52<vpmadd52type><mode>_maskz_1"
+ [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VI8_AVX512VL
+ (unspec:VI8_AVX512VL
+ [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
+ (match_operand:VI8_AVX512VL 2 "register_operand" "v")
+ (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
+ VPMADD52)
+ (match_operand:VI8_AVX512VL 4 "const0_operand" "C")
+ (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
+ "TARGET_AVX512IFMA"
+ "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -7060,6 +7060,11 @@ Enable/disable the generation of the WIDEKL instructions.
@cindex @code{target("avxvnni")} function attribute, x86
Enable/disable the generation of the AVXVNNI instructions.
+@item avxifma
+@itemx no-avxifma
+@cindex @code{target("avxifma")} function attribute, x86
+Enable/disable the generation of the AVXIFMA instructions.
+
@item cld
@itemx no-cld
@cindex @code{target("cld")} function attribute, x86
@@ -1436,7 +1436,7 @@ See RS/6000 and PowerPC Options.
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
--mavx512fp16 @gol
+-mavx512fp16 -mavxifma @gol
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
-mkl -mwidekl @gol
@@ -32893,6 +32893,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@need 200
@itemx -mwidekl
@opindex mwidekl
+@need 200
+@itemx -mavxifma
+@opindex mavxifma
These switches enable the use of instructions in the MMX, SSE,
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
@@ -32902,8 +32905,8 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP,
XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
-UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16
-or CLDEMOTE extended instruction sets. Each has a corresponding
+UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16,
+AVXIFMA or CLDEMOTE extended instruction sets. Each has a corresponding
@option{-mno-} option to disable use of these instructions.
These extensions are also available as built-in functions: see
@@ -2490,6 +2490,9 @@ Target supports the execution of @code{avx512f} instructions.
@item avx512vp2intersect
Target supports the execution of @code{avx512vp2intersect} instructions.
+@item avxifma
+Target supports the execution of @code{avxifma} instructions.
+
@item amx_tile
Target supports the execution of @code{amx-tile} instructions.
@@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
@@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
@@ -22,7 +22,11 @@ main ()
/* Run AVX test only if host has AVX support. */
if (((ecx & (bit_AVX | bit_OSXSAVE)) == (bit_AVX | bit_OSXSAVE))
- && avx_os_support ())
+ && avx_os_support ()
+#ifdef AVXIFMA
+ && __builtin_cpu_supports ("avxifma")
+#endif
+ )
{
do_test ();
#ifdef DEBUG
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavxifma -O2" } */
+/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x,y,z;
+volatile __m128i x_,y_,z_;
+
+void extern
+avxifma_test (void)
+{
+ x = _mm256_madd52hi_avx_epu64 (x, y, z);
+ x = _mm256_madd52lo_avx_epu64 (x, y, z);
+ x_ = _mm_madd52hi_avx_epu64 (x_, y_, z_);
+ x_ = _mm_madd52lo_avx_epu64 (x_, y_, z_);
+}
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavxifma" } */
+/* { dg-require-effective-target avxifma } */
+#define AVXIFMA
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+void
+CALC (long long *r, long long *s1, long long *s2, long long *s3, int size)
+{
+ int i;
+ long long a,b;
+
+ for (i = 0; i < size; i++)
+ {
+ /* Reference: s1 plus the higher 52 bits of the 104-bit product s2 * s3,
+ simulated by shifting operands that have 0 in lower 26 bits. */
+ a = s2[i] >> 26;
+ b = s3[i] >> 26;
+ r[i] = a * b + s1[i];
+ }
+}
+
+void
+TEST (void)
+{
+ union256i_q src1_256, src2_256, dst_256;
+ union128i_q src1_128, src2_128, dst_128;
+ long long dst_ref_256[4], dst_ref_128[2];
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ src1_256.a[i] = 15 + 3467 * i;
+ src2_256.a[i] = 9217 + i;
+ src1_256.a[i] = src1_256.a[i] << 26;
+ src2_256.a[i] = src2_256.a[i] << 26;
+ src1_256.a[i] &= ((1LL << 52) - 1);
+ src2_256.a[i] &= ((1LL << 52) - 1);
+ dst_256.a[i] = -1;
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ src1_128.a[i] = 16 + 3467 * i;
+ src2_128.a[i] = 9127 + i;
+ src1_128.a[i] = src1_128.a[i] << 26;
+ src2_128.a[i] = src2_128.a[i] << 26;
+ src1_128.a[i] &= ((1LL << 52) - 1);
+ src2_128.a[i] &= ((1LL << 52) - 1);
+ dst_128.a[i] = -1;
+ }
+
+ CALC (dst_ref_256, dst_256.a, src1_256.a, src2_256.a, 4);
+ dst_256.x = _mm256_madd52hi_avx_epu64 (dst_256.x, src1_256.x, src2_256.x);
+ if (check_union256i_q (dst_256, dst_ref_256))
+ abort ();
+
+ CALC (dst_ref_128, dst_128.a, src1_128.a, src2_128.a, 2);
+ dst_128.x = _mm_madd52hi_avx_epu64 (dst_128.x, src1_128.x, src2_128.x);
+ if (check_union128i_q (dst_128, dst_ref_128))
+ abort ();
+
+}
+
new file mode 100644
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavxifma" } */
+/* { dg-require-effective-target avxifma } */
+#define AVXIFMA
+#ifndef CHECK
+#define CHECK "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include CHECK
+
+void
+CALC (unsigned long long *r, unsigned long long *s1,
+ unsigned long long *s2, unsigned long long *s3,
+ int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ {
+ r[i] = s2[i] * s3[i] + s1[i];
+ }
+}
+
+void
+TEST (void)
+{
+ union256i_q src1_256, src2_256, dst_256;
+ union128i_q src1_128, src2_128, dst_128;
+ unsigned long long dst_ref_256[4], dst_ref_128[2];
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ src1_256.a[i] = 3450 * i;
+ src2_256.a[i] = 7863 * i;
+ dst_256.a[i] = 117;
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ src1_128.a[i] = 3540 * i;
+ src2_128.a[i] = 7683 * i;
+ dst_128.a[i] = 117;
+ }
+
+ CALC (dst_ref_256, dst_256.a, src1_256.a, src2_256.a, 4);
+ dst_256.x = _mm256_madd52lo_avx_epu64 (dst_256.x, src1_256.x, src2_256.x);
+ if (check_union256i_q (dst_256, dst_ref_256))
+ abort ();
+
+ CALC (dst_ref_128, dst_128.a, src1_128.a, src2_128.a, 2);
+ dst_128.x = _mm_madd52lo_avx_epu64 (dst_128.x, src1_128.x, src2_128.x);
+ if (check_union128i_q (dst_128, dst_ref_128))
+ abort ();
+
+}
+
similarity index 100%
rename from gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1.c
rename to gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1a.c
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512ifma -mavx512vl -mavxifma -O2" } */
+/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i _x1, _y1, _z1;
+volatile __m256i _x2, _y2, _z2;
+volatile __m128i _x3, _y3, _z3;
+
+void extern
+avx512ifma_test (void)
+{
+ _x3 = _mm_madd52hi_epu64 (_x3, _y3, _z3);
+ _x3 = _mm_mask_madd52hi_epu64 (_x3, 2, _y3, _z3);
+ _x3 = _mm_maskz_madd52hi_epu64 (2, _x3, _y3, _z3);
+ _x2 = _mm256_madd52hi_epu64 (_x2, _y2, _z2);
+ _x2 = _mm256_mask_madd52hi_epu64 (_x2, 3, _y2, _z2);
+ _x2 = _mm256_maskz_madd52hi_epu64 (3, _x2, _y2, _z2);
+ _x1 = _mm512_madd52hi_epu64 (_x1, _y1, _z1);
+ _x1 = _mm512_mask_madd52hi_epu64 (_x1, 3, _y1, _z1);
+ _x1 = _mm512_maskz_madd52hi_epu64 (3, _x1, _y1, _z1);
+}
similarity index 100%
rename from gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1.c
rename to gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1a.c
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512ifma -mavx512vl -mavxifma -O2" } */
+/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i _x1, _y1, _z1;
+volatile __m256i _x2, _y2, _z2;
+volatile __m128i _x3, _y3, _z3;
+
+void extern
+avx512ifma_test (void)
+{
+ _x3 = _mm_madd52lo_epu64 (_x3, _y3, _z3);
+ _x3 = _mm_mask_madd52lo_epu64 (_x3, 2, _y3, _z3);
+ _x3 = _mm_maskz_madd52lo_epu64 (2, _x3, _y3, _z3);
+ _x2 = _mm256_madd52lo_epu64 (_x2, _y2, _z2);
+ _x2 = _mm256_mask_madd52lo_epu64 (_x2, 3, _y2, _z2);
+ _x2 = _mm256_maskz_madd52lo_epu64 (3, _x2, _y2, _z2);
+ _x1 = _mm512_madd52lo_epu64 (_x1, _y1, _z1);
+ _x1 = _mm512_mask_madd52lo_epu64 (_x1, 3, _y1, _z1);
+ _x1 = _mm512_maskz_madd52lo_epu64 (3, _x1, _y1, _z1);
+}
@@ -80,6 +80,7 @@ extern void test_keylocker (void) __attribute__((__target__("kl")));
extern void test_widekl (void) __attribute__((__target__("widekl")));
extern void test_avxvnni (void) __attribute__((__target__("avxvnni")));
extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16")));
+extern void test_avxifma (void) __attribute__((__target__("avxifma")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
@@ -161,6 +162,7 @@ extern void test_no_keylocker (void) __attribute__((__target__("no-kl")));
extern void test_no_widekl (void) __attribute__((__target__("no-widekl")));
extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni")));
extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16")));
+extern void test_no_avxifma (void) __attribute__((__target__("no-avxifma")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
@@ -3,7 +3,7 @@
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma" } */
#include <x86intrin.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
@@ -103,7 +103,7 @@
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
#endif
/* Following intrinsics require immediate arguments. They
@@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
@@ -843,6 +843,6 @@
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
#include <x86intrin.h>
@@ -9506,6 +9506,18 @@ proc check_effective_target_avxvnni { } {
} "-mavxvnni" ]
}
+# Return 1 if avxifma instructions can be compiled.
+proc check_effective_target_avxifma { } {
+ return [check_no_compiler_messages avxifma object {
+ typedef long long __v4di __attribute__ ((__vector_size__ (32)));
+ __v4di
+ _mm256_maddlo_avx_epu64 (__v4di __X, __v4di __Y, __v4di __Z)
+ {
+ return __builtin_ia32_avx_vpmadd52luq256 (__X, __Y, __Z);
+ }
+ } "-O0 -mavxifma" ]
+}
+
# Return 1 if sse instructions can be compiled.
proc check_effective_target_sse { } {
return [check_no_compiler_messages sse object {