[1/3] Initial support for AVX10.1
Checks
Commit Message
gcc/ChangeLog:
* common/config/i386/cpuinfo.h (get_available_features):
Add avx10_set and version and detect avx10.1.
(cpu_indicator_init): Handle avx10.1-512.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AVX10_512BIT_SET): New.
(OPTION_MASK_ISA2_AVX10_1_SET): Ditto.
(OPTION_MASK_ISA2_AVX10_512BIT_UNSET): Ditto.
(OPTION_MASK_ISA2_AVX10_1_UNSET): Ditto.
(OPTION_MASK_ISA2_AVX2_UNSET): Modify for AVX10_1.
(ix86_handle_option): Handle -mavx10.1, -mavx10.1-256 and
-mavx10.1-512.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AVX10_512BIT, FEATURE_AVX10_1 and
FEATURE_AVX10_1_512.
* common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY for
AVX10_512BIT, AVX10_1 and AVX10_1_512.
* config/i386/constraints.md (Yk): Add AVX10_1.
(Yv): Ditto.
(k): Ditto.
* config/i386/cpuid.h (bit_AVX10): New.
(bit_AVX10_256): Ditto.
(bit_AVX10_512): Ditto.
* config/i386/i386-c.cc (ix86_target_macros_internal):
Define AVX10_512BIT and AVX10_1.
* config/i386/i386-isa.def
(AVX10_512BIT): Add DEF_PTA(AVX10_512BIT).
(AVX10_1): Add DEF_PTA(AVX10_1).
* config/i386/i386-options.cc (isa2_opts): Add -mavx10.1.
(ix86_valid_target_attribute_inner_p): Handle avx10-max-512bit,
avx10.1, avx10.1-256 and avx10.1-512.
(ix86_option_override_internal): Enable AVX512{F,VL,BW,DQ,CD,BF16,
FP16,VBMI,VBMI2,VNNI,IFMA,BITALG,VPOPCNTDQ} features for avx10.1-512.
(ix86_valid_target_attribute_inner_p): Handle AVX10_1.
* config/i386/i386.cc (ix86_get_ssemov): Add AVX10_1.
(ix86_conditional_register_usage): Ditto.
(ix86_hard_regno_mode_ok): Ditto.
(ix86_rtx_costs): Ditto.
* config/i386/i386.h (VALID_MASK_AVX10_MODE): New macro.
* config/i386/i386.opt: Add option -mavx10.1, -mavx10.1-256 and
-mavx10.1-512.
* doc/extend.texi: Document avx10.1, avx10.1-256 and avx10.1-512.
* doc/invoke.texi: Document -mavx10.1, -mavx10.1-256 and -mavx10.1-512.
* doc/sourcebuild.texi: Document target avx10.1, avx10.1-256
and avx10.1-512.
gcc/testsuite/ChangeLog:
* g++.target/i386/mv33.C: New test.
* gcc.target/i386/avx10_1-1.c: Ditto.
* gcc.target/i386/avx10_1-2.c: Ditto.
* gcc.target/i386/avx10_1-3.c: Ditto.
* gcc.target/i386/avx10_1-4.c: Ditto.
* gcc.target/i386/avx10_1-5.c: Ditto.
* gcc.target/i386/avx10_1-6.c: Ditto.
* gcc.target/i386/avx10_1-7.c: Ditto.
* gcc.target/i386/avx10_1-8.c: Ditto.
* gcc.target/i386/avx10_1-9.c: Ditto.
* gcc.target/i386/avx10_1-10.c: Ditto.
---
gcc/common/config/i386/cpuinfo.h | 36 +++++++++++++++
gcc/common/config/i386/i386-common.cc | 53 +++++++++++++++++++++-
gcc/common/config/i386/i386-cpuinfo.h | 3 ++
gcc/common/config/i386/i386-isas.h | 5 ++
gcc/config/i386/constraints.md | 6 +--
gcc/config/i386/cpuid.h | 6 +++
gcc/config/i386/i386-c.cc | 4 ++
gcc/config/i386/i386-isa.def | 2 +
gcc/config/i386/i386-options.cc | 26 ++++++++++-
gcc/config/i386/i386.cc | 18 ++++++--
gcc/config/i386/i386.h | 3 ++
gcc/config/i386/i386.opt | 19 ++++++++
gcc/doc/extend.texi | 13 ++++++
gcc/doc/invoke.texi | 16 +++++--
gcc/doc/sourcebuild.texi | 9 ++++
gcc/testsuite/g++.target/i386/mv33.C | 30 ++++++++++++
gcc/testsuite/gcc.target/i386/avx10_1-1.c | 22 +++++++++
gcc/testsuite/gcc.target/i386/avx10_1-10.c | 13 ++++++
gcc/testsuite/gcc.target/i386/avx10_1-2.c | 13 ++++++
gcc/testsuite/gcc.target/i386/avx10_1-3.c | 13 ++++++
gcc/testsuite/gcc.target/i386/avx10_1-4.c | 13 ++++++
gcc/testsuite/gcc.target/i386/avx10_1-5.c | 13 ++++++
gcc/testsuite/gcc.target/i386/avx10_1-6.c | 13 ++++++
gcc/testsuite/gcc.target/i386/avx10_1-7.c | 13 ++++++
gcc/testsuite/gcc.target/i386/avx10_1-8.c | 4 ++
gcc/testsuite/gcc.target/i386/avx10_1-9.c | 13 ++++++
26 files changed, 366 insertions(+), 13 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/mv33.C
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-10.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-6.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-7.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-8.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-9.c
Comments
On Tue, Aug 8, 2023 at 3:16 PM Haochen Jiang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> gcc/ChangeLog:
>
> * common/config/i386/cpuinfo.h (get_available_features):
> Add avx10_set and version and detect avx10.1.
> (cpu_indicator_init): Handle avx10.1-512.
> * common/config/i386/i386-common.cc
> (OPTION_MASK_ISA2_AVX10_512BIT_SET): New.
> (OPTION_MASK_ISA2_AVX10_1_SET): Ditto.
> (OPTION_MASK_ISA2_AVX10_512BIT_UNSET): Ditto.
> (OPTION_MASK_ISA2_AVX10_1_UNSET): Ditto.
> (OPTION_MASK_ISA2_AVX2_UNSET): Modify for AVX10_1.
> (ix86_handle_option): Handle -mavx10.1, -mavx10.1-256 and
> -mavx10.1-512.
> * common/config/i386/i386-cpuinfo.h (enum processor_features):
> Add FEATURE_AVX10_512BIT, FEATURE_AVX10_1 and
> FEATURE_AVX10_1_512.
> * common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY for
> AVX10_512BIT, AVX10_1 and AVX10_1_512.
> * config/i386/constraints.md (Yk): Add AVX10_1.
> (Yv): Ditto.
> (k): Ditto.
> * config/i386/cpuid.h (bit_AVX10): New.
> (bit_AVX10_256): Ditto.
> (bit_AVX10_512): Ditto.
> * config/i386/i386-c.cc (ix86_target_macros_internal):
> Define AVX10_512BIT and AVX10_1.
> * config/i386/i386-isa.def
> (AVX10_512BIT): Add DEF_PTA(AVX10_512BIT).
> (AVX10_1): Add DEF_PTA(AVX10_1).
> * config/i386/i386-options.cc (isa2_opts): Add -mavx10.1.
> (ix86_valid_target_attribute_inner_p): Handle avx10-max-512bit,
> avx10.1, avx10.1-256 and avx10.1-512.
> (ix86_option_override_internal): Enable AVX512{F,VL,BW,DQ,CD,BF16,
> FP16,VBMI,VBMI2,VNNI,IFMA,BITALG,VPOPCNTDQ} features for avx10.1-512.
> (ix86_valid_target_attribute_inner_p): Handle AVX10_1.
> * config/i386/i386.cc (ix86_get_ssemov): Add AVX10_1.
> (ix86_conditional_register_usage): Ditto.
> (ix86_hard_regno_mode_ok): Ditto.
> (ix86_rtx_costs): Ditto.
> * config/i386/i386.h (VALID_MASK_AVX10_MODE): New macro.
> * config/i386/i386.opt: Add option -mavx10.1, -mavx10.1-256 and
> -mavx10.1-512.
> * doc/extend.texi: Document avx10.1, avx10.1-256 and avx10.1-512.
> * doc/invoke.texi: Document -mavx10.1, -mavx10.1-256 and -mavx10.1-512.
> * doc/sourcebuild.texi: Document target avx10.1, avx10.1-256
> and avx10.1-512.
>
> gcc/testsuite/ChangeLog:
>
> * g++.target/i386/mv33.C: New test.
> * gcc.target/i386/avx10_1-1.c: Ditto.
> * gcc.target/i386/avx10_1-2.c: Ditto.
> * gcc.target/i386/avx10_1-3.c: Ditto.
> * gcc.target/i386/avx10_1-4.c: Ditto.
> * gcc.target/i386/avx10_1-5.c: Ditto.
> * gcc.target/i386/avx10_1-6.c: Ditto.
> * gcc.target/i386/avx10_1-7.c: Ditto.
> * gcc.target/i386/avx10_1-8.c: Ditto.
> * gcc.target/i386/avx10_1-9.c: Ditto.
> * gcc.target/i386/avx10_1-10.c: Ditto.
OK (please wait an extra 24 hours before committing, in case there are any objections).
> ---
> gcc/common/config/i386/cpuinfo.h | 36 +++++++++++++++
> gcc/common/config/i386/i386-common.cc | 53 +++++++++++++++++++++-
> gcc/common/config/i386/i386-cpuinfo.h | 3 ++
> gcc/common/config/i386/i386-isas.h | 5 ++
> gcc/config/i386/constraints.md | 6 +--
> gcc/config/i386/cpuid.h | 6 +++
> gcc/config/i386/i386-c.cc | 4 ++
> gcc/config/i386/i386-isa.def | 2 +
> gcc/config/i386/i386-options.cc | 26 ++++++++++-
> gcc/config/i386/i386.cc | 18 ++++++--
> gcc/config/i386/i386.h | 3 ++
> gcc/config/i386/i386.opt | 19 ++++++++
> gcc/doc/extend.texi | 13 ++++++
> gcc/doc/invoke.texi | 16 +++++--
> gcc/doc/sourcebuild.texi | 9 ++++
> gcc/testsuite/g++.target/i386/mv33.C | 30 ++++++++++++
> gcc/testsuite/gcc.target/i386/avx10_1-1.c | 22 +++++++++
> gcc/testsuite/gcc.target/i386/avx10_1-10.c | 13 ++++++
> gcc/testsuite/gcc.target/i386/avx10_1-2.c | 13 ++++++
> gcc/testsuite/gcc.target/i386/avx10_1-3.c | 13 ++++++
> gcc/testsuite/gcc.target/i386/avx10_1-4.c | 13 ++++++
> gcc/testsuite/gcc.target/i386/avx10_1-5.c | 13 ++++++
> gcc/testsuite/gcc.target/i386/avx10_1-6.c | 13 ++++++
> gcc/testsuite/gcc.target/i386/avx10_1-7.c | 13 ++++++
> gcc/testsuite/gcc.target/i386/avx10_1-8.c | 4 ++
> gcc/testsuite/gcc.target/i386/avx10_1-9.c | 13 ++++++
> 26 files changed, 366 insertions(+), 13 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/mv33.C
> create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-10.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-3.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-4.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-5.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-6.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-7.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-8.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-9.c
>
> diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
> index 30ef0d334ca..5abff83b4ca 100644
> --- a/gcc/common/config/i386/cpuinfo.h
> +++ b/gcc/common/config/i386/cpuinfo.h
> @@ -688,6 +688,9 @@ get_available_features (struct __processor_model *cpu_model,
> int amx_usable = 0;
> /* Check if KL is usable. */
> int has_kl = 0;
> + /* Record AVX10 version. */
> + int avx10_set = 0;
> + int version = 0;
> if ((ecx & bit_OSXSAVE))
> {
> /* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and
> @@ -906,6 +909,9 @@ get_available_features (struct __processor_model *cpu_model,
> {
> if (eax & bit_AVX512BF16)
> set_feature (FEATURE_AVX512BF16);
> + /* AVX10 has the same XSTATE as AVX512. */
> + if (edx & bit_AVX10)
> + avx10_set = 1;
> }
> if (amx_usable)
> {
> @@ -951,6 +957,24 @@ get_available_features (struct __processor_model *cpu_model,
> }
> }
>
> + /* Get Advanced Features at level 0x24 (eax = 0x24). */
> + if (avx10_set && max_cpuid_level >= 0x24)
> + {
> + __cpuid (0x24, eax, ebx, ecx, edx);
> + version = ebx & 0xff;
> + if (ebx & bit_AVX10_256)
> + switch (version)
> + {
> + case 1:
> + set_feature (FEATURE_AVX10_1);
> + break;
> + default:
> + gcc_unreachable ();
> + }
> + if (ebx & bit_AVX10_512)
> + set_feature (FEATURE_AVX10_512BIT);
> + }
> +
> /* Check cpuid level of extended features. */
> __cpuid (0x80000000, ext_level, ebx, ecx, edx);
>
> @@ -1155,6 +1179,18 @@ cpu_indicator_init (struct __processor_model *cpu_model,
> }
> }
>
> +#define SET_AVX10_512(A,B) \
> + if (has_cpu_feature (cpu_model, cpu_features2, FEATURE_AVX10_##A)) \
> + { \
> + CHECK___builtin_cpu_supports (B); \
> + set_cpu_feature (cpu_model, cpu_features2, FEATURE_AVX10_##A##_512); \
> + }
> +
> + if (has_cpu_feature (cpu_model, cpu_features2, FEATURE_AVX10_512BIT))
> + SET_AVX10_512 (1, "avx10.1-512");
> +
> +#undef SET_AVX10_512
> +
> gcc_assert (cpu_model->__cpu_vendor < VENDOR_MAX);
> gcc_assert (cpu_model->__cpu_type < CPU_TYPE_MAX);
> gcc_assert (cpu_model->__cpu_subtype < CPU_SUBTYPE_MAX);
> diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
> index 26005914079..6c3bebb1846 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -123,6 +123,8 @@ along with GCC; see the file COPYING3. If not see
> #define OPTION_MASK_ISA2_SM3_SET OPTION_MASK_ISA2_SM3
> #define OPTION_MASK_ISA2_SHA512_SET OPTION_MASK_ISA2_SHA512
> #define OPTION_MASK_ISA2_SM4_SET OPTION_MASK_ISA2_SM4
> +#define OPTION_MASK_ISA2_AVX10_512BIT_SET OPTION_MASK_ISA2_AVX10_512BIT
> +#define OPTION_MASK_ISA2_AVX10_1_SET OPTION_MASK_ISA2_AVX10_1
>
> /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
> as -msse4.2. */
> @@ -232,7 +234,8 @@ along with GCC; see the file COPYING3. If not see
> #define OPTION_MASK_ISA2_AVX2_UNSET \
> (OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \
> | OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | OPTION_MASK_ISA2_AVXNECONVERT_UNSET \
> - | OPTION_MASK_ISA2_AVXVNNIINT16_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET)
> + | OPTION_MASK_ISA2_AVXVNNIINT16_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET \
> + | OPTION_MASK_ISA2_AVX10_1_UNSET)
> #define OPTION_MASK_ISA_AVX512F_UNSET \
> (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
> | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
> @@ -309,6 +312,8 @@ along with GCC; see the file COPYING3. If not see
> #define OPTION_MASK_ISA2_SM3_UNSET OPTION_MASK_ISA2_SM3
> #define OPTION_MASK_ISA2_SHA512_UNSET OPTION_MASK_ISA2_SHA512
> #define OPTION_MASK_ISA2_SM4_UNSET OPTION_MASK_ISA2_SM4
> +#define OPTION_MASK_ISA2_AVX10_512BIT_UNSET OPTION_MASK_ISA2_AVX10_512BIT
> +#define OPTION_MASK_ISA2_AVX10_1_UNSET OPTION_MASK_ISA2_AVX10_1
>
> /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
> as -mno-sse4.1. */
> @@ -1341,6 +1346,52 @@ ix86_handle_option (struct gcc_options *opts,
> }
> return true;
>
> + case OPT_mavx10_max_512bit:
> + if (value)
> + {
> + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_512BIT_SET;
> + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_512BIT_SET;
> + }
> + else
> + {
> + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_512BIT_UNSET;
> + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_512BIT_UNSET;
> + }
> + return true;
> +
> + case OPT_mavx10_1:
> + if (value)
> + {
> + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_1_SET;
> + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_SET;
> + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
> + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
> + }
> + else
> + {
> + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_1_UNSET;
> + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_UNSET;
> + }
> + return true;
> +
> + case OPT_mavx10_1_256:
> + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_1_SET;
> + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_SET;
> + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_512BIT_SET;
> + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_512BIT_SET;
> + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
> + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
> + return true;
> +
> + case OPT_mavx10_1_512:
> + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_1_SET;
> + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_SET;
> + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_512BIT_SET;
> + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_512BIT_SET;
> + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
> + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
> + return true;
> +
> case OPT_mfma:
> if (value)
> {
> diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
> index 9153b4d0a54..8fbfb38baed 100644
> --- a/gcc/common/config/i386/i386-cpuinfo.h
> +++ b/gcc/common/config/i386/i386-cpuinfo.h
> @@ -261,6 +261,9 @@ enum processor_features
> FEATURE_SM3,
> FEATURE_SHA512,
> FEATURE_SM4,
> + FEATURE_AVX10_512BIT,
> + FEATURE_AVX10_1,
> + FEATURE_AVX10_1_512,
> CPU_FEATURE_MAX
> };
>
> diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
> index 2297903a45e..35be0cc3f2a 100644
> --- a/gcc/common/config/i386/i386-isas.h
> +++ b/gcc/common/config/i386/i386-isas.h
> @@ -191,4 +191,9 @@ ISA_NAMES_TABLE_START
> ISA_NAMES_TABLE_ENTRY("sm3", FEATURE_SM3, P_NONE, "-msm3")
> ISA_NAMES_TABLE_ENTRY("sha512", FEATURE_SHA512, P_NONE, "-msha512")
> ISA_NAMES_TABLE_ENTRY("sm4", FEATURE_SM4, P_NONE, "-msm4")
> + ISA_NAMES_TABLE_ENTRY("avx10-max-512bit", FEATURE_AVX10_512BIT,
> + P_NONE, "-mavx10-max-512bit")
> + ISA_NAMES_TABLE_ENTRY("avx10.1", FEATURE_AVX10_1, P_NONE, "-mavx10.1")
> + ISA_NAMES_TABLE_ENTRY("avx10.1-256", FEATURE_AVX10_1, P_NONE, NULL)
> + ISA_NAMES_TABLE_ENTRY("avx10.1-512", FEATURE_AVX10_1_512, P_NONE, NULL)
> ISA_NAMES_TABLE_END
> diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
> index fd490f39110..4be6bc4816a 100644
> --- a/gcc/config/i386/constraints.md
> +++ b/gcc/config/i386/constraints.md
> @@ -78,10 +78,10 @@
> "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_SECOND_REG : NO_REGS"
> "Second from top of 80387 floating-point stack (@code{%st(1)}).")
>
> -(define_register_constraint "Yk" "TARGET_AVX512F ? MASK_REGS : NO_REGS"
> +(define_register_constraint "Yk" "(TARGET_AVX512F || TARGET_AVX10_1) ? MASK_REGS : NO_REGS"
> "@internal Any mask register that can be used as predicate, i.e. k1-k7.")
>
> -(define_register_constraint "k" "TARGET_AVX512F ? ALL_MASK_REGS : NO_REGS"
> +(define_register_constraint "k" "(TARGET_AVX512F || TARGET_AVX10_1) ? ALL_MASK_REGS : NO_REGS"
> "@internal Any mask register.")
>
> ;; Vector registers (also used for plain floating point nowadays).
> @@ -146,7 +146,7 @@
> "@internal Lower SSE register when avoiding REX prefix and all SSE registers otherwise.")
>
> (define_register_constraint "Yv"
> - "TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
> + "(TARGET_AVX512VL || TARGET_AVX10_1) ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
> "@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.")
>
> (define_register_constraint "Yw"
> diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
> index 73c15480350..ca5551cefca 100644
> --- a/gcc/config/i386/cpuid.h
> +++ b/gcc/config/i386/cpuid.h
> @@ -149,6 +149,7 @@
> #define bit_AVXNECONVERT (1 << 5)
> #define bit_AVXVNNIINT16 (1 << 10)
> #define bit_PREFETCHI (1 << 14)
> +#define bit_AVX10 (1 << 19)
>
> /* Extended State Enumeration Sub-leaf (%eax == 0xd, %ecx == 1) */
> #define bit_XSAVEOPT (1 << 0)
> @@ -159,6 +160,11 @@
> /* %ebx */
> #define bit_PTWRITE (1 << 4)
>
> +/* AVX10 sub leaf (%eax == 0x24) */
> +/* %ebx */
> +#define bit_AVX10_256 (1 << 17)
> +#define bit_AVX10_512 (1 << 18)
> +
> /* Keylocker leaf (%eax == 0x19) */
> /* %ebx */
> #define bit_AESKLE ( 1<<0 )
> diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> index 257950582c2..caef5531593 100644
> --- a/gcc/config/i386/i386-c.cc
> +++ b/gcc/config/i386/i386-c.cc
> @@ -692,6 +692,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
> def_or_undef (parse_in, "__SHA512__");
> if (isa_flag2 & OPTION_MASK_ISA2_SM4)
> def_or_undef (parse_in, "__SM4__");
> + if (isa_flag2 & OPTION_MASK_ISA2_AVX10_512BIT)
> + def_or_undef (parse_in, "__AVX10_512BIT__");
> + if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1)
> + def_or_undef (parse_in, "__AVX10_1__");
> if (TARGET_IAMCU)
> {
> def_or_undef (parse_in, "__iamcu");
> diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
> index aeafcf870ac..f7d741746c3 100644
> --- a/gcc/config/i386/i386-isa.def
> +++ b/gcc/config/i386/i386-isa.def
> @@ -121,3 +121,5 @@ DEF_PTA(AVXVNNIINT16)
> DEF_PTA(SM3)
> DEF_PTA(SHA512)
> DEF_PTA(SM4)
> +DEF_PTA(AVX10_512BIT)
> +DEF_PTA(AVX10_1)
> diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
> index 127ee24203c..b2281fbd4b5 100644
> --- a/gcc/config/i386/i386-options.cc
> +++ b/gcc/config/i386/i386-options.cc
> @@ -243,7 +243,9 @@ static struct ix86_target_opts isa2_opts[] =
> { "-mavxvnniint16", OPTION_MASK_ISA2_AVXVNNIINT16 },
> { "-msm3", OPTION_MASK_ISA2_SM3 },
> { "-msha512", OPTION_MASK_ISA2_SHA512 },
> - { "-msm4", OPTION_MASK_ISA2_SM4 }
> + { "-msm4", OPTION_MASK_ISA2_SM4 },
> + { "-mavx10-max-512bit", OPTION_MASK_ISA2_AVX10_512BIT },
> + { "-mavx10.1", OPTION_MASK_ISA2_AVX10_1 }
> };
> static struct ix86_target_opts isa_opts[] =
> {
> @@ -983,7 +985,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
> ix86_opt_ix86_no,
> ix86_opt_str,
> ix86_opt_enum,
> - ix86_opt_isa
> + ix86_opt_isa,
> };
>
> static const struct
> @@ -1100,6 +1102,10 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
> IX86_ATTR_ISA ("sm3", OPT_msm3),
> IX86_ATTR_ISA ("sha512", OPT_msha512),
> IX86_ATTR_ISA ("sm4", OPT_msm4),
> + IX86_ATTR_ISA ("avx10-max-512bit", OPT_mavx10_max_512bit),
> + IX86_ATTR_ISA ("avx10.1", OPT_mavx10_1),
> + IX86_ATTR_ISA ("avx10.1-256", OPT_mavx10_1_256),
> + IX86_ATTR_ISA ("avx10.1-512", OPT_mavx10_1_512),
>
> /* enum options */
> IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
> @@ -2524,6 +2530,22 @@ ix86_option_override_internal (bool main_args_p,
> &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
> & ~opts->x_ix86_isa_flags_explicit);
>
> + /* Enable AVX512{F,VL,BW,DQ,CD,BF16,FP16,VBMI,VBMI2,VNNI,IFMA,BITALG,
> + VPOPCNTDQ} features for AVX10.1/512. */
> + if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2)
> + && TARGET_AVX10_512BIT_P (opts->x_ix86_isa_flags2))
> + {
> + opts->x_ix86_isa_flags
> + |= OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD
> + | OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512BW
> + | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512IFMA
> + | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI2
> + | OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VPOPCNTDQ
> + | OPTION_MASK_ISA_AVX512BITALG;
> + opts->x_ix86_isa_flags2
> + |= OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_AVX512BF16;
> + }
> +
> /* Validate -mpreferred-stack-boundary= value or default it to
> PREFERRED_STACK_BOUNDARY_DEFAULT. */
> ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 5d57726e22c..e75614b993d 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -513,8 +513,8 @@ ix86_conditional_register_usage (void)
> if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
> accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
>
> - /* If AVX512F is disabled, disable the registers. */
> - if (! TARGET_AVX512F)
> + /* If both AVX512F and AVX10 are disabled, disable the registers. */
> + if (!TARGET_AVX512F && !TARGET_AVX10_1)
> {
> for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
> CLEAR_HARD_REG_BIT (accessible_reg_set, i);
> @@ -5490,6 +5490,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
> we can only use zmm register move without memory operand. */
> if (evex_reg_p
> && !TARGET_AVX512VL
> + && !TARGET_AVX10_1
> && GET_MODE_SIZE (mode) < 64)
> {
> /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
> @@ -20259,7 +20260,8 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
>
> return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
> || (TARGET_AVX512BW
> - && VALID_MASK_AVX512BW_MODE (mode)));
> + && VALID_MASK_AVX512BW_MODE (mode))
> + || (TARGET_AVX10_1 && VALID_MASK_AVX10_MODE (mode)));
> }
>
> if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
> @@ -20294,6 +20296,13 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
> || VALID_AVX512VL_128_REG_MODE (mode)))
> return true;
>
> + /* AVX10_1 allows sse regs16+ for 256 bit modes. */
> + if (TARGET_AVX10_1
> + && (VALID_AVX256_REG_OR_OI_MODE (mode)
> + || VALID_AVX512VL_128_REG_MODE (mode)
> + || VALID_AVX512F_SCALAR_MODE (mode)))
> + return true;
> +
> /* xmm16-xmm31 are only available for AVX-512. */
> if (EXT_REX_SSE_REGNO_P (regno))
> return false;
> @@ -21584,7 +21593,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
> mask = XEXP (x, 2);
> /* This is masked instruction, assume the same cost,
> as nonmasked variant. */
> - if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
> + if ((TARGET_AVX512F || TARGET_AVX10_1)
> + && register_operand (mask, GET_MODE (mask)))
> *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
> else
> *total = cost->sse_op;
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index ef342fcee9b..77b50913458 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -1080,6 +1080,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
>
> #define VALID_MASK_AVX512BW_MODE(MODE) ((MODE) == SImode || (MODE) == DImode)
>
> +#define VALID_MASK_AVX10_MODE(MODE) ((MODE) == SImode || (MODE) == HImode \
> + || (MODE) == QImode)
> +
> #define VALID_FP_MODE_P(MODE) \
> ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \
> || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode)
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 1cc8563477a..0ce8e6204ff 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1298,3 +1298,22 @@ msm4
> Target Mask(ISA2_SM4) Var(ix86_isa_flags2) Save
> Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and
> SM4 built-in functions and code generation.
> +
> +mavx10-max-512bit
> +Target Mask(ISA2_AVX10_512BIT) Var(ix86_isa_flags2) Save
> +Indicates 512 bit vector width support for AVX10.
> +
> +mavx10.1
> +Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save
> +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
> +and AVX10.1 built-in functions and code generation.
> +
> +mavx10.1-256
> +Target RejectNegative
> +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
> +and AVX10.1 built-in functions and code generation.
> +
> +mavx10.1-512
> +Target RejectNegative
> +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
> +and AVX10.1-512 built-in functions and code generation.
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 89c5b4ea2b2..08e8b3b761c 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -7184,6 +7184,19 @@ Enable/disable the generation of the SHA512 instructions.
> @itemx no-sm4
> Enable/disable the generation of the SM4 instructions.
>
> +@cindex @code{target("avx10.1")} function attribute, x86
> +@item avx10.1
> +@itemx no-avx10.1
> +Enable/disable the generation of the AVX10.1 instructions.
> +
> +@cindex @code{target("avx10.1-256")} function attribute, x86
> +@item avx10.1-256
> +Enable the generation of the AVX10.1 instructions.
> +
> +@cindex @code{target("avx10.1-512")} function attribute, x86
> +@item avx10.1-512
> +Enable the generation of the AVX10.1 512 bit instructions.
> +
> @cindex @code{target("cld")} function attribute, x86
> @item cld
> @itemx no-cld
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 674f956f4b8..43b6210c3c8 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -1436,6 +1436,7 @@ See RS/6000 and PowerPC Options.
> -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni
> -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16
> -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4
> +-mavx10.1 -mavx10.1-256 -mavx10.1-512
> -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops
> -minline-stringops-dynamically -mstringop-strategy=@var{alg}
> -mkl -mwidekl
> @@ -33670,6 +33671,15 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
> @need 200
> @opindex msm4
> @itemx -msm4
> +@need 200
> +@opindex mavx10.1
> +@itemx -mavx10.1
> +@need 200
> +@opindex mavx10.1-256
> +@itemx -mavx10.1-256
> +@need 200
> +@opindex mavx10.1-512
> +@itemx -mavx10.1-512
> These switches enable the use of instructions in the MMX, SSE,
> AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
> AES, PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG,
> @@ -33680,9 +33690,9 @@ GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
> ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
> UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16,
> AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT,
> -AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512, SM4 or CLDEMOTE extended instruction
> -sets. Each has a corresponding @option{-mno-} option to disable use of these
> -instructions.
> +AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512, SM4, AVX10.1 or CLDEMOTE extended
> +instruction sets. Each has a corresponding @option{-mno-} option to disable
> +use of these instructions.
>
> These extensions are also available as built-in functions: see
> @ref{x86 Built-in Functions}, for details of the functions enabled and
> diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
> index 1a78b3c1abb..cab8065cd8e 100644
> --- a/gcc/doc/sourcebuild.texi
> +++ b/gcc/doc/sourcebuild.texi
> @@ -2484,6 +2484,15 @@ Target supports compiling @code{avx} instructions.
> @item avx_runtime
> Target supports the execution of @code{avx} instructions.
>
> +@item avx10.1
> +Target supports the execution of @code{avx10.1} instructions.
> +
> +@item avx10.1-256
> +Target supports the execution of @code{avx10.1} instructions.
> +
> +@item avx10.1-512
> +Target supports the execution of @code{avx10.1-512} instructions.
> +
> @item avx2
> Target supports compiling @code{avx2} instructions.
>
> diff --git a/gcc/testsuite/g++.target/i386/mv33.C b/gcc/testsuite/g++.target/i386/mv33.C
> new file mode 100644
> index 00000000000..b50f13c5aa8
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/mv33.C
> @@ -0,0 +1,30 @@
> +// Test that dispatching can choose the right multiversion
> +// for avx10.x-512 microarchitecture levels.
> +
> +// { dg-do run }
> +// { dg-require-ifunc "" }
> +// { dg-options "-O2" }
> +
> +#include <assert.h>
> +
> +int __attribute__ ((target("default")))
> +foo ()
> +{
> + return 0;
> +}
> +
> +int __attribute__ ((target("avx10.1-512"))) foo () {
> + return 1;
> +}
> +
> +int main ()
> +{
> + int val = foo ();
> +
> + if (__builtin_cpu_supports ("avx10.1-512"))
> + assert (val == 1);
> + else
> + assert (val == 0);
> +
> + return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-1.c b/gcc/testsuite/gcc.target/i386/avx10_1-1.c
> new file mode 100644
> index 00000000000..cfd9662bb13
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx10_1-1.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */
> +
> +#include <immintrin.h>
> +
> +void
> +f1 ()
> +{
> + register __m256d a __asm ("ymm17");
> + register __m256d b __asm ("ymm16");
> + a = _mm256_add_pd (a, b);
> + asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f2 ()
> +{
> + register __m128d a __asm ("xmm17");
> + register __m128d b __asm ("xmm16");
> + a = _mm_add_pd (a, b);
> + asm volatile ("" : "+v" (a));
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-10.c b/gcc/testsuite/gcc.target/i386/avx10_1-10.c
> new file mode 100644
> index 00000000000..9a5892d8df9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx10_1-10.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64" } */
> +/* { dg-final { scan-assembler "%zmm" } } */
> +
> +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
> +
> +__attribute__ ((target ("avx10.1-512"))) __m512d
> +foo ()
> +{
> + __m512d a, b;
> + a = a + b;
> + return a;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-2.c b/gcc/testsuite/gcc.target/i386/avx10_1-2.c
> new file mode 100644
> index 00000000000..0b3991dcf74
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx10_1-2.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx10.1-512" } */
> +/* { dg-final { scan-assembler "%zmm" } } */
> +
> +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
> +
> +__m512d
> +foo ()
> +{
> + __m512d a, b;
> + a = a + b;
> + return a;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-3.c b/gcc/testsuite/gcc.target/i386/avx10_1-3.c
> new file mode 100644
> index 00000000000..3be988a1a62
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx10_1-3.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */
> +
> +#include <immintrin.h>
> +
> +int
> +foo (int c)
> +{
> + register int a __asm ("k7") = c;
> + int b = foo (a);
> + asm volatile ("" : "+k" (b));
> + return b;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-4.c b/gcc/testsuite/gcc.target/i386/avx10_1-4.c
> new file mode 100644
> index 00000000000..68cbf197d61
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx10_1-4.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64 -mavx10.1-512" } */
> +
> +#include <immintrin.h>
> +
> +long long
> +foo (long long c)
> +{
> + register long long a __asm ("k7") = c;
> + long long b = foo (a);
> + asm volatile ("" : "+k" (b));
> + return b;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-5.c b/gcc/testsuite/gcc.target/i386/avx10_1-5.c
> new file mode 100644
> index 00000000000..5481ab2f386
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx10_1-5.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O0 -march=x86-64 -mavx10.1 -Wno-psabi" } */
> +/* { dg-final { scan-assembler-not ".%zmm" } } */
> +
> +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
> +
> +__m512d
> +foo ()
> +{
> + __m512d a, b;
> + a = a + b;
> + return a;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-6.c b/gcc/testsuite/gcc.target/i386/avx10_1-6.c
> new file mode 100644
> index 00000000000..827c80ce51e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx10_1-6.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */
> +
> +#include <immintrin.h>
> +
> +long long
> +foo (long long c)
> +{
> + register long long a __asm ("k7") = c;
> + long long b = foo (a);
> + asm volatile ("" : "+k" (b)); /* { dg-error "inconsistent operand constraints in an 'asm'" } */
> + return b;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-7.c b/gcc/testsuite/gcc.target/i386/avx10_1-7.c
> new file mode 100644
> index 00000000000..d8b8d97590b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx10_1-7.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -Wno-psabi" } */
> +/* { dg-final { scan-assembler-not ".%zmm" } } */
> +
> +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
> +
> +__attribute__ ((target ("avx10.1"))) __m512d
> +foo ()
> +{
> + __m512d a, b;
> + a = a + b;
> + return a;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-8.c b/gcc/testsuite/gcc.target/i386/avx10_1-8.c
> new file mode 100644
> index 00000000000..8dbd201b336
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx10_1-8.c
> @@ -0,0 +1,4 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -march=x86-64 -mavx10.1-256" } */
> +
> +#include "avx10_1-1.c"
> diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-9.c b/gcc/testsuite/gcc.target/i386/avx10_1-9.c
> new file mode 100644
> index 00000000000..00493098be7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx10_1-9.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -Wno-psabi" } */
> +/* { dg-final { scan-assembler-not ".%zmm" } } */
> +
> +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
> +
> +__attribute__ ((target ("avx10.1-256"))) __m512d
> +foo ()
> +{
> + __m512d a, b;
> + a = a + b;
> + return a;
> +}
> --
> 2.31.1
>
@@ -688,6 +688,9 @@ get_available_features (struct __processor_model *cpu_model,
int amx_usable = 0;
/* Check if KL is usable. */
int has_kl = 0;
+ /* Record AVX10 version. */
+ int avx10_set = 0;
+ int version = 0;
if ((ecx & bit_OSXSAVE))
{
/* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and
@@ -906,6 +909,9 @@ get_available_features (struct __processor_model *cpu_model,
{
if (eax & bit_AVX512BF16)
set_feature (FEATURE_AVX512BF16);
+ /* AVX10 has the same XSTATE as AVX512. */
+ if (edx & bit_AVX10)
+ avx10_set = 1;
}
if (amx_usable)
{
@@ -951,6 +957,24 @@ get_available_features (struct __processor_model *cpu_model,
}
}
+  /* Get Advanced Features at level 0x24 (eax = 0x24, ecx = 0).  */
+  if (avx10_set && max_cpuid_level >= 0x24)
+    {
+      __cpuid_count (0x24, 0, eax, ebx, ecx, edx);
+      version = ebx & 0xff;
+      if (ebx & bit_AVX10_256)
+        switch (version)
+          {
+          case 1:
+          default:
+            /* Unknown newer versions still report AVX10.1, never abort.  */
+            set_feature (FEATURE_AVX10_1);
+            break;
+          }
+      if (ebx & bit_AVX10_512)
+        set_feature (FEATURE_AVX10_512BIT);
+    }
+
/* Check cpuid level of extended features. */
__cpuid (0x80000000, ext_level, ebx, ecx, edx);
@@ -1155,6 +1179,18 @@ cpu_indicator_init (struct __processor_model *cpu_model,
}
}
+#define SET_AVX10_512(A,B) \
+ if (has_cpu_feature (cpu_model, cpu_features2, FEATURE_AVX10_##A)) \
+ { \
+ CHECK___builtin_cpu_supports (B); \
+ set_cpu_feature (cpu_model, cpu_features2, FEATURE_AVX10_##A##_512); \
+ }
+
+ if (has_cpu_feature (cpu_model, cpu_features2, FEATURE_AVX10_512BIT))
+ SET_AVX10_512 (1, "avx10.1-512");
+
+#undef SET_AVX10_512
+
gcc_assert (cpu_model->__cpu_vendor < VENDOR_MAX);
gcc_assert (cpu_model->__cpu_type < CPU_TYPE_MAX);
gcc_assert (cpu_model->__cpu_subtype < CPU_SUBTYPE_MAX);
@@ -123,6 +123,8 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_SM3_SET OPTION_MASK_ISA2_SM3
#define OPTION_MASK_ISA2_SHA512_SET OPTION_MASK_ISA2_SHA512
#define OPTION_MASK_ISA2_SM4_SET OPTION_MASK_ISA2_SM4
+#define OPTION_MASK_ISA2_AVX10_512BIT_SET OPTION_MASK_ISA2_AVX10_512BIT
+#define OPTION_MASK_ISA2_AVX10_1_SET OPTION_MASK_ISA2_AVX10_1
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
@@ -232,7 +234,8 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_AVX2_UNSET \
(OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \
| OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | OPTION_MASK_ISA2_AVXNECONVERT_UNSET \
- | OPTION_MASK_ISA2_AVXVNNIINT16_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET)
+ | OPTION_MASK_ISA2_AVXVNNIINT16_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET \
+ | OPTION_MASK_ISA2_AVX10_1_UNSET)
#define OPTION_MASK_ISA_AVX512F_UNSET \
(OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
| OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
@@ -309,6 +312,8 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_SM3_UNSET OPTION_MASK_ISA2_SM3
#define OPTION_MASK_ISA2_SHA512_UNSET OPTION_MASK_ISA2_SHA512
#define OPTION_MASK_ISA2_SM4_UNSET OPTION_MASK_ISA2_SM4
+#define OPTION_MASK_ISA2_AVX10_512BIT_UNSET OPTION_MASK_ISA2_AVX10_512BIT
+#define OPTION_MASK_ISA2_AVX10_1_UNSET OPTION_MASK_ISA2_AVX10_1
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
@@ -1341,6 +1346,52 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_mavx10_max_512bit:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_512BIT_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_512BIT_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_512BIT_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_512BIT_UNSET;
+ }
+ return true;
+
+ case OPT_mavx10_1:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_1_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_1_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_UNSET;
+ }
+ return true;
+
+ case OPT_mavx10_1_256:
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_1_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_SET;
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_512BIT_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_512BIT_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
+ return true;
+
+ case OPT_mavx10_1_512:
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_1_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_SET;
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_512BIT_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_512BIT_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
+ return true;
+
case OPT_mfma:
if (value)
{
@@ -261,6 +261,9 @@ enum processor_features
FEATURE_SM3,
FEATURE_SHA512,
FEATURE_SM4,
+ FEATURE_AVX10_512BIT,
+ FEATURE_AVX10_1,
+ FEATURE_AVX10_1_512,
CPU_FEATURE_MAX
};
@@ -191,4 +191,9 @@ ISA_NAMES_TABLE_START
ISA_NAMES_TABLE_ENTRY("sm3", FEATURE_SM3, P_NONE, "-msm3")
ISA_NAMES_TABLE_ENTRY("sha512", FEATURE_SHA512, P_NONE, "-msha512")
ISA_NAMES_TABLE_ENTRY("sm4", FEATURE_SM4, P_NONE, "-msm4")
+ ISA_NAMES_TABLE_ENTRY("avx10-max-512bit", FEATURE_AVX10_512BIT,
+ P_NONE, "-mavx10-max-512bit")
+ ISA_NAMES_TABLE_ENTRY("avx10.1", FEATURE_AVX10_1, P_NONE, "-mavx10.1")
+ ISA_NAMES_TABLE_ENTRY("avx10.1-256", FEATURE_AVX10_1, P_NONE, NULL)
+ ISA_NAMES_TABLE_ENTRY("avx10.1-512", FEATURE_AVX10_1_512, P_NONE, NULL)
ISA_NAMES_TABLE_END
@@ -78,10 +78,10 @@
"TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_SECOND_REG : NO_REGS"
"Second from top of 80387 floating-point stack (@code{%st(1)}).")
-(define_register_constraint "Yk" "TARGET_AVX512F ? MASK_REGS : NO_REGS"
+(define_register_constraint "Yk" "(TARGET_AVX512F || TARGET_AVX10_1) ? MASK_REGS : NO_REGS"
"@internal Any mask register that can be used as predicate, i.e. k1-k7.")
-(define_register_constraint "k" "TARGET_AVX512F ? ALL_MASK_REGS : NO_REGS"
+(define_register_constraint "k" "(TARGET_AVX512F || TARGET_AVX10_1) ? ALL_MASK_REGS : NO_REGS"
"@internal Any mask register.")
;; Vector registers (also used for plain floating point nowadays).
@@ -146,7 +146,7 @@
"@internal Lower SSE register when avoiding REX prefix and all SSE registers otherwise.")
(define_register_constraint "Yv"
- "TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
+ "(TARGET_AVX512VL || TARGET_AVX10_1) ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
"@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.")
(define_register_constraint "Yw"
@@ -149,6 +149,7 @@
#define bit_AVXNECONVERT (1 << 5)
#define bit_AVXVNNIINT16 (1 << 10)
#define bit_PREFETCHI (1 << 14)
+#define bit_AVX10 (1 << 19)
/* Extended State Enumeration Sub-leaf (%eax == 0xd, %ecx == 1) */
#define bit_XSAVEOPT (1 << 0)
@@ -159,6 +160,11 @@
/* %ebx */
#define bit_PTWRITE (1 << 4)
+/* AVX10 sub leaf (%eax == 0x24) */
+/* %ebx */
+#define bit_AVX10_256 (1 << 17)
+#define bit_AVX10_512 (1 << 18)
+
/* Keylocker leaf (%eax == 0x19) */
/* %ebx */
#define bit_AESKLE ( 1<<0 )
@@ -692,6 +692,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__SHA512__");
if (isa_flag2 & OPTION_MASK_ISA2_SM4)
def_or_undef (parse_in, "__SM4__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AVX10_512BIT)
+ def_or_undef (parse_in, "__AVX10_512BIT__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1)
+ def_or_undef (parse_in, "__AVX10_1__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
@@ -121,3 +121,5 @@ DEF_PTA(AVXVNNIINT16)
DEF_PTA(SM3)
DEF_PTA(SHA512)
DEF_PTA(SM4)
+DEF_PTA(AVX10_512BIT)
+DEF_PTA(AVX10_1)
@@ -243,7 +243,9 @@ static struct ix86_target_opts isa2_opts[] =
{ "-mavxvnniint16", OPTION_MASK_ISA2_AVXVNNIINT16 },
{ "-msm3", OPTION_MASK_ISA2_SM3 },
{ "-msha512", OPTION_MASK_ISA2_SHA512 },
- { "-msm4", OPTION_MASK_ISA2_SM4 }
+ { "-msm4", OPTION_MASK_ISA2_SM4 },
+ { "-mavx10-max-512bit", OPTION_MASK_ISA2_AVX10_512BIT },
+ { "-mavx10.1", OPTION_MASK_ISA2_AVX10_1 }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -983,7 +985,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
ix86_opt_ix86_no,
ix86_opt_str,
ix86_opt_enum,
- ix86_opt_isa
+ ix86_opt_isa,
};
static const struct
@@ -1100,6 +1102,10 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("sm3", OPT_msm3),
IX86_ATTR_ISA ("sha512", OPT_msha512),
IX86_ATTR_ISA ("sm4", OPT_msm4),
+ IX86_ATTR_ISA ("avx10-max-512bit", OPT_mavx10_max_512bit),
+ IX86_ATTR_ISA ("avx10.1", OPT_mavx10_1),
+ IX86_ATTR_ISA ("avx10.1-256", OPT_mavx10_1_256),
+ IX86_ATTR_ISA ("avx10.1-512", OPT_mavx10_1_512),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@@ -2524,6 +2530,22 @@ ix86_option_override_internal (bool main_args_p,
&= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
& ~opts->x_ix86_isa_flags_explicit);
+ /* Enable AVX512{F,VL,BW,DQ,CD,BF16,FP16,VBMI,VBMI2,VNNI,IFMA,BITALG,
+ VPOPCNTDQ} features for AVX10.1/512. */
+ if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2)
+ && TARGET_AVX10_512BIT_P (opts->x_ix86_isa_flags2))
+ {
+ opts->x_ix86_isa_flags
+ |= OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD
+ | OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512BW
+ | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512IFMA
+ | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI2
+ | OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VPOPCNTDQ
+ | OPTION_MASK_ISA_AVX512BITALG;
+ opts->x_ix86_isa_flags2
+ |= OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_AVX512BF16;
+ }
+
/* Validate -mpreferred-stack-boundary= value or default it to
PREFERRED_STACK_BOUNDARY_DEFAULT. */
ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
@@ -513,8 +513,8 @@ ix86_conditional_register_usage (void)
if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
- /* If AVX512F is disabled, disable the registers. */
- if (! TARGET_AVX512F)
+ /* If both AVX512F and AVX10.1 are disabled, disable the registers. */
+ if (!TARGET_AVX512F && !TARGET_AVX10_1)
{
for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
CLEAR_HARD_REG_BIT (accessible_reg_set, i);
@@ -5490,6 +5490,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
we can only use zmm register move without memory operand. */
if (evex_reg_p
&& !TARGET_AVX512VL
+ && !TARGET_AVX10_1
&& GET_MODE_SIZE (mode) < 64)
{
/* NB: Even though ix86_hard_regno_mode_ok doesn't allow
@@ -20259,7 +20260,8 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
|| (TARGET_AVX512BW
- && VALID_MASK_AVX512BW_MODE (mode)));
+ && VALID_MASK_AVX512BW_MODE (mode))
+ || (TARGET_AVX10_1 && VALID_MASK_AVX10_MODE (mode)));
}
if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
@@ -20294,6 +20296,13 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
|| VALID_AVX512VL_128_REG_MODE (mode)))
return true;
+ /* AVX10_1 allows sse regs16+ for scalar, 128 bit and 256 bit modes. */
+ if (TARGET_AVX10_1
+ && (VALID_AVX256_REG_OR_OI_MODE (mode)
+ || VALID_AVX512VL_128_REG_MODE (mode)
+ || VALID_AVX512F_SCALAR_MODE (mode)))
+ return true;
+
/* xmm16-xmm31 are only available for AVX-512. */
if (EXT_REX_SSE_REGNO_P (regno))
return false;
@@ -21584,7 +21593,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
mask = XEXP (x, 2);
/* This is masked instruction, assume the same cost,
as nonmasked variant. */
- if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
+ if ((TARGET_AVX512F || TARGET_AVX10_1)
+ && register_operand (mask, GET_MODE (mask)))
*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
else
*total = cost->sse_op;
@@ -1080,6 +1080,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define VALID_MASK_AVX512BW_MODE(MODE) ((MODE) == SImode || (MODE) == DImode)
+#define VALID_MASK_AVX10_MODE(MODE) ((MODE) == SImode || (MODE) == HImode \
+ || (MODE) == QImode)
+
#define VALID_FP_MODE_P(MODE) \
((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \
|| (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode)
@@ -1298,3 +1298,22 @@ msm4
Target Mask(ISA2_SM4) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and
SM4 built-in functions and code generation.
+
+mavx10-max-512bit
+Target Mask(ISA2_AVX10_512BIT) Var(ix86_isa_flags2) Save
+Indicates 512 bit vector width support for AVX10.
+
+mavx10.1
+Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
+and AVX10.1 built-in functions and code generation.
+
+mavx10.1-256
+Target RejectNegative
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
+and AVX10.1 built-in functions and code generation.
+
+mavx10.1-512
+Target RejectNegative
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
+and AVX10.1-512 built-in functions and code generation.
@@ -7184,6 +7184,19 @@ Enable/disable the generation of the SHA512 instructions.
@itemx no-sm4
Enable/disable the generation of the SM4 instructions.
+@cindex @code{target("avx10.1")} function attribute, x86
+@item avx10.1
+@itemx no-avx10.1
+Enable/disable the generation of the AVX10.1 instructions.
+
+@cindex @code{target("avx10.1-256")} function attribute, x86
+@item avx10.1-256
+Enable the generation of the AVX10.1 instructions, without 512 bit vector support.
+
+@cindex @code{target("avx10.1-512")} function attribute, x86
+@item avx10.1-512
+Enable the generation of the AVX10.1 512 bit instructions.
+
@cindex @code{target("cld")} function attribute, x86
@item cld
@itemx no-cld
@@ -1436,6 +1436,7 @@ See RS/6000 and PowerPC Options.
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni
-mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16
-mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4
+-mavx10.1 -mavx10.1-256 -mavx10.1-512
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops
-minline-stringops-dynamically -mstringop-strategy=@var{alg}
-mkl -mwidekl
@@ -33670,6 +33671,15 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@need 200
@opindex msm4
@itemx -msm4
+@need 200
+@opindex mavx10.1
+@itemx -mavx10.1
+@need 200
+@opindex mavx10.1-256
+@itemx -mavx10.1-256
+@need 200
+@opindex mavx10.1-512
+@itemx -mavx10.1-512
These switches enable the use of instructions in the MMX, SSE,
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
AES, PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG,
@@ -33680,9 +33690,9 @@ GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16,
AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT,
-AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512, SM4 or CLDEMOTE extended instruction
-sets. Each has a corresponding @option{-mno-} option to disable use of these
-instructions.
+AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512, SM4, AVX10.1 or CLDEMOTE extended
+instruction sets. Each, except @option{-mavx10.1-256} and @option{-mavx10.1-512},
+has a corresponding @option{-mno-} option to disable use of these instructions.
These extensions are also available as built-in functions: see
@ref{x86 Built-in Functions}, for details of the functions enabled and
@@ -2484,6 +2484,15 @@ Target supports compiling @code{avx} instructions.
@item avx_runtime
Target supports the execution of @code{avx} instructions.
+@item avx10.1
+Target supports the execution of @code{avx10.1} instructions.
+
+@item avx10.1-256
+Target supports the execution of @code{avx10.1} instructions.
+
+@item avx10.1-512
+Target supports the execution of @code{avx10.1-512} instructions.
+
@item avx2
Target supports compiling @code{avx2} instructions.
new file mode 100644
@@ -0,0 +1,30 @@
+// Test that dispatching can choose the right multiversion
+// for the avx10.1-512 ISA feature.
+
+// { dg-do run }
+// { dg-require-ifunc "" }
+// { dg-options "-O2" }
+
+#include <assert.h>
+
+int __attribute__ ((target("default")))
+foo ()
+{
+ return 0;
+}
+
+int __attribute__ ((target("avx10.1-512"))) foo () {
+ return 1;
+}
+
+int main ()
+{
+ int val = foo ();
+
+ if (__builtin_cpu_supports ("avx10.1-512"))
+ assert (val == 1);
+ else
+ assert (val == 0);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */
+
+#include <immintrin.h>
+
+void
+f1 ()
+{
+ register __m256d a __asm ("ymm17");
+ register __m256d b __asm ("ymm16");
+ a = _mm256_add_pd (a, b);
+ asm volatile ("" : "+v" (a));
+}
+
+void
+f2 ()
+{
+ register __m128d a __asm ("xmm17");
+ register __m128d b __asm ("xmm16");
+ a = _mm_add_pd (a, b);
+ asm volatile ("" : "+v" (a));
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64" } */
+/* { dg-final { scan-assembler "%zmm" } } */
+
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+
+__attribute__ ((target ("avx10.1-512"))) __m512d
+foo ()
+{
+ __m512d a, b;
+ a = a + b;
+ return a;
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx10.1-512" } */
+/* { dg-final { scan-assembler "%zmm" } } */
+
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+
+__m512d
+foo ()
+{
+ __m512d a, b;
+ a = a + b;
+ return a;
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */
+
+#include <immintrin.h>
+
+int
+foo (int c)
+{
+ register int a __asm ("k7") = c;
+ int b = foo (a);
+ asm volatile ("" : "+k" (b));
+ return b;
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -mavx10.1-512" } */
+
+#include <immintrin.h>
+
+long long
+foo (long long c)
+{
+ register long long a __asm ("k7") = c;
+ long long b = foo (a);
+ asm volatile ("" : "+k" (b));
+ return b;
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O0 -march=x86-64 -mavx10.1 -Wno-psabi" } */
+/* { dg-final { scan-assembler-not ".%zmm" } } */
+
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+
+__m512d
+foo ()
+{
+ __m512d a, b;
+ a = a + b;
+ return a;
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */
+
+#include <immintrin.h>
+
+long long
+foo (long long c)
+{
+ register long long a __asm ("k7") = c;
+ long long b = foo (a);
+ asm volatile ("" : "+k" (b)); /* { dg-error "inconsistent operand constraints in an 'asm'" } */
+ return b;
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -Wno-psabi" } */
+/* { dg-final { scan-assembler-not ".%zmm" } } */
+
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+
+__attribute__ ((target ("avx10.1"))) __m512d
+foo ()
+{
+ __m512d a, b;
+ a = a + b;
+ return a;
+}
new file mode 100644
@@ -0,0 +1,4 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=x86-64 -mavx10.1-256" } */
+
+#include "avx10_1-1.c"
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -Wno-psabi" } */
+/* { dg-final { scan-assembler-not ".%zmm" } } */
+
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+
+__attribute__ ((target ("avx10.1-256"))) __m512d
+foo ()
+{
+ __m512d a, b;
+ a = a + b;
+ return a;
+}